{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 21246, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001412030499858797, "grad_norm": 11.016289456388648, "learning_rate": 3.134796238244514e-08, "loss": 1.2185, "step": 1 }, { "epoch": 0.0002824060999717594, "grad_norm": 8.120133111158005, "learning_rate": 6.269592476489028e-08, "loss": 1.1331, "step": 2 }, { "epoch": 0.0004236091499576391, "grad_norm": 7.250787374374918, "learning_rate": 9.404388714733543e-08, "loss": 1.0514, "step": 3 }, { "epoch": 0.0005648121999435188, "grad_norm": 7.695053669816028, "learning_rate": 1.2539184952978057e-07, "loss": 1.0558, "step": 4 }, { "epoch": 0.0007060152499293985, "grad_norm": 9.360748970299243, "learning_rate": 1.567398119122257e-07, "loss": 1.1812, "step": 5 }, { "epoch": 0.0008472182999152782, "grad_norm": 8.227430059936243, "learning_rate": 1.8808777429467086e-07, "loss": 1.0638, "step": 6 }, { "epoch": 0.000988421349901158, "grad_norm": 11.252963008349193, "learning_rate": 2.19435736677116e-07, "loss": 1.5209, "step": 7 }, { "epoch": 0.0011296243998870376, "grad_norm": 9.05785886340715, "learning_rate": 2.5078369905956113e-07, "loss": 1.3215, "step": 8 }, { "epoch": 0.0012708274498729173, "grad_norm": 7.12566384828119, "learning_rate": 2.821316614420063e-07, "loss": 1.1655, "step": 9 }, { "epoch": 0.001412030499858797, "grad_norm": 9.534474543883421, "learning_rate": 3.134796238244514e-07, "loss": 1.2507, "step": 10 }, { "epoch": 0.0015532335498446767, "grad_norm": 8.249516296143845, "learning_rate": 3.4482758620689656e-07, "loss": 1.311, "step": 11 }, { "epoch": 0.0016944365998305564, "grad_norm": 7.883106644572163, "learning_rate": 3.761755485893417e-07, "loss": 1.0434, "step": 12 }, { "epoch": 0.0018356396498164361, "grad_norm": 7.800790851628648, "learning_rate": 4.0752351097178683e-07, "loss": 1.1142, "step": 13 }, { "epoch": 0.001976842699802316, "grad_norm": 8.734356264599777, "learning_rate": 4.38871473354232e-07, "loss": 1.1967, "step": 14 }, { "epoch": 0.0021180457497881953, "grad_norm": 9.706526880499672, "learning_rate": 4.7021943573667715e-07, "loss": 1.1889, "step": 15 }, { "epoch": 0.0022592487997740753, "grad_norm": 9.839295917755758, "learning_rate": 5.015673981191223e-07, "loss": 1.1732, "step": 16 }, { "epoch": 0.0024004518497599547, "grad_norm": 8.631626705724988, "learning_rate": 5.329153605015674e-07, "loss": 1.1017, "step": 17 }, { "epoch": 0.0025416548997458347, "grad_norm": 10.755118935271149, "learning_rate": 5.642633228840126e-07, "loss": 1.4075, "step": 18 }, { "epoch": 0.002682857949731714, "grad_norm": 10.148848929833456, "learning_rate": 5.956112852664577e-07, "loss": 1.2353, "step": 19 }, { "epoch": 0.002824060999717594, "grad_norm": 7.610801905336504, "learning_rate": 6.269592476489028e-07, "loss": 1.0546, "step": 20 }, { "epoch": 0.0029652640497034735, "grad_norm": 8.24765505083955, "learning_rate": 6.58307210031348e-07, "loss": 1.056, "step": 21 }, { "epoch": 0.0031064670996893535, "grad_norm": 9.498512915102472, "learning_rate": 6.896551724137931e-07, "loss": 1.4084, "step": 22 }, { "epoch": 0.003247670149675233, "grad_norm": 8.45574073987412, "learning_rate": 7.210031347962383e-07, "loss": 1.2365, "step": 23 }, { "epoch": 0.003388873199661113, "grad_norm": 7.381880650658492, "learning_rate": 7.523510971786834e-07, "loss": 1.1363, "step": 24 }, { "epoch": 0.0035300762496469924, "grad_norm": 9.82656867976583, "learning_rate": 7.836990595611286e-07, "loss": 1.185, "step": 25 }, { "epoch": 0.0036712792996328723, "grad_norm": 7.8957415859238775, "learning_rate": 8.150470219435737e-07, "loss": 1.0926, "step": 26 }, { "epoch": 0.0038124823496187518, "grad_norm": 9.804516056540306, "learning_rate": 8.463949843260188e-07, "loss": 1.0909, "step": 27 }, { "epoch": 0.003953685399604632, "grad_norm": 10.38514489323193, "learning_rate": 8.77742946708464e-07, "loss": 1.2265, "step": 28 }, { "epoch": 0.004094888449590511, "grad_norm": 9.613709288614304, "learning_rate": 9.090909090909091e-07, "loss": 1.0937, "step": 29 }, { "epoch": 0.004236091499576391, "grad_norm": 8.52739686979812, "learning_rate": 9.404388714733543e-07, "loss": 1.2375, "step": 30 }, { "epoch": 0.00437729454956227, "grad_norm": 8.407880490682919, "learning_rate": 9.717868338557995e-07, "loss": 0.9995, "step": 31 }, { "epoch": 0.0045184975995481505, "grad_norm": 8.818852133609342, "learning_rate": 1.0031347962382445e-06, "loss": 0.9853, "step": 32 }, { "epoch": 0.00465970064953403, "grad_norm": 7.6608076447519124, "learning_rate": 1.0344827586206898e-06, "loss": 1.027, "step": 33 }, { "epoch": 0.0048009036995199095, "grad_norm": 7.045474268264931, "learning_rate": 1.0658307210031348e-06, "loss": 0.9492, "step": 34 }, { "epoch": 0.004942106749505789, "grad_norm": 7.997538865617388, "learning_rate": 1.0971786833855801e-06, "loss": 1.1267, "step": 35 }, { "epoch": 0.005083309799491669, "grad_norm": 7.878590188779293, "learning_rate": 1.1285266457680252e-06, "loss": 1.2463, "step": 36 }, { "epoch": 0.005224512849477549, "grad_norm": 8.243444524822978, "learning_rate": 1.1598746081504702e-06, "loss": 1.1125, "step": 37 }, { "epoch": 0.005365715899463428, "grad_norm": 7.498789580843632, "learning_rate": 1.1912225705329155e-06, "loss": 1.1791, "step": 38 }, { "epoch": 0.005506918949449308, "grad_norm": 8.711680777445437, "learning_rate": 1.2225705329153605e-06, "loss": 1.1524, "step": 39 }, { "epoch": 0.005648121999435188, "grad_norm": 9.095212085184661, "learning_rate": 1.2539184952978056e-06, "loss": 1.1614, "step": 40 }, { "epoch": 0.005789325049421068, "grad_norm": 8.076909928890133, "learning_rate": 1.2852664576802509e-06, "loss": 1.0627, "step": 41 }, { "epoch": 0.005930528099406947, "grad_norm": 7.877567616556983, "learning_rate": 1.316614420062696e-06, "loss": 1.1967, "step": 42 }, { "epoch": 0.006071731149392827, "grad_norm": 7.811444399828281, "learning_rate": 1.3479623824451412e-06, "loss": 1.1811, "step": 43 }, { "epoch": 0.006212934199378707, "grad_norm": 10.697303496487786, "learning_rate": 1.3793103448275862e-06, "loss": 1.1331, "step": 44 }, { "epoch": 0.006354137249364586, "grad_norm": 8.077993683082106, "learning_rate": 1.4106583072100313e-06, "loss": 0.9725, "step": 45 }, { "epoch": 0.006495340299350466, "grad_norm": 6.865107269829995, "learning_rate": 1.4420062695924766e-06, "loss": 1.2191, "step": 46 }, { "epoch": 0.006636543349336345, "grad_norm": 7.078868194913755, "learning_rate": 1.4733542319749216e-06, "loss": 1.1047, "step": 47 }, { "epoch": 0.006777746399322226, "grad_norm": 7.673040623741436, "learning_rate": 1.5047021943573669e-06, "loss": 1.0038, "step": 48 }, { "epoch": 0.006918949449308105, "grad_norm": 7.583881587623022, "learning_rate": 1.536050156739812e-06, "loss": 1.0244, "step": 49 }, { "epoch": 0.007060152499293985, "grad_norm": 9.38341982167418, "learning_rate": 1.5673981191222572e-06, "loss": 0.9941, "step": 50 }, { "epoch": 0.007201355549279864, "grad_norm": 8.912233805021561, "learning_rate": 1.5987460815047023e-06, "loss": 1.0193, "step": 51 }, { "epoch": 0.0073425585992657446, "grad_norm": 7.778349403085027, "learning_rate": 1.6300940438871473e-06, "loss": 1.0961, "step": 52 }, { "epoch": 0.007483761649251624, "grad_norm": 6.973044642406554, "learning_rate": 1.6614420062695926e-06, "loss": 0.9429, "step": 53 }, { "epoch": 0.0076249646992375035, "grad_norm": 7.273640449409624, "learning_rate": 1.6927899686520376e-06, "loss": 0.9659, "step": 54 }, { "epoch": 0.007766167749223383, "grad_norm": 8.45866624072379, "learning_rate": 1.724137931034483e-06, "loss": 1.0845, "step": 55 }, { "epoch": 0.007907370799209263, "grad_norm": 7.933551688799783, "learning_rate": 1.755485893416928e-06, "loss": 0.9936, "step": 56 }, { "epoch": 0.008048573849195143, "grad_norm": 7.3752957818911975, "learning_rate": 1.786833855799373e-06, "loss": 1.2214, "step": 57 }, { "epoch": 0.008189776899181022, "grad_norm": 7.759633648039131, "learning_rate": 1.8181818181818183e-06, "loss": 1.1288, "step": 58 }, { "epoch": 0.008330979949166902, "grad_norm": 8.297481290277021, "learning_rate": 1.8495297805642633e-06, "loss": 1.0711, "step": 59 }, { "epoch": 0.008472182999152781, "grad_norm": 7.707348532551644, "learning_rate": 1.8808777429467086e-06, "loss": 1.1045, "step": 60 }, { "epoch": 0.00861338604913866, "grad_norm": 8.440734580994208, "learning_rate": 1.9122257053291537e-06, "loss": 1.2053, "step": 61 }, { "epoch": 0.00875458909912454, "grad_norm": 9.379713463539574, "learning_rate": 1.943573667711599e-06, "loss": 1.0985, "step": 62 }, { "epoch": 0.008895792149110422, "grad_norm": 8.393582865795228, "learning_rate": 1.974921630094044e-06, "loss": 1.1986, "step": 63 }, { "epoch": 0.009036995199096301, "grad_norm": 8.07011113417986, "learning_rate": 2.006269592476489e-06, "loss": 1.0335, "step": 64 }, { "epoch": 0.00917819824908218, "grad_norm": 7.694745925151725, "learning_rate": 2.0376175548589343e-06, "loss": 1.0446, "step": 65 }, { "epoch": 0.00931940129906806, "grad_norm": 7.214117451209332, "learning_rate": 2.0689655172413796e-06, "loss": 0.9772, "step": 66 }, { "epoch": 0.00946060434905394, "grad_norm": 8.105960770628542, "learning_rate": 2.100313479623825e-06, "loss": 1.0872, "step": 67 }, { "epoch": 0.009601807399039819, "grad_norm": 6.543049973253576, "learning_rate": 2.1316614420062697e-06, "loss": 0.8508, "step": 68 }, { "epoch": 0.009743010449025698, "grad_norm": 6.828975668478929, "learning_rate": 2.163009404388715e-06, "loss": 0.7794, "step": 69 }, { "epoch": 0.009884213499011578, "grad_norm": 8.179384371223852, "learning_rate": 2.1943573667711602e-06, "loss": 0.9591, "step": 70 }, { "epoch": 0.01002541654899746, "grad_norm": 8.219708247974477, "learning_rate": 2.225705329153605e-06, "loss": 1.162, "step": 71 }, { "epoch": 0.010166619598983339, "grad_norm": 9.237842698268448, "learning_rate": 2.2570532915360503e-06, "loss": 1.1298, "step": 72 }, { "epoch": 0.010307822648969218, "grad_norm": 7.538555016516361, "learning_rate": 2.2884012539184956e-06, "loss": 0.9263, "step": 73 }, { "epoch": 0.010449025698955098, "grad_norm": 7.797504601380168, "learning_rate": 2.3197492163009404e-06, "loss": 0.9031, "step": 74 }, { "epoch": 0.010590228748940977, "grad_norm": 7.452472518693939, "learning_rate": 2.3510971786833857e-06, "loss": 1.0156, "step": 75 }, { "epoch": 0.010731431798926857, "grad_norm": 6.490277329807399, "learning_rate": 2.382445141065831e-06, "loss": 0.9288, "step": 76 }, { "epoch": 0.010872634848912736, "grad_norm": 9.092323356828192, "learning_rate": 2.4137931034482762e-06, "loss": 0.9758, "step": 77 }, { "epoch": 0.011013837898898616, "grad_norm": 7.399506935121757, "learning_rate": 2.445141065830721e-06, "loss": 1.0961, "step": 78 }, { "epoch": 0.011155040948884497, "grad_norm": 8.763661517197843, "learning_rate": 2.4764890282131664e-06, "loss": 0.9974, "step": 79 }, { "epoch": 0.011296243998870376, "grad_norm": 8.823673726434475, "learning_rate": 2.507836990595611e-06, "loss": 1.0356, "step": 80 }, { "epoch": 0.011437447048856256, "grad_norm": 9.43989059848618, "learning_rate": 2.5391849529780565e-06, "loss": 1.206, "step": 81 }, { "epoch": 0.011578650098842135, "grad_norm": 8.819174428360991, "learning_rate": 2.5705329153605017e-06, "loss": 1.033, "step": 82 }, { "epoch": 0.011719853148828015, "grad_norm": 8.598032169347169, "learning_rate": 2.601880877742947e-06, "loss": 0.9142, "step": 83 }, { "epoch": 0.011861056198813894, "grad_norm": 7.929972401192863, "learning_rate": 2.633228840125392e-06, "loss": 1.0614, "step": 84 }, { "epoch": 0.012002259248799774, "grad_norm": 6.9944529439088665, "learning_rate": 2.664576802507837e-06, "loss": 0.9095, "step": 85 }, { "epoch": 0.012143462298785653, "grad_norm": 7.736966107250294, "learning_rate": 2.6959247648902824e-06, "loss": 0.9155, "step": 86 }, { "epoch": 0.012284665348771533, "grad_norm": 7.0155854768729755, "learning_rate": 2.7272727272727272e-06, "loss": 0.8631, "step": 87 }, { "epoch": 0.012425868398757414, "grad_norm": 7.412422751254611, "learning_rate": 2.7586206896551725e-06, "loss": 0.9165, "step": 88 }, { "epoch": 0.012567071448743293, "grad_norm": 8.143433946568083, "learning_rate": 2.7899686520376178e-06, "loss": 0.8537, "step": 89 }, { "epoch": 0.012708274498729173, "grad_norm": 8.150750830633141, "learning_rate": 2.8213166144200626e-06, "loss": 0.9352, "step": 90 }, { "epoch": 0.012849477548715052, "grad_norm": 7.508385509036284, "learning_rate": 2.852664576802508e-06, "loss": 0.9612, "step": 91 }, { "epoch": 0.012990680598700932, "grad_norm": 7.447685254764651, "learning_rate": 2.884012539184953e-06, "loss": 0.9174, "step": 92 }, { "epoch": 0.013131883648686811, "grad_norm": 6.938010829660382, "learning_rate": 2.9153605015673984e-06, "loss": 0.8738, "step": 93 }, { "epoch": 0.01327308669867269, "grad_norm": 6.2478036866826265, "learning_rate": 2.9467084639498432e-06, "loss": 0.883, "step": 94 }, { "epoch": 0.01341428974865857, "grad_norm": 8.713346272613183, "learning_rate": 2.9780564263322885e-06, "loss": 0.9038, "step": 95 }, { "epoch": 0.013555492798644452, "grad_norm": 7.405605657939075, "learning_rate": 3.0094043887147338e-06, "loss": 0.8549, "step": 96 }, { "epoch": 0.013696695848630331, "grad_norm": 6.889707950820388, "learning_rate": 3.0407523510971786e-06, "loss": 0.9198, "step": 97 }, { "epoch": 0.01383789889861621, "grad_norm": 8.49207098940456, "learning_rate": 3.072100313479624e-06, "loss": 1.0015, "step": 98 }, { "epoch": 0.01397910194860209, "grad_norm": 7.938481581936447, "learning_rate": 3.103448275862069e-06, "loss": 1.0189, "step": 99 }, { "epoch": 0.01412030499858797, "grad_norm": 6.809269885595204, "learning_rate": 3.1347962382445144e-06, "loss": 0.836, "step": 100 }, { "epoch": 0.014261508048573849, "grad_norm": 8.36461382411018, "learning_rate": 3.1661442006269593e-06, "loss": 0.8465, "step": 101 }, { "epoch": 0.014402711098559728, "grad_norm": 9.138606230146802, "learning_rate": 3.1974921630094045e-06, "loss": 0.9167, "step": 102 }, { "epoch": 0.014543914148545608, "grad_norm": 8.378076403772889, "learning_rate": 3.22884012539185e-06, "loss": 1.1506, "step": 103 }, { "epoch": 0.014685117198531489, "grad_norm": 6.717403458248077, "learning_rate": 3.2601880877742946e-06, "loss": 0.7493, "step": 104 }, { "epoch": 0.014826320248517369, "grad_norm": 8.688997715121792, "learning_rate": 3.29153605015674e-06, "loss": 1.2732, "step": 105 }, { "epoch": 0.014967523298503248, "grad_norm": 7.5651936824346775, "learning_rate": 3.322884012539185e-06, "loss": 0.8016, "step": 106 }, { "epoch": 0.015108726348489128, "grad_norm": 7.775727264010563, "learning_rate": 3.35423197492163e-06, "loss": 0.8038, "step": 107 }, { "epoch": 0.015249929398475007, "grad_norm": 9.076632987472834, "learning_rate": 3.3855799373040753e-06, "loss": 1.1514, "step": 108 }, { "epoch": 0.015391132448460887, "grad_norm": 6.713356329803989, "learning_rate": 3.4169278996865206e-06, "loss": 0.7853, "step": 109 }, { "epoch": 0.015532335498446766, "grad_norm": 7.327696700434162, "learning_rate": 3.448275862068966e-06, "loss": 0.9567, "step": 110 }, { "epoch": 0.015673538548432647, "grad_norm": 7.793066393172524, "learning_rate": 3.4796238244514107e-06, "loss": 0.9615, "step": 111 }, { "epoch": 0.015814741598418527, "grad_norm": 7.799467546951294, "learning_rate": 3.510971786833856e-06, "loss": 0.956, "step": 112 }, { "epoch": 0.015955944648404406, "grad_norm": 9.153361234737188, "learning_rate": 3.542319749216301e-06, "loss": 1.1614, "step": 113 }, { "epoch": 0.016097147698390286, "grad_norm": 7.816487563568254, "learning_rate": 3.573667711598746e-06, "loss": 0.9252, "step": 114 }, { "epoch": 0.016238350748376165, "grad_norm": 6.991350952981636, "learning_rate": 3.6050156739811913e-06, "loss": 0.9202, "step": 115 }, { "epoch": 0.016379553798362045, "grad_norm": 8.046741425650566, "learning_rate": 3.6363636363636366e-06, "loss": 1.0653, "step": 116 }, { "epoch": 0.016520756848347924, "grad_norm": 8.206035829344017, "learning_rate": 3.6677115987460823e-06, "loss": 0.9128, "step": 117 }, { "epoch": 0.016661959898333804, "grad_norm": 7.45351581410239, "learning_rate": 3.6990595611285267e-06, "loss": 1.0055, "step": 118 }, { "epoch": 0.016803162948319683, "grad_norm": 8.422218495495787, "learning_rate": 3.730407523510972e-06, "loss": 0.8757, "step": 119 }, { "epoch": 0.016944365998305563, "grad_norm": 8.19658619309238, "learning_rate": 3.7617554858934172e-06, "loss": 0.876, "step": 120 }, { "epoch": 0.017085569048291442, "grad_norm": 8.632174035744185, "learning_rate": 3.793103448275862e-06, "loss": 0.9799, "step": 121 }, { "epoch": 0.01722677209827732, "grad_norm": 7.201975064478882, "learning_rate": 3.824451410658307e-06, "loss": 0.7682, "step": 122 }, { "epoch": 0.0173679751482632, "grad_norm": 7.41939564199568, "learning_rate": 3.855799373040753e-06, "loss": 0.9128, "step": 123 }, { "epoch": 0.01750917819824908, "grad_norm": 8.227459935092366, "learning_rate": 3.887147335423198e-06, "loss": 1.1737, "step": 124 }, { "epoch": 0.017650381248234964, "grad_norm": 7.574187825540355, "learning_rate": 3.918495297805643e-06, "loss": 0.9508, "step": 125 }, { "epoch": 0.017791584298220843, "grad_norm": 7.630234001220519, "learning_rate": 3.949843260188088e-06, "loss": 0.9079, "step": 126 }, { "epoch": 0.017932787348206723, "grad_norm": 8.535128471785614, "learning_rate": 3.981191222570533e-06, "loss": 1.0725, "step": 127 }, { "epoch": 0.018073990398192602, "grad_norm": 7.9385808624909275, "learning_rate": 4.012539184952978e-06, "loss": 0.9533, "step": 128 }, { "epoch": 0.01821519344817848, "grad_norm": 8.064381624825716, "learning_rate": 4.043887147335424e-06, "loss": 0.9964, "step": 129 }, { "epoch": 0.01835639649816436, "grad_norm": 8.223493251464122, "learning_rate": 4.075235109717869e-06, "loss": 1.1347, "step": 130 }, { "epoch": 0.01849759954815024, "grad_norm": 7.183138344316719, "learning_rate": 4.1065830721003135e-06, "loss": 0.9797, "step": 131 }, { "epoch": 0.01863880259813612, "grad_norm": 7.5344265624001725, "learning_rate": 4.137931034482759e-06, "loss": 0.897, "step": 132 }, { "epoch": 0.018780005648122, "grad_norm": 7.369089939683868, "learning_rate": 4.169278996865204e-06, "loss": 0.9249, "step": 133 }, { "epoch": 0.01892120869810788, "grad_norm": 7.1172427648489505, "learning_rate": 4.20062695924765e-06, "loss": 0.8232, "step": 134 }, { "epoch": 0.01906241174809376, "grad_norm": 7.548084196420549, "learning_rate": 4.2319749216300945e-06, "loss": 1.034, "step": 135 }, { "epoch": 0.019203614798079638, "grad_norm": 8.735116825365756, "learning_rate": 4.263322884012539e-06, "loss": 1.0003, "step": 136 }, { "epoch": 0.019344817848065517, "grad_norm": 8.179101551617698, "learning_rate": 4.294670846394985e-06, "loss": 0.8194, "step": 137 }, { "epoch": 0.019486020898051397, "grad_norm": 7.33820082796791, "learning_rate": 4.32601880877743e-06, "loss": 0.8458, "step": 138 }, { "epoch": 0.019627223948037276, "grad_norm": 6.577869920045354, "learning_rate": 4.357366771159875e-06, "loss": 0.835, "step": 139 }, { "epoch": 0.019768426998023156, "grad_norm": 8.818016421185366, "learning_rate": 4.3887147335423205e-06, "loss": 1.2279, "step": 140 }, { "epoch": 0.019909630048009035, "grad_norm": 8.521624173184197, "learning_rate": 4.420062695924765e-06, "loss": 1.0621, "step": 141 }, { "epoch": 0.02005083309799492, "grad_norm": 7.32891975414404, "learning_rate": 4.45141065830721e-06, "loss": 1.0236, "step": 142 }, { "epoch": 0.020192036147980798, "grad_norm": 7.525756473002361, "learning_rate": 4.482758620689656e-06, "loss": 1.0636, "step": 143 }, { "epoch": 0.020333239197966677, "grad_norm": 7.719305967369412, "learning_rate": 4.514106583072101e-06, "loss": 1.0331, "step": 144 }, { "epoch": 0.020474442247952557, "grad_norm": 7.399362698492069, "learning_rate": 4.5454545454545455e-06, "loss": 0.9871, "step": 145 }, { "epoch": 0.020615645297938436, "grad_norm": 6.679701389904611, "learning_rate": 4.576802507836991e-06, "loss": 1.0552, "step": 146 }, { "epoch": 0.020756848347924316, "grad_norm": 8.011343652619376, "learning_rate": 4.608150470219436e-06, "loss": 1.0866, "step": 147 }, { "epoch": 0.020898051397910195, "grad_norm": 6.958944421322256, "learning_rate": 4.639498432601881e-06, "loss": 1.0084, "step": 148 }, { "epoch": 0.021039254447896075, "grad_norm": 7.886896289928861, "learning_rate": 4.670846394984327e-06, "loss": 0.9412, "step": 149 }, { "epoch": 0.021180457497881954, "grad_norm": 6.481507835100823, "learning_rate": 4.7021943573667714e-06, "loss": 0.7664, "step": 150 }, { "epoch": 0.021321660547867834, "grad_norm": 7.309729219501239, "learning_rate": 4.733542319749217e-06, "loss": 1.0342, "step": 151 }, { "epoch": 0.021462863597853713, "grad_norm": 6.633514384163003, "learning_rate": 4.764890282131662e-06, "loss": 0.8934, "step": 152 }, { "epoch": 0.021604066647839593, "grad_norm": 8.784506623524786, "learning_rate": 4.796238244514107e-06, "loss": 0.8059, "step": 153 }, { "epoch": 0.021745269697825472, "grad_norm": 8.189863490933027, "learning_rate": 4.8275862068965525e-06, "loss": 0.9883, "step": 154 }, { "epoch": 0.02188647274781135, "grad_norm": 6.552080517179776, "learning_rate": 4.858934169278997e-06, "loss": 1.0269, "step": 155 }, { "epoch": 0.02202767579779723, "grad_norm": 6.8021193253288965, "learning_rate": 4.890282131661442e-06, "loss": 0.9354, "step": 156 }, { "epoch": 0.02216887884778311, "grad_norm": 7.453130791464393, "learning_rate": 4.921630094043888e-06, "loss": 0.8857, "step": 157 }, { "epoch": 0.022310081897768994, "grad_norm": 8.595456860827616, "learning_rate": 4.952978056426333e-06, "loss": 1.0767, "step": 158 }, { "epoch": 0.022451284947754873, "grad_norm": 7.400038898894051, "learning_rate": 4.9843260188087776e-06, "loss": 0.8515, "step": 159 }, { "epoch": 0.022592487997740753, "grad_norm": 6.864972018632919, "learning_rate": 5.015673981191222e-06, "loss": 0.8471, "step": 160 }, { "epoch": 0.022733691047726632, "grad_norm": 7.244583785380982, "learning_rate": 5.047021943573668e-06, "loss": 0.9584, "step": 161 }, { "epoch": 0.02287489409771251, "grad_norm": 7.812300048995693, "learning_rate": 5.078369905956113e-06, "loss": 0.8865, "step": 162 }, { "epoch": 0.02301609714769839, "grad_norm": 7.004691874730641, "learning_rate": 5.109717868338559e-06, "loss": 1.0468, "step": 163 }, { "epoch": 0.02315730019768427, "grad_norm": 6.945133808108309, "learning_rate": 5.1410658307210035e-06, "loss": 0.9602, "step": 164 }, { "epoch": 0.02329850324767015, "grad_norm": 7.603837018151768, "learning_rate": 5.172413793103449e-06, "loss": 1.136, "step": 165 }, { "epoch": 0.02343970629765603, "grad_norm": 8.144944589234624, "learning_rate": 5.203761755485894e-06, "loss": 0.877, "step": 166 }, { "epoch": 0.02358090934764191, "grad_norm": 7.050413546355765, "learning_rate": 5.235109717868339e-06, "loss": 0.8602, "step": 167 }, { "epoch": 0.02372211239762779, "grad_norm": 7.2106434082690924, "learning_rate": 5.266457680250784e-06, "loss": 0.9794, "step": 168 }, { "epoch": 0.023863315447613668, "grad_norm": 7.654905889622911, "learning_rate": 5.297805642633229e-06, "loss": 1.0512, "step": 169 }, { "epoch": 0.024004518497599547, "grad_norm": 7.218193827248691, "learning_rate": 5.329153605015674e-06, "loss": 0.9071, "step": 170 }, { "epoch": 0.024145721547585427, "grad_norm": 6.34625892925884, "learning_rate": 5.36050156739812e-06, "loss": 0.8166, "step": 171 }, { "epoch": 0.024286924597571306, "grad_norm": 6.9293933087403925, "learning_rate": 5.391849529780565e-06, "loss": 0.7874, "step": 172 }, { "epoch": 0.024428127647557186, "grad_norm": 7.005121592916882, "learning_rate": 5.4231974921630105e-06, "loss": 0.9914, "step": 173 }, { "epoch": 0.024569330697543065, "grad_norm": 6.414056669238483, "learning_rate": 5.4545454545454545e-06, "loss": 0.9896, "step": 174 }, { "epoch": 0.02471053374752895, "grad_norm": 6.7530037277615165, "learning_rate": 5.4858934169279e-06, "loss": 0.8085, "step": 175 }, { "epoch": 0.024851736797514828, "grad_norm": 8.179725977304141, "learning_rate": 5.517241379310345e-06, "loss": 1.112, "step": 176 }, { "epoch": 0.024992939847500707, "grad_norm": 6.53333509105362, "learning_rate": 5.548589341692791e-06, "loss": 1.0543, "step": 177 }, { "epoch": 0.025134142897486587, "grad_norm": 7.512819748003157, "learning_rate": 5.5799373040752355e-06, "loss": 0.8857, "step": 178 }, { "epoch": 0.025275345947472466, "grad_norm": 7.611626302273766, "learning_rate": 5.611285266457681e-06, "loss": 0.9241, "step": 179 }, { "epoch": 0.025416548997458346, "grad_norm": 7.1173872377859695, "learning_rate": 5.642633228840125e-06, "loss": 1.0062, "step": 180 }, { "epoch": 0.025557752047444225, "grad_norm": 8.03266549777508, "learning_rate": 5.673981191222571e-06, "loss": 0.9286, "step": 181 }, { "epoch": 0.025698955097430105, "grad_norm": 7.597371042299757, "learning_rate": 5.705329153605016e-06, "loss": 0.908, "step": 182 }, { "epoch": 0.025840158147415984, "grad_norm": 7.409423818262954, "learning_rate": 5.7366771159874614e-06, "loss": 1.1088, "step": 183 }, { "epoch": 0.025981361197401864, "grad_norm": 6.864046570601934, "learning_rate": 5.768025078369906e-06, "loss": 1.0343, "step": 184 }, { "epoch": 0.026122564247387743, "grad_norm": 6.329092426366271, "learning_rate": 5.799373040752352e-06, "loss": 0.8898, "step": 185 }, { "epoch": 0.026263767297373623, "grad_norm": 8.164265970315288, "learning_rate": 5.830721003134797e-06, "loss": 1.1265, "step": 186 }, { "epoch": 0.026404970347359502, "grad_norm": 7.024578148497136, "learning_rate": 5.862068965517242e-06, "loss": 1.019, "step": 187 }, { "epoch": 0.02654617339734538, "grad_norm": 6.996290405725953, "learning_rate": 5.8934169278996865e-06, "loss": 1.0395, "step": 188 }, { "epoch": 0.02668737644733126, "grad_norm": 7.653102387363891, "learning_rate": 5.924764890282132e-06, "loss": 0.9276, "step": 189 }, { "epoch": 0.02682857949731714, "grad_norm": 7.766088426361559, "learning_rate": 5.956112852664577e-06, "loss": 1.0962, "step": 190 }, { "epoch": 0.02696978254730302, "grad_norm": 6.701338384563943, "learning_rate": 5.987460815047023e-06, "loss": 1.1105, "step": 191 }, { "epoch": 0.027110985597288903, "grad_norm": 5.864439552872452, "learning_rate": 6.0188087774294676e-06, "loss": 0.9879, "step": 192 }, { "epoch": 0.027252188647274782, "grad_norm": 6.625683545637496, "learning_rate": 6.050156739811913e-06, "loss": 1.0501, "step": 193 }, { "epoch": 0.027393391697260662, "grad_norm": 6.359718266354579, "learning_rate": 6.081504702194357e-06, "loss": 0.815, "step": 194 }, { "epoch": 0.02753459474724654, "grad_norm": 6.2709473882940125, "learning_rate": 6.112852664576803e-06, "loss": 0.992, "step": 195 }, { "epoch": 0.02767579779723242, "grad_norm": 6.831492474357971, "learning_rate": 6.144200626959248e-06, "loss": 1.0035, "step": 196 }, { "epoch": 0.0278170008472183, "grad_norm": 6.62691349429465, "learning_rate": 6.1755485893416935e-06, "loss": 0.9039, "step": 197 }, { "epoch": 0.02795820389720418, "grad_norm": 6.490045598857795, "learning_rate": 6.206896551724138e-06, "loss": 0.9872, "step": 198 }, { "epoch": 0.02809940694719006, "grad_norm": 7.788972800265241, "learning_rate": 6.238244514106584e-06, "loss": 1.2879, "step": 199 }, { "epoch": 0.02824060999717594, "grad_norm": 7.098781003122065, "learning_rate": 6.269592476489029e-06, "loss": 0.9176, "step": 200 }, { "epoch": 0.02838181304716182, "grad_norm": 7.226799120218369, "learning_rate": 6.300940438871474e-06, "loss": 1.1116, "step": 201 }, { "epoch": 0.028523016097147698, "grad_norm": 7.66234418755526, "learning_rate": 6.3322884012539185e-06, "loss": 1.0238, "step": 202 }, { "epoch": 0.028664219147133577, "grad_norm": 8.209472071357272, "learning_rate": 6.363636363636364e-06, "loss": 1.025, "step": 203 }, { "epoch": 0.028805422197119457, "grad_norm": 6.72047643899154, "learning_rate": 6.394984326018809e-06, "loss": 1.0451, "step": 204 }, { "epoch": 0.028946625247105336, "grad_norm": 6.769673768271201, "learning_rate": 6.426332288401255e-06, "loss": 0.9877, "step": 205 }, { "epoch": 0.029087828297091216, "grad_norm": 7.123716516659776, "learning_rate": 6.4576802507837e-06, "loss": 0.8619, "step": 206 }, { "epoch": 0.029229031347077095, "grad_norm": 7.413501442190635, "learning_rate": 6.489028213166145e-06, "loss": 1.148, "step": 207 }, { "epoch": 0.029370234397062978, "grad_norm": 6.967619248385326, "learning_rate": 6.520376175548589e-06, "loss": 0.9802, "step": 208 }, { "epoch": 0.029511437447048858, "grad_norm": 7.951663301564583, "learning_rate": 6.551724137931035e-06, "loss": 1.2734, "step": 209 }, { "epoch": 0.029652640497034737, "grad_norm": 6.113622492591106, "learning_rate": 6.58307210031348e-06, "loss": 0.9453, "step": 210 }, { "epoch": 0.029793843547020617, "grad_norm": 8.254114737069598, "learning_rate": 6.6144200626959255e-06, "loss": 0.9883, "step": 211 }, { "epoch": 0.029935046597006496, "grad_norm": 7.200862338252834, "learning_rate": 6.64576802507837e-06, "loss": 0.9004, "step": 212 }, { "epoch": 0.030076249646992376, "grad_norm": 7.732074227436989, "learning_rate": 6.677115987460816e-06, "loss": 1.0184, "step": 213 }, { "epoch": 0.030217452696978255, "grad_norm": 6.643036335751048, "learning_rate": 6.70846394984326e-06, "loss": 1.0927, "step": 214 }, { "epoch": 0.030358655746964135, "grad_norm": 6.990791179636594, "learning_rate": 6.739811912225706e-06, "loss": 1.0011, "step": 215 }, { "epoch": 0.030499858796950014, "grad_norm": 6.516712068369841, "learning_rate": 6.771159874608151e-06, "loss": 1.0647, "step": 216 }, { "epoch": 0.030641061846935894, "grad_norm": 7.591672062474376, "learning_rate": 6.802507836990596e-06, "loss": 1.1377, "step": 217 }, { "epoch": 0.030782264896921773, "grad_norm": 7.023053158722401, "learning_rate": 6.833855799373041e-06, "loss": 1.1098, "step": 218 }, { "epoch": 0.030923467946907653, "grad_norm": 7.682046253111874, "learning_rate": 6.865203761755487e-06, "loss": 1.0534, "step": 219 }, { "epoch": 0.031064670996893532, "grad_norm": 7.768258871589601, "learning_rate": 6.896551724137932e-06, "loss": 1.0257, "step": 220 }, { "epoch": 0.03120587404687941, "grad_norm": 8.972781908035136, "learning_rate": 6.9278996865203765e-06, "loss": 0.997, "step": 221 }, { "epoch": 0.031347077096865295, "grad_norm": 7.2369475078109, "learning_rate": 6.959247648902821e-06, "loss": 1.0176, "step": 222 }, { "epoch": 0.03148828014685117, "grad_norm": 5.763434319127323, "learning_rate": 6.990595611285267e-06, "loss": 1.0166, "step": 223 }, { "epoch": 0.031629483196837054, "grad_norm": 6.258367593132719, "learning_rate": 7.021943573667712e-06, "loss": 0.988, "step": 224 }, { "epoch": 0.03177068624682293, "grad_norm": 6.4208488380894435, "learning_rate": 7.0532915360501576e-06, "loss": 0.8481, "step": 225 }, { "epoch": 0.03191188929680881, "grad_norm": 7.00325042344024, "learning_rate": 7.084639498432602e-06, "loss": 0.9924, "step": 226 }, { "epoch": 0.03205309234679469, "grad_norm": 7.2807418741085606, "learning_rate": 7.115987460815048e-06, "loss": 1.0043, "step": 227 }, { "epoch": 0.03219429539678057, "grad_norm": 7.857645959514648, "learning_rate": 7.147335423197492e-06, "loss": 1.0441, "step": 228 }, { "epoch": 0.03233549844676645, "grad_norm": 6.590381537664548, "learning_rate": 7.178683385579938e-06, "loss": 0.8702, "step": 229 }, { "epoch": 0.03247670149675233, "grad_norm": 6.309134241317297, "learning_rate": 7.210031347962383e-06, "loss": 0.9362, "step": 230 }, { "epoch": 0.032617904546738206, "grad_norm": 7.5306558948588656, "learning_rate": 7.241379310344828e-06, "loss": 1.1168, "step": 231 }, { "epoch": 0.03275910759672409, "grad_norm": 6.924984359537364, "learning_rate": 7.272727272727273e-06, "loss": 1.0031, "step": 232 }, { "epoch": 0.03290031064670997, "grad_norm": 6.833615445982986, "learning_rate": 7.304075235109719e-06, "loss": 1.003, "step": 233 }, { "epoch": 0.03304151369669585, "grad_norm": 8.320714511863434, "learning_rate": 7.3354231974921645e-06, "loss": 1.1908, "step": 234 }, { "epoch": 0.03318271674668173, "grad_norm": 7.344265285722987, "learning_rate": 7.3667711598746085e-06, "loss": 1.1049, "step": 235 }, { "epoch": 0.03332391979666761, "grad_norm": 6.339242422888241, "learning_rate": 7.398119122257053e-06, "loss": 0.9367, "step": 236 }, { "epoch": 0.03346512284665349, "grad_norm": 7.531043499443148, "learning_rate": 7.429467084639499e-06, "loss": 1.1008, "step": 237 }, { "epoch": 0.033606325896639366, "grad_norm": 6.74515752492174, "learning_rate": 7.460815047021944e-06, "loss": 0.9319, "step": 238 }, { "epoch": 0.03374752894662525, "grad_norm": 6.641173087192396, "learning_rate": 7.49216300940439e-06, "loss": 0.8657, "step": 239 }, { "epoch": 0.033888731996611125, "grad_norm": 7.19435608221455, "learning_rate": 7.5235109717868345e-06, "loss": 1.0358, "step": 240 }, { "epoch": 0.03402993504659701, "grad_norm": 6.740198448819316, "learning_rate": 7.554858934169279e-06, "loss": 1.0419, "step": 241 }, { "epoch": 0.034171138096582884, "grad_norm": 6.899726169941688, "learning_rate": 7.586206896551724e-06, "loss": 1.0432, "step": 242 }, { "epoch": 0.03431234114656877, "grad_norm": 6.238918986390443, "learning_rate": 7.61755485893417e-06, "loss": 0.9841, "step": 243 }, { "epoch": 0.03445354419655464, "grad_norm": 5.395154920458694, "learning_rate": 7.648902821316615e-06, "loss": 0.6961, "step": 244 }, { "epoch": 0.034594747246540526, "grad_norm": 6.1640422907309524, "learning_rate": 7.68025078369906e-06, "loss": 1.0119, "step": 245 }, { "epoch": 0.0347359502965264, "grad_norm": 6.726209308033153, "learning_rate": 7.711598746081506e-06, "loss": 0.9094, "step": 246 }, { "epoch": 0.034877153346512285, "grad_norm": 10.893811698535975, "learning_rate": 7.742946708463952e-06, "loss": 0.9771, "step": 247 }, { "epoch": 0.03501835639649816, "grad_norm": 7.498601486235813, "learning_rate": 7.774294670846396e-06, "loss": 1.1061, "step": 248 }, { "epoch": 0.035159559446484044, "grad_norm": 8.986638701902864, "learning_rate": 7.80564263322884e-06, "loss": 1.3214, "step": 249 }, { "epoch": 0.03530076249646993, "grad_norm": 7.082930061150734, "learning_rate": 7.836990595611285e-06, "loss": 0.8836, "step": 250 }, { "epoch": 0.0354419655464558, "grad_norm": 7.277989450437325, "learning_rate": 7.868338557993731e-06, "loss": 0.9712, "step": 251 }, { "epoch": 0.035583168596441686, "grad_norm": 6.64538298891202, "learning_rate": 7.899686520376177e-06, "loss": 1.0079, "step": 252 }, { "epoch": 0.03572437164642756, "grad_norm": 7.373745682677788, "learning_rate": 7.93103448275862e-06, "loss": 1.0341, "step": 253 }, { "epoch": 0.035865574696413445, "grad_norm": 6.953966508907405, "learning_rate": 7.962382445141067e-06, "loss": 1.0001, "step": 254 }, { "epoch": 0.03600677774639932, "grad_norm": 7.361235977717567, "learning_rate": 7.99373040752351e-06, "loss": 1.1033, "step": 255 }, { "epoch": 0.036147980796385204, "grad_norm": 8.032169384964144, "learning_rate": 8.025078369905956e-06, "loss": 0.9814, "step": 256 }, { "epoch": 0.03628918384637108, "grad_norm": 6.712940649969857, "learning_rate": 8.056426332288402e-06, "loss": 1.0296, "step": 257 }, { "epoch": 0.03643038689635696, "grad_norm": 7.335659618829718, "learning_rate": 8.087774294670848e-06, "loss": 1.1228, "step": 258 }, { "epoch": 0.03657158994634284, "grad_norm": 6.8229346358008165, "learning_rate": 8.119122257053292e-06, "loss": 0.8835, "step": 259 }, { "epoch": 0.03671279299632872, "grad_norm": 6.844637075084648, "learning_rate": 8.150470219435737e-06, "loss": 1.0386, "step": 260 }, { "epoch": 0.0368539960463146, "grad_norm": 7.701163144197415, "learning_rate": 8.181818181818183e-06, "loss": 1.0405, "step": 261 }, { "epoch": 0.03699519909630048, "grad_norm": 7.463688175051193, "learning_rate": 8.213166144200627e-06, "loss": 0.974, "step": 262 }, { "epoch": 0.03713640214628636, "grad_norm": 6.211074177188676, "learning_rate": 8.244514106583073e-06, "loss": 0.9123, "step": 263 }, { "epoch": 0.03727760519627224, "grad_norm": 6.364075707460534, "learning_rate": 8.275862068965518e-06, "loss": 0.9195, "step": 264 }, { "epoch": 0.037418808246258116, "grad_norm": 6.426994151994939, "learning_rate": 8.307210031347962e-06, "loss": 1.0276, "step": 265 }, { "epoch": 0.037560011296244, "grad_norm": 5.918745798154022, "learning_rate": 8.338557993730408e-06, "loss": 1.0192, "step": 266 }, { "epoch": 0.03770121434622988, "grad_norm": 5.97185206372574, "learning_rate": 8.369905956112854e-06, "loss": 0.9448, "step": 267 }, { "epoch": 0.03784241739621576, "grad_norm": 6.8096930751921105, "learning_rate": 8.4012539184953e-06, "loss": 1.1044, "step": 268 }, { "epoch": 0.03798362044620164, "grad_norm": 6.4786220813846125, "learning_rate": 8.432601880877743e-06, "loss": 1.0572, "step": 269 }, { "epoch": 0.03812482349618752, "grad_norm": 6.36567265613983, "learning_rate": 8.463949843260189e-06, "loss": 1.1283, "step": 270 }, { "epoch": 0.0382660265461734, "grad_norm": 7.441630181439689, "learning_rate": 8.495297805642633e-06, "loss": 1.044, "step": 271 }, { "epoch": 0.038407229596159276, "grad_norm": 7.031370991366347, "learning_rate": 8.526645768025079e-06, "loss": 1.0039, "step": 272 }, { "epoch": 0.03854843264614516, "grad_norm": 7.535364706946894, "learning_rate": 8.557993730407524e-06, "loss": 1.1076, "step": 273 }, { "epoch": 0.038689635696131035, "grad_norm": 6.624210020710945, "learning_rate": 8.58934169278997e-06, "loss": 1.0522, "step": 274 }, { "epoch": 0.03883083874611692, "grad_norm": 7.196509976708002, "learning_rate": 8.620689655172414e-06, "loss": 1.027, "step": 275 }, { "epoch": 0.038972041796102794, "grad_norm": 7.8080749802315745, "learning_rate": 8.65203761755486e-06, "loss": 1.1557, "step": 276 }, { "epoch": 0.03911324484608868, "grad_norm": 5.386038111297556, "learning_rate": 8.683385579937304e-06, "loss": 0.8177, "step": 277 }, { "epoch": 0.03925444789607455, "grad_norm": 6.319860957540828, "learning_rate": 8.71473354231975e-06, "loss": 0.9943, "step": 278 }, { "epoch": 0.039395650946060436, "grad_norm": 6.410320102028986, "learning_rate": 8.746081504702195e-06, "loss": 0.9559, "step": 279 }, { "epoch": 0.03953685399604631, "grad_norm": 7.347415880072892, "learning_rate": 8.777429467084641e-06, "loss": 1.2127, "step": 280 }, { "epoch": 0.039678057046032195, "grad_norm": 6.731832307677044, "learning_rate": 8.808777429467087e-06, "loss": 0.9603, "step": 281 }, { "epoch": 0.03981926009601807, "grad_norm": 6.665964602881954, "learning_rate": 8.84012539184953e-06, "loss": 0.9668, "step": 282 }, { "epoch": 0.039960463146003954, "grad_norm": 6.862446246774935, "learning_rate": 8.871473354231975e-06, "loss": 0.8228, "step": 283 }, { "epoch": 0.04010166619598984, "grad_norm": 6.939353471259851, "learning_rate": 8.90282131661442e-06, "loss": 1.0398, "step": 284 }, { "epoch": 0.04024286924597571, "grad_norm": 5.509553426301109, "learning_rate": 8.934169278996866e-06, "loss": 0.8675, "step": 285 }, { "epoch": 0.040384072295961596, "grad_norm": 6.45388308838189, "learning_rate": 8.965517241379312e-06, "loss": 0.9256, "step": 286 }, { "epoch": 0.04052527534594747, "grad_norm": 8.349517078867416, "learning_rate": 8.996865203761757e-06, "loss": 1.1676, "step": 287 }, { "epoch": 0.040666478395933355, "grad_norm": 6.526353795675566, "learning_rate": 9.028213166144201e-06, "loss": 1.1668, "step": 288 }, { "epoch": 0.04080768144591923, "grad_norm": 6.645493517145117, "learning_rate": 9.059561128526645e-06, "loss": 1.1798, "step": 289 }, { "epoch": 0.040948884495905113, "grad_norm": 7.838148558461977, "learning_rate": 9.090909090909091e-06, "loss": 1.3024, "step": 290 }, { "epoch": 0.04109008754589099, "grad_norm": 6.651623417219865, "learning_rate": 9.122257053291537e-06, "loss": 1.1004, "step": 291 }, { "epoch": 0.04123129059587687, "grad_norm": 7.507640499972883, "learning_rate": 9.153605015673982e-06, "loss": 1.2961, "step": 292 }, { "epoch": 0.04137249364586275, "grad_norm": 8.060457966536285, "learning_rate": 9.184952978056428e-06, "loss": 1.0851, "step": 293 }, { "epoch": 0.04151369669584863, "grad_norm": 7.133169520998299, "learning_rate": 9.216300940438872e-06, "loss": 1.2597, "step": 294 }, { "epoch": 0.04165489974583451, "grad_norm": 7.374011658051953, "learning_rate": 9.247648902821318e-06, "loss": 1.0425, "step": 295 }, { "epoch": 0.04179610279582039, "grad_norm": 6.863258233996512, "learning_rate": 9.278996865203762e-06, "loss": 1.0537, "step": 296 }, { "epoch": 0.041937305845806266, "grad_norm": 6.645888377260501, "learning_rate": 9.310344827586207e-06, "loss": 1.2074, "step": 297 }, { "epoch": 0.04207850889579215, "grad_norm": 6.436893265619479, "learning_rate": 9.341692789968653e-06, "loss": 1.022, "step": 298 }, { "epoch": 0.042219711945778025, "grad_norm": 5.918291582500843, "learning_rate": 9.373040752351097e-06, "loss": 1.0726, "step": 299 }, { "epoch": 0.04236091499576391, "grad_norm": 7.9883766871730435, "learning_rate": 9.404388714733543e-06, "loss": 1.2079, "step": 300 }, { "epoch": 0.04250211804574979, "grad_norm": 6.742810188796365, "learning_rate": 9.435736677115989e-06, "loss": 1.0749, "step": 301 }, { "epoch": 0.04264332109573567, "grad_norm": 6.412702085699694, "learning_rate": 9.467084639498434e-06, "loss": 0.9456, "step": 302 }, { "epoch": 0.04278452414572155, "grad_norm": 6.356434035310211, "learning_rate": 9.498432601880878e-06, "loss": 1.0771, "step": 303 }, { "epoch": 0.042925727195707426, "grad_norm": 7.066962668758025, "learning_rate": 9.529780564263324e-06, "loss": 0.9493, "step": 304 }, { "epoch": 0.04306693024569331, "grad_norm": 8.20812618445608, "learning_rate": 9.561128526645768e-06, "loss": 0.9852, "step": 305 }, { "epoch": 0.043208133295679185, "grad_norm": 7.16284184553327, "learning_rate": 9.592476489028214e-06, "loss": 1.1978, "step": 306 }, { "epoch": 0.04334933634566507, "grad_norm": 6.587795861008629, "learning_rate": 9.62382445141066e-06, "loss": 1.0712, "step": 307 }, { "epoch": 0.043490539395650944, "grad_norm": 7.2249488180463315, "learning_rate": 9.655172413793105e-06, "loss": 1.1, "step": 308 }, { "epoch": 0.04363174244563683, "grad_norm": 6.432077592343574, "learning_rate": 9.686520376175549e-06, "loss": 1.0193, "step": 309 }, { "epoch": 0.0437729454956227, "grad_norm": 6.968885477214826, "learning_rate": 9.717868338557995e-06, "loss": 1.0749, "step": 310 }, { "epoch": 0.043914148545608586, "grad_norm": 6.447244929165667, "learning_rate": 9.749216300940439e-06, "loss": 0.9041, "step": 311 }, { "epoch": 0.04405535159559446, "grad_norm": 8.16839291130948, "learning_rate": 9.780564263322884e-06, "loss": 1.3465, "step": 312 }, { "epoch": 0.044196554645580345, "grad_norm": 7.098880356443508, "learning_rate": 9.81191222570533e-06, "loss": 1.0409, "step": 313 }, { "epoch": 0.04433775769556622, "grad_norm": 6.141462288488842, "learning_rate": 9.843260188087776e-06, "loss": 1.013, "step": 314 }, { "epoch": 0.044478960745552104, "grad_norm": 6.207098948110343, "learning_rate": 9.874608150470221e-06, "loss": 1.0002, "step": 315 }, { "epoch": 0.04462016379553799, "grad_norm": 6.130320763460261, "learning_rate": 9.905956112852665e-06, "loss": 1.0626, "step": 316 }, { "epoch": 0.04476136684552386, "grad_norm": 6.381605388589148, "learning_rate": 9.93730407523511e-06, "loss": 1.1705, "step": 317 }, { "epoch": 0.044902569895509746, "grad_norm": 6.270972063913611, "learning_rate": 9.968652037617555e-06, "loss": 0.9597, "step": 318 }, { "epoch": 0.04504377294549562, "grad_norm": 6.019869352545629, "learning_rate": 1e-05, "loss": 0.9363, "step": 319 }, { "epoch": 0.045184975995481505, "grad_norm": 6.901333039486993, "learning_rate": 1.0031347962382445e-05, "loss": 1.0765, "step": 320 }, { "epoch": 0.04532617904546738, "grad_norm": 6.751245381913681, "learning_rate": 1.0062695924764892e-05, "loss": 0.969, "step": 321 }, { "epoch": 0.045467382095453264, "grad_norm": 6.551280548846206, "learning_rate": 1.0094043887147336e-05, "loss": 0.9448, "step": 322 }, { "epoch": 0.04560858514543914, "grad_norm": 8.199253942789689, "learning_rate": 1.0125391849529782e-05, "loss": 0.9585, "step": 323 }, { "epoch": 0.04574978819542502, "grad_norm": 6.160985973356477, "learning_rate": 1.0156739811912226e-05, "loss": 0.8928, "step": 324 }, { "epoch": 0.0458909912454109, "grad_norm": 6.475959976778824, "learning_rate": 1.0188087774294673e-05, "loss": 1.0992, "step": 325 }, { "epoch": 0.04603219429539678, "grad_norm": 6.151307243678867, "learning_rate": 1.0219435736677117e-05, "loss": 0.9654, "step": 326 }, { "epoch": 0.04617339734538266, "grad_norm": 6.240444356304697, "learning_rate": 1.0250783699059561e-05, "loss": 1.092, "step": 327 }, { "epoch": 0.04631460039536854, "grad_norm": 5.067191185910136, "learning_rate": 1.0282131661442007e-05, "loss": 0.8948, "step": 328 }, { "epoch": 0.04645580344535442, "grad_norm": 8.138115119156627, "learning_rate": 1.0313479623824451e-05, "loss": 1.2203, "step": 329 }, { "epoch": 0.0465970064953403, "grad_norm": 5.976453536420087, "learning_rate": 1.0344827586206898e-05, "loss": 0.8202, "step": 330 }, { "epoch": 0.046738209545326176, "grad_norm": 7.3463537451702905, "learning_rate": 1.0376175548589342e-05, "loss": 1.1936, "step": 331 }, { "epoch": 0.04687941259531206, "grad_norm": 6.617975055069619, "learning_rate": 1.0407523510971788e-05, "loss": 0.9834, "step": 332 }, { "epoch": 0.04702061564529794, "grad_norm": 6.006700710428823, "learning_rate": 1.0438871473354234e-05, "loss": 1.1119, "step": 333 }, { "epoch": 0.04716181869528382, "grad_norm": 5.617955004157817, "learning_rate": 1.0470219435736678e-05, "loss": 0.8598, "step": 334 }, { "epoch": 0.0473030217452697, "grad_norm": 5.743124385312946, "learning_rate": 1.0501567398119123e-05, "loss": 0.9658, "step": 335 }, { "epoch": 0.04744422479525558, "grad_norm": 6.73849235202559, "learning_rate": 1.0532915360501567e-05, "loss": 1.0877, "step": 336 }, { "epoch": 0.04758542784524146, "grad_norm": 7.000001727656123, "learning_rate": 1.0564263322884015e-05, "loss": 1.0504, "step": 337 }, { "epoch": 0.047726630895227336, "grad_norm": 6.495995948036809, "learning_rate": 1.0595611285266459e-05, "loss": 1.02, "step": 338 }, { "epoch": 0.04786783394521322, "grad_norm": 6.361802727831078, "learning_rate": 1.0626959247648904e-05, "loss": 1.1229, "step": 339 }, { "epoch": 0.048009036995199095, "grad_norm": 6.071181411988096, "learning_rate": 1.0658307210031348e-05, "loss": 0.8984, "step": 340 }, { "epoch": 0.04815024004518498, "grad_norm": 6.487582735086764, "learning_rate": 1.0689655172413792e-05, "loss": 0.977, "step": 341 }, { "epoch": 0.048291443095170854, "grad_norm": 6.017610844470211, "learning_rate": 1.072100313479624e-05, "loss": 1.0152, "step": 342 }, { "epoch": 0.04843264614515674, "grad_norm": 6.50504700821475, "learning_rate": 1.0752351097178684e-05, "loss": 0.9227, "step": 343 }, { "epoch": 0.04857384919514261, "grad_norm": 6.164255154735449, "learning_rate": 1.078369905956113e-05, "loss": 1.0049, "step": 344 }, { "epoch": 0.048715052245128496, "grad_norm": 8.051149001280118, "learning_rate": 1.0815047021943574e-05, "loss": 1.188, "step": 345 }, { "epoch": 0.04885625529511437, "grad_norm": 7.261561894113257, "learning_rate": 1.0846394984326021e-05, "loss": 1.0836, "step": 346 }, { "epoch": 0.048997458345100255, "grad_norm": 6.867974412226677, "learning_rate": 1.0877742946708465e-05, "loss": 1.0593, "step": 347 }, { "epoch": 0.04913866139508613, "grad_norm": 7.17318256473674, "learning_rate": 1.0909090909090909e-05, "loss": 1.1374, "step": 348 }, { "epoch": 0.049279864445072014, "grad_norm": 5.813809558691895, "learning_rate": 1.0940438871473356e-05, "loss": 0.8977, "step": 349 }, { "epoch": 0.0494210674950579, "grad_norm": 6.555088464855176, "learning_rate": 1.09717868338558e-05, "loss": 0.971, "step": 350 }, { "epoch": 0.04956227054504377, "grad_norm": 7.39669928960688, "learning_rate": 1.1003134796238246e-05, "loss": 1.4236, "step": 351 }, { "epoch": 0.049703473595029656, "grad_norm": 6.984526566441196, "learning_rate": 1.103448275862069e-05, "loss": 1.104, "step": 352 }, { "epoch": 0.04984467664501553, "grad_norm": 7.007940824714307, "learning_rate": 1.1065830721003134e-05, "loss": 1.0743, "step": 353 }, { "epoch": 0.049985879695001414, "grad_norm": 6.64849902599171, "learning_rate": 1.1097178683385581e-05, "loss": 1.1107, "step": 354 }, { "epoch": 0.05012708274498729, "grad_norm": 6.141976025475596, "learning_rate": 1.1128526645768025e-05, "loss": 1.0355, "step": 355 }, { "epoch": 0.05026828579497317, "grad_norm": 5.491538837751872, "learning_rate": 1.1159874608150471e-05, "loss": 0.9341, "step": 356 }, { "epoch": 0.05040948884495905, "grad_norm": 5.722767102016528, "learning_rate": 1.1191222570532915e-05, "loss": 0.9994, "step": 357 }, { "epoch": 0.05055069189494493, "grad_norm": 6.631829679544342, "learning_rate": 1.1222570532915362e-05, "loss": 1.0061, "step": 358 }, { "epoch": 0.05069189494493081, "grad_norm": 7.281886895858746, "learning_rate": 1.1253918495297806e-05, "loss": 1.0628, "step": 359 }, { "epoch": 0.05083309799491669, "grad_norm": 7.933782928157221, "learning_rate": 1.128526645768025e-05, "loss": 1.0381, "step": 360 }, { "epoch": 0.05097430104490257, "grad_norm": 6.576475147230626, "learning_rate": 1.1316614420062698e-05, "loss": 1.0814, "step": 361 }, { "epoch": 0.05111550409488845, "grad_norm": 6.209802222934887, "learning_rate": 1.1347962382445142e-05, "loss": 0.9795, "step": 362 }, { "epoch": 0.051256707144874326, "grad_norm": 7.786458666601698, "learning_rate": 1.1379310344827587e-05, "loss": 1.1773, "step": 363 }, { "epoch": 0.05139791019486021, "grad_norm": 5.574790343903885, "learning_rate": 1.1410658307210031e-05, "loss": 1.1332, "step": 364 }, { "epoch": 0.051539113244846085, "grad_norm": 6.098493060813073, "learning_rate": 1.1442006269592479e-05, "loss": 1.0307, "step": 365 }, { "epoch": 0.05168031629483197, "grad_norm": 6.989261097373715, "learning_rate": 1.1473354231974923e-05, "loss": 0.9769, "step": 366 }, { "epoch": 0.05182151934481785, "grad_norm": 5.826928191078909, "learning_rate": 1.1504702194357367e-05, "loss": 0.9813, "step": 367 }, { "epoch": 0.05196272239480373, "grad_norm": 7.203358236222731, "learning_rate": 1.1536050156739813e-05, "loss": 1.1263, "step": 368 }, { "epoch": 0.05210392544478961, "grad_norm": 6.24780742125562, "learning_rate": 1.1567398119122257e-05, "loss": 1.1283, "step": 369 }, { "epoch": 0.052245128494775486, "grad_norm": 6.048084193381313, "learning_rate": 1.1598746081504704e-05, "loss": 1.2, "step": 370 }, { "epoch": 0.05238633154476137, "grad_norm": 6.511947459120634, "learning_rate": 1.1630094043887148e-05, "loss": 1.0383, "step": 371 }, { "epoch": 0.052527534594747245, "grad_norm": 5.908367494278469, "learning_rate": 1.1661442006269594e-05, "loss": 0.9815, "step": 372 }, { "epoch": 0.05266873764473313, "grad_norm": 6.322198148637384, "learning_rate": 1.169278996865204e-05, "loss": 0.9974, "step": 373 }, { "epoch": 0.052809940694719004, "grad_norm": 6.423055168612659, "learning_rate": 1.1724137931034483e-05, "loss": 1.0798, "step": 374 }, { "epoch": 0.05295114374470489, "grad_norm": 6.0639015615665315, "learning_rate": 1.1755485893416929e-05, "loss": 0.9342, "step": 375 }, { "epoch": 0.05309234679469076, "grad_norm": 6.8992590351614655, "learning_rate": 1.1786833855799373e-05, "loss": 0.9345, "step": 376 }, { "epoch": 0.053233549844676646, "grad_norm": 5.873807882193555, "learning_rate": 1.181818181818182e-05, "loss": 1.0018, "step": 377 }, { "epoch": 0.05337475289466252, "grad_norm": 6.609613070967434, "learning_rate": 1.1849529780564264e-05, "loss": 1.284, "step": 378 }, { "epoch": 0.053515955944648405, "grad_norm": 5.698838549028839, "learning_rate": 1.188087774294671e-05, "loss": 0.9497, "step": 379 }, { "epoch": 0.05365715899463428, "grad_norm": 6.072739066698938, "learning_rate": 1.1912225705329154e-05, "loss": 1.0029, "step": 380 }, { "epoch": 0.053798362044620164, "grad_norm": 6.588778790896909, "learning_rate": 1.1943573667711598e-05, "loss": 1.1751, "step": 381 }, { "epoch": 0.05393956509460604, "grad_norm": 6.118528728411645, "learning_rate": 1.1974921630094045e-05, "loss": 1.1642, "step": 382 }, { "epoch": 0.05408076814459192, "grad_norm": 6.252547973893893, "learning_rate": 1.200626959247649e-05, "loss": 1.1578, "step": 383 }, { "epoch": 0.054221971194577806, "grad_norm": 6.535542859953819, "learning_rate": 1.2037617554858935e-05, "loss": 1.2021, "step": 384 }, { "epoch": 0.05436317424456368, "grad_norm": 6.710044909907913, "learning_rate": 1.206896551724138e-05, "loss": 1.2086, "step": 385 }, { "epoch": 0.054504377294549565, "grad_norm": 6.81495157343338, "learning_rate": 1.2100313479623827e-05, "loss": 1.0907, "step": 386 }, { "epoch": 0.05464558034453544, "grad_norm": 5.634081917231528, "learning_rate": 1.213166144200627e-05, "loss": 0.9641, "step": 387 }, { "epoch": 0.054786783394521324, "grad_norm": 6.170800752005018, "learning_rate": 1.2163009404388715e-05, "loss": 1.0229, "step": 388 }, { "epoch": 0.0549279864445072, "grad_norm": 5.595215697006681, "learning_rate": 1.2194357366771162e-05, "loss": 0.9963, "step": 389 }, { "epoch": 0.05506918949449308, "grad_norm": 7.311732811142929, "learning_rate": 1.2225705329153606e-05, "loss": 1.0786, "step": 390 }, { "epoch": 0.05521039254447896, "grad_norm": 6.0555475349761485, "learning_rate": 1.2257053291536052e-05, "loss": 0.8199, "step": 391 }, { "epoch": 0.05535159559446484, "grad_norm": 7.586531151959653, "learning_rate": 1.2288401253918496e-05, "loss": 1.1295, "step": 392 }, { "epoch": 0.05549279864445072, "grad_norm": 8.536236638622649, "learning_rate": 1.2319749216300943e-05, "loss": 0.9228, "step": 393 }, { "epoch": 0.0556340016944366, "grad_norm": 7.023745557804657, "learning_rate": 1.2351097178683387e-05, "loss": 1.0789, "step": 394 }, { "epoch": 0.05577520474442248, "grad_norm": 5.884896707857233, "learning_rate": 1.2382445141065831e-05, "loss": 1.0251, "step": 395 }, { "epoch": 0.05591640779440836, "grad_norm": 5.754018586417228, "learning_rate": 1.2413793103448277e-05, "loss": 1.0936, "step": 396 }, { "epoch": 0.056057610844394236, "grad_norm": 6.08443834146453, "learning_rate": 1.244514106583072e-05, "loss": 1.0688, "step": 397 }, { "epoch": 0.05619881389438012, "grad_norm": 8.164243328699555, "learning_rate": 1.2476489028213168e-05, "loss": 1.0192, "step": 398 }, { "epoch": 0.056340016944366, "grad_norm": 6.383066440696785, "learning_rate": 1.2507836990595612e-05, "loss": 1.18, "step": 399 }, { "epoch": 0.05648121999435188, "grad_norm": 5.732033358217053, "learning_rate": 1.2539184952978058e-05, "loss": 1.0299, "step": 400 }, { "epoch": 0.05662242304433776, "grad_norm": 5.5882468073872476, "learning_rate": 1.2570532915360503e-05, "loss": 1.0071, "step": 401 }, { "epoch": 0.05676362609432364, "grad_norm": 6.451365586740487, "learning_rate": 1.2601880877742947e-05, "loss": 1.0087, "step": 402 }, { "epoch": 0.05690482914430952, "grad_norm": 6.328321035610544, "learning_rate": 1.2633228840125393e-05, "loss": 1.1824, "step": 403 }, { "epoch": 0.057046032194295396, "grad_norm": 5.778553569926996, "learning_rate": 1.2664576802507837e-05, "loss": 1.0006, "step": 404 }, { "epoch": 0.05718723524428128, "grad_norm": 6.161754346703109, "learning_rate": 1.2695924764890284e-05, "loss": 1.055, "step": 405 }, { "epoch": 0.057328438294267155, "grad_norm": 6.4316646835760185, "learning_rate": 1.2727272727272728e-05, "loss": 1.1508, "step": 406 }, { "epoch": 0.05746964134425304, "grad_norm": 6.464118752577883, "learning_rate": 1.2758620689655174e-05, "loss": 1.1197, "step": 407 }, { "epoch": 0.057610844394238914, "grad_norm": 6.230579768015557, "learning_rate": 1.2789968652037618e-05, "loss": 1.0087, "step": 408 }, { "epoch": 0.0577520474442248, "grad_norm": 6.3455758299734395, "learning_rate": 1.2821316614420062e-05, "loss": 1.0038, "step": 409 }, { "epoch": 0.05789325049421067, "grad_norm": 6.869550755127195, "learning_rate": 1.285266457680251e-05, "loss": 1.0798, "step": 410 }, { "epoch": 0.058034453544196556, "grad_norm": 7.5630217101904105, "learning_rate": 1.2884012539184954e-05, "loss": 1.2505, "step": 411 }, { "epoch": 0.05817565659418243, "grad_norm": 6.154565001301907, "learning_rate": 1.29153605015674e-05, "loss": 0.9161, "step": 412 }, { "epoch": 0.058316859644168315, "grad_norm": 6.421016396089281, "learning_rate": 1.2946708463949845e-05, "loss": 1.172, "step": 413 }, { "epoch": 0.05845806269415419, "grad_norm": 7.237135973611175, "learning_rate": 1.297805642633229e-05, "loss": 1.2409, "step": 414 }, { "epoch": 0.058599265744140074, "grad_norm": 6.964626385298887, "learning_rate": 1.3009404388714735e-05, "loss": 1.2627, "step": 415 }, { "epoch": 0.058740468794125957, "grad_norm": 6.130986684964869, "learning_rate": 1.3040752351097179e-05, "loss": 0.8719, "step": 416 }, { "epoch": 0.05888167184411183, "grad_norm": 6.4891033533190035, "learning_rate": 1.3072100313479626e-05, "loss": 1.1481, "step": 417 }, { "epoch": 0.059022874894097715, "grad_norm": 6.261390374690267, "learning_rate": 1.310344827586207e-05, "loss": 0.9482, "step": 418 }, { "epoch": 0.05916407794408359, "grad_norm": 7.39329935205469, "learning_rate": 1.3134796238244516e-05, "loss": 1.0942, "step": 419 }, { "epoch": 0.059305280994069474, "grad_norm": 6.324103294687855, "learning_rate": 1.316614420062696e-05, "loss": 1.0391, "step": 420 }, { "epoch": 0.05944648404405535, "grad_norm": 6.788249018384576, "learning_rate": 1.3197492163009404e-05, "loss": 1.1609, "step": 421 }, { "epoch": 0.05958768709404123, "grad_norm": 6.571402566366432, "learning_rate": 1.3228840125391851e-05, "loss": 1.3202, "step": 422 }, { "epoch": 0.05972889014402711, "grad_norm": 5.9262192112467345, "learning_rate": 1.3260188087774295e-05, "loss": 1.0198, "step": 423 }, { "epoch": 0.05987009319401299, "grad_norm": 5.935144328380646, "learning_rate": 1.329153605015674e-05, "loss": 1.083, "step": 424 }, { "epoch": 0.06001129624399887, "grad_norm": 6.09784368949035, "learning_rate": 1.3322884012539186e-05, "loss": 1.2458, "step": 425 }, { "epoch": 0.06015249929398475, "grad_norm": 6.244017748538626, "learning_rate": 1.3354231974921632e-05, "loss": 1.1666, "step": 426 }, { "epoch": 0.06029370234397063, "grad_norm": 6.061248209200218, "learning_rate": 1.3385579937304076e-05, "loss": 0.9919, "step": 427 }, { "epoch": 0.06043490539395651, "grad_norm": 6.296866126158882, "learning_rate": 1.341692789968652e-05, "loss": 1.1745, "step": 428 }, { "epoch": 0.060576108443942386, "grad_norm": 6.445561375865732, "learning_rate": 1.3448275862068967e-05, "loss": 0.9518, "step": 429 }, { "epoch": 0.06071731149392827, "grad_norm": 7.230463456975804, "learning_rate": 1.3479623824451411e-05, "loss": 1.1376, "step": 430 }, { "epoch": 0.060858514543914145, "grad_norm": 6.959747448277786, "learning_rate": 1.3510971786833857e-05, "loss": 1.0553, "step": 431 }, { "epoch": 0.06099971759390003, "grad_norm": 6.595465120612675, "learning_rate": 1.3542319749216301e-05, "loss": 1.073, "step": 432 }, { "epoch": 0.06114092064388591, "grad_norm": 5.383404595788152, "learning_rate": 1.3573667711598749e-05, "loss": 0.885, "step": 433 }, { "epoch": 0.06128212369387179, "grad_norm": 6.119023697119455, "learning_rate": 1.3605015673981193e-05, "loss": 1.3059, "step": 434 }, { "epoch": 0.06142332674385767, "grad_norm": 6.617801794193726, "learning_rate": 1.3636363636363637e-05, "loss": 1.2297, "step": 435 }, { "epoch": 0.061564529793843546, "grad_norm": 5.7943172628970565, "learning_rate": 1.3667711598746082e-05, "loss": 1.0181, "step": 436 }, { "epoch": 0.06170573284382943, "grad_norm": 6.179960661932418, "learning_rate": 1.3699059561128526e-05, "loss": 0.9769, "step": 437 }, { "epoch": 0.061846935893815305, "grad_norm": 6.412368905043698, "learning_rate": 1.3730407523510974e-05, "loss": 0.9235, "step": 438 }, { "epoch": 0.06198813894380119, "grad_norm": 5.756054839510773, "learning_rate": 1.3761755485893418e-05, "loss": 1.0357, "step": 439 }, { "epoch": 0.062129341993787064, "grad_norm": 5.4813663413032385, "learning_rate": 1.3793103448275863e-05, "loss": 0.8484, "step": 440 }, { "epoch": 0.06227054504377295, "grad_norm": 6.965157066220166, "learning_rate": 1.3824451410658309e-05, "loss": 1.0424, "step": 441 }, { "epoch": 0.06241174809375882, "grad_norm": 5.644234255718042, "learning_rate": 1.3855799373040753e-05, "loss": 1.0126, "step": 442 }, { "epoch": 0.0625529511437447, "grad_norm": 6.035212831918365, "learning_rate": 1.3887147335423199e-05, "loss": 1.0505, "step": 443 }, { "epoch": 0.06269415419373059, "grad_norm": 7.256280102345634, "learning_rate": 1.3918495297805643e-05, "loss": 1.1926, "step": 444 }, { "epoch": 0.06283535724371646, "grad_norm": 6.182333192512244, "learning_rate": 1.394984326018809e-05, "loss": 1.1631, "step": 445 }, { "epoch": 0.06297656029370234, "grad_norm": 5.800825645136416, "learning_rate": 1.3981191222570534e-05, "loss": 0.9231, "step": 446 }, { "epoch": 0.06311776334368822, "grad_norm": 6.549637896102323, "learning_rate": 1.401253918495298e-05, "loss": 1.1561, "step": 447 }, { "epoch": 0.06325896639367411, "grad_norm": 6.17637429100724, "learning_rate": 1.4043887147335424e-05, "loss": 1.1849, "step": 448 }, { "epoch": 0.06340016944365999, "grad_norm": 7.240368391150512, "learning_rate": 1.4075235109717868e-05, "loss": 1.1659, "step": 449 }, { "epoch": 0.06354137249364586, "grad_norm": 5.807241012418055, "learning_rate": 1.4106583072100315e-05, "loss": 1.0264, "step": 450 }, { "epoch": 0.06368257554363174, "grad_norm": 6.71173316611635, "learning_rate": 1.4137931034482759e-05, "loss": 1.1706, "step": 451 }, { "epoch": 0.06382377859361762, "grad_norm": 6.169610363982729, "learning_rate": 1.4169278996865205e-05, "loss": 1.1525, "step": 452 }, { "epoch": 0.06396498164360351, "grad_norm": 6.67150414627448, "learning_rate": 1.420062695924765e-05, "loss": 1.2384, "step": 453 }, { "epoch": 0.06410618469358938, "grad_norm": 6.919929841468454, "learning_rate": 1.4231974921630096e-05, "loss": 1.3341, "step": 454 }, { "epoch": 0.06424738774357526, "grad_norm": 5.595472676314099, "learning_rate": 1.426332288401254e-05, "loss": 0.9739, "step": 455 }, { "epoch": 0.06438859079356114, "grad_norm": 7.341705285146319, "learning_rate": 1.4294670846394984e-05, "loss": 1.1755, "step": 456 }, { "epoch": 0.06452979384354703, "grad_norm": 6.607899093283406, "learning_rate": 1.4326018808777432e-05, "loss": 1.1249, "step": 457 }, { "epoch": 0.0646709968935329, "grad_norm": 6.205452324610187, "learning_rate": 1.4357366771159876e-05, "loss": 1.1244, "step": 458 }, { "epoch": 0.06481219994351878, "grad_norm": 6.422620522724941, "learning_rate": 1.4388714733542321e-05, "loss": 1.0215, "step": 459 }, { "epoch": 0.06495340299350466, "grad_norm": 5.513568377439427, "learning_rate": 1.4420062695924765e-05, "loss": 1.0523, "step": 460 }, { "epoch": 0.06509460604349054, "grad_norm": 6.863710439073776, "learning_rate": 1.4451410658307213e-05, "loss": 1.0948, "step": 461 }, { "epoch": 0.06523580909347641, "grad_norm": 6.466814014679621, "learning_rate": 1.4482758620689657e-05, "loss": 1.2171, "step": 462 }, { "epoch": 0.0653770121434623, "grad_norm": 6.571709729956653, "learning_rate": 1.45141065830721e-05, "loss": 1.1121, "step": 463 }, { "epoch": 0.06551821519344818, "grad_norm": 7.45591231216958, "learning_rate": 1.4545454545454546e-05, "loss": 1.1951, "step": 464 }, { "epoch": 0.06565941824343406, "grad_norm": 6.460185508560586, "learning_rate": 1.4576802507836992e-05, "loss": 1.2531, "step": 465 }, { "epoch": 0.06580062129341994, "grad_norm": 5.472168217526706, "learning_rate": 1.4608150470219438e-05, "loss": 0.9932, "step": 466 }, { "epoch": 0.06594182434340581, "grad_norm": 5.609935378754374, "learning_rate": 1.4639498432601882e-05, "loss": 1.0227, "step": 467 }, { "epoch": 0.0660830273933917, "grad_norm": 5.856835217316755, "learning_rate": 1.4670846394984329e-05, "loss": 1.0027, "step": 468 }, { "epoch": 0.06622423044337758, "grad_norm": 6.897239918818994, "learning_rate": 1.4702194357366773e-05, "loss": 1.0837, "step": 469 }, { "epoch": 0.06636543349336346, "grad_norm": 5.075493341272089, "learning_rate": 1.4733542319749217e-05, "loss": 1.0563, "step": 470 }, { "epoch": 0.06650663654334933, "grad_norm": 6.487047601012693, "learning_rate": 1.4764890282131663e-05, "loss": 1.164, "step": 471 }, { "epoch": 0.06664783959333521, "grad_norm": 5.561060553990291, "learning_rate": 1.4796238244514107e-05, "loss": 0.9517, "step": 472 }, { "epoch": 0.0667890426433211, "grad_norm": 5.306478619977663, "learning_rate": 1.4827586206896554e-05, "loss": 0.9706, "step": 473 }, { "epoch": 0.06693024569330698, "grad_norm": 5.545504602796221, "learning_rate": 1.4858934169278998e-05, "loss": 1.0224, "step": 474 }, { "epoch": 0.06707144874329285, "grad_norm": 6.096402444223812, "learning_rate": 1.4890282131661444e-05, "loss": 1.2514, "step": 475 }, { "epoch": 0.06721265179327873, "grad_norm": 7.044657635371257, "learning_rate": 1.4921630094043888e-05, "loss": 1.3064, "step": 476 }, { "epoch": 0.06735385484326462, "grad_norm": 6.974941097082132, "learning_rate": 1.4952978056426334e-05, "loss": 1.1552, "step": 477 }, { "epoch": 0.0674950578932505, "grad_norm": 5.518244664747658, "learning_rate": 1.498432601880878e-05, "loss": 1.0333, "step": 478 }, { "epoch": 0.06763626094323637, "grad_norm": 5.705166168869933, "learning_rate": 1.5015673981191223e-05, "loss": 0.9881, "step": 479 }, { "epoch": 0.06777746399322225, "grad_norm": 6.733230778134236, "learning_rate": 1.5047021943573669e-05, "loss": 1.2252, "step": 480 }, { "epoch": 0.06791866704320813, "grad_norm": 7.318963337402553, "learning_rate": 1.5078369905956115e-05, "loss": 0.9486, "step": 481 }, { "epoch": 0.06805987009319402, "grad_norm": 5.71036970305862, "learning_rate": 1.5109717868338559e-05, "loss": 0.8924, "step": 482 }, { "epoch": 0.0682010731431799, "grad_norm": 6.447491605316806, "learning_rate": 1.5141065830721004e-05, "loss": 1.0869, "step": 483 }, { "epoch": 0.06834227619316577, "grad_norm": 6.5559537592079336, "learning_rate": 1.5172413793103448e-05, "loss": 1.2647, "step": 484 }, { "epoch": 0.06848347924315165, "grad_norm": 6.252401468409138, "learning_rate": 1.5203761755485896e-05, "loss": 0.9774, "step": 485 }, { "epoch": 0.06862468229313753, "grad_norm": 5.927837383671542, "learning_rate": 1.523510971786834e-05, "loss": 0.9423, "step": 486 }, { "epoch": 0.06876588534312342, "grad_norm": 5.324097854415276, "learning_rate": 1.5266457680250787e-05, "loss": 0.9911, "step": 487 }, { "epoch": 0.06890708839310929, "grad_norm": 5.753233975849189, "learning_rate": 1.529780564263323e-05, "loss": 0.9426, "step": 488 }, { "epoch": 0.06904829144309517, "grad_norm": 6.750991687413456, "learning_rate": 1.5329153605015675e-05, "loss": 1.0388, "step": 489 }, { "epoch": 0.06918949449308105, "grad_norm": 6.182367507382677, "learning_rate": 1.536050156739812e-05, "loss": 0.9484, "step": 490 }, { "epoch": 0.06933069754306694, "grad_norm": 5.836684795131817, "learning_rate": 1.5391849529780566e-05, "loss": 1.1077, "step": 491 }, { "epoch": 0.0694719005930528, "grad_norm": 6.74346044890058, "learning_rate": 1.5423197492163012e-05, "loss": 1.2073, "step": 492 }, { "epoch": 0.06961310364303869, "grad_norm": 6.454369234799304, "learning_rate": 1.5454545454545454e-05, "loss": 1.3091, "step": 493 }, { "epoch": 0.06975430669302457, "grad_norm": 6.007095637133718, "learning_rate": 1.5485893416927904e-05, "loss": 0.9159, "step": 494 }, { "epoch": 0.06989550974301045, "grad_norm": 6.53976612139777, "learning_rate": 1.5517241379310346e-05, "loss": 1.0973, "step": 495 }, { "epoch": 0.07003671279299632, "grad_norm": 5.955631406069896, "learning_rate": 1.554858934169279e-05, "loss": 1.1143, "step": 496 }, { "epoch": 0.0701779158429822, "grad_norm": 5.4171902019084825, "learning_rate": 1.5579937304075237e-05, "loss": 0.9846, "step": 497 }, { "epoch": 0.07031911889296809, "grad_norm": 6.793090063833778, "learning_rate": 1.561128526645768e-05, "loss": 1.1967, "step": 498 }, { "epoch": 0.07046032194295397, "grad_norm": 6.742796010345787, "learning_rate": 1.564263322884013e-05, "loss": 1.2463, "step": 499 }, { "epoch": 0.07060152499293985, "grad_norm": 5.344652204172256, "learning_rate": 1.567398119122257e-05, "loss": 1.0464, "step": 500 }, { "epoch": 0.07074272804292572, "grad_norm": 6.39036293454649, "learning_rate": 1.5705329153605017e-05, "loss": 1.0594, "step": 501 }, { "epoch": 0.0708839310929116, "grad_norm": 6.194662126507768, "learning_rate": 1.5736677115987462e-05, "loss": 0.9793, "step": 502 }, { "epoch": 0.07102513414289749, "grad_norm": 5.464672530516132, "learning_rate": 1.5768025078369908e-05, "loss": 0.9024, "step": 503 }, { "epoch": 0.07116633719288337, "grad_norm": 6.068700476242649, "learning_rate": 1.5799373040752354e-05, "loss": 1.1162, "step": 504 }, { "epoch": 0.07130754024286924, "grad_norm": 6.464978869074758, "learning_rate": 1.5830721003134796e-05, "loss": 1.2319, "step": 505 }, { "epoch": 0.07144874329285512, "grad_norm": 5.941790207913838, "learning_rate": 1.586206896551724e-05, "loss": 1.3378, "step": 506 }, { "epoch": 0.07158994634284101, "grad_norm": 5.682901754070349, "learning_rate": 1.5893416927899687e-05, "loss": 1.0293, "step": 507 }, { "epoch": 0.07173114939282689, "grad_norm": 5.872876297279923, "learning_rate": 1.5924764890282133e-05, "loss": 1.0867, "step": 508 }, { "epoch": 0.07187235244281276, "grad_norm": 5.371428406042664, "learning_rate": 1.595611285266458e-05, "loss": 1.0894, "step": 509 }, { "epoch": 0.07201355549279864, "grad_norm": 7.314432059488115, "learning_rate": 1.598746081504702e-05, "loss": 1.021, "step": 510 }, { "epoch": 0.07215475854278453, "grad_norm": 7.426048653578476, "learning_rate": 1.601880877742947e-05, "loss": 1.3518, "step": 511 }, { "epoch": 0.07229596159277041, "grad_norm": 5.880104246923241, "learning_rate": 1.6050156739811912e-05, "loss": 1.0042, "step": 512 }, { "epoch": 0.07243716464275628, "grad_norm": 6.657460638622406, "learning_rate": 1.6081504702194358e-05, "loss": 1.2185, "step": 513 }, { "epoch": 0.07257836769274216, "grad_norm": 5.563185186463338, "learning_rate": 1.6112852664576804e-05, "loss": 1.1372, "step": 514 }, { "epoch": 0.07271957074272804, "grad_norm": 6.429058405573293, "learning_rate": 1.614420062695925e-05, "loss": 1.2332, "step": 515 }, { "epoch": 0.07286077379271393, "grad_norm": 6.478270156760181, "learning_rate": 1.6175548589341695e-05, "loss": 1.1697, "step": 516 }, { "epoch": 0.07300197684269981, "grad_norm": 5.9238191638797755, "learning_rate": 1.6206896551724137e-05, "loss": 1.3669, "step": 517 }, { "epoch": 0.07314317989268568, "grad_norm": 5.668610797091928, "learning_rate": 1.6238244514106583e-05, "loss": 1.1832, "step": 518 }, { "epoch": 0.07328438294267156, "grad_norm": 6.009943466356328, "learning_rate": 1.626959247648903e-05, "loss": 1.0866, "step": 519 }, { "epoch": 0.07342558599265744, "grad_norm": 6.193366721972212, "learning_rate": 1.6300940438871475e-05, "loss": 1.4709, "step": 520 }, { "epoch": 0.07356678904264333, "grad_norm": 5.823633698202906, "learning_rate": 1.633228840125392e-05, "loss": 1.148, "step": 521 }, { "epoch": 0.0737079920926292, "grad_norm": 6.1852921008929105, "learning_rate": 1.6363636363636366e-05, "loss": 1.0969, "step": 522 }, { "epoch": 0.07384919514261508, "grad_norm": 5.953213590087683, "learning_rate": 1.639498432601881e-05, "loss": 1.0933, "step": 523 }, { "epoch": 0.07399039819260096, "grad_norm": 5.710244906292251, "learning_rate": 1.6426332288401254e-05, "loss": 0.9773, "step": 524 }, { "epoch": 0.07413160124258684, "grad_norm": 5.667687375942904, "learning_rate": 1.64576802507837e-05, "loss": 1.1034, "step": 525 }, { "epoch": 0.07427280429257271, "grad_norm": 5.802881994662777, "learning_rate": 1.6489028213166145e-05, "loss": 1.0898, "step": 526 }, { "epoch": 0.0744140073425586, "grad_norm": 5.682791346168672, "learning_rate": 1.652037617554859e-05, "loss": 1.0491, "step": 527 }, { "epoch": 0.07455521039254448, "grad_norm": 6.030807394394101, "learning_rate": 1.6551724137931037e-05, "loss": 1.1362, "step": 528 }, { "epoch": 0.07469641344253036, "grad_norm": 5.923891859838122, "learning_rate": 1.6583072100313482e-05, "loss": 1.1302, "step": 529 }, { "epoch": 0.07483761649251623, "grad_norm": 5.952051482905965, "learning_rate": 1.6614420062695925e-05, "loss": 1.1015, "step": 530 }, { "epoch": 0.07497881954250211, "grad_norm": 6.209313248137419, "learning_rate": 1.664576802507837e-05, "loss": 1.1234, "step": 531 }, { "epoch": 0.075120022592488, "grad_norm": 6.158602408239308, "learning_rate": 1.6677115987460816e-05, "loss": 1.373, "step": 532 }, { "epoch": 0.07526122564247388, "grad_norm": 4.796258131831851, "learning_rate": 1.6708463949843262e-05, "loss": 1.0352, "step": 533 }, { "epoch": 0.07540242869245976, "grad_norm": 5.60296135867059, "learning_rate": 1.6739811912225707e-05, "loss": 1.0324, "step": 534 }, { "epoch": 0.07554363174244563, "grad_norm": 7.331734880940631, "learning_rate": 1.6771159874608153e-05, "loss": 1.2744, "step": 535 }, { "epoch": 0.07568483479243152, "grad_norm": 5.852543930085999, "learning_rate": 1.68025078369906e-05, "loss": 1.1039, "step": 536 }, { "epoch": 0.0758260378424174, "grad_norm": 6.145110329481626, "learning_rate": 1.683385579937304e-05, "loss": 1.2887, "step": 537 }, { "epoch": 0.07596724089240328, "grad_norm": 5.259349636789632, "learning_rate": 1.6865203761755487e-05, "loss": 1.1346, "step": 538 }, { "epoch": 0.07610844394238915, "grad_norm": 5.286352779670908, "learning_rate": 1.6896551724137932e-05, "loss": 1.1865, "step": 539 }, { "epoch": 0.07624964699237503, "grad_norm": 4.897339601994153, "learning_rate": 1.6927899686520378e-05, "loss": 1.0371, "step": 540 }, { "epoch": 0.07639085004236092, "grad_norm": 6.692016492969598, "learning_rate": 1.6959247648902824e-05, "loss": 1.2827, "step": 541 }, { "epoch": 0.0765320530923468, "grad_norm": 5.740634310469214, "learning_rate": 1.6990595611285266e-05, "loss": 1.1825, "step": 542 }, { "epoch": 0.07667325614233267, "grad_norm": 6.351661143462508, "learning_rate": 1.7021943573667712e-05, "loss": 1.1249, "step": 543 }, { "epoch": 0.07681445919231855, "grad_norm": 6.46660632944194, "learning_rate": 1.7053291536050158e-05, "loss": 1.2392, "step": 544 }, { "epoch": 0.07695566224230443, "grad_norm": 5.526638690733727, "learning_rate": 1.7084639498432603e-05, "loss": 1.1576, "step": 545 }, { "epoch": 0.07709686529229032, "grad_norm": 6.114235327852676, "learning_rate": 1.711598746081505e-05, "loss": 1.1267, "step": 546 }, { "epoch": 0.07723806834227619, "grad_norm": 5.264342286333056, "learning_rate": 1.7147335423197495e-05, "loss": 0.9464, "step": 547 }, { "epoch": 0.07737927139226207, "grad_norm": 6.834650078271172, "learning_rate": 1.717868338557994e-05, "loss": 1.159, "step": 548 }, { "epoch": 0.07752047444224795, "grad_norm": 5.996079921122092, "learning_rate": 1.7210031347962383e-05, "loss": 1.0824, "step": 549 }, { "epoch": 0.07766167749223384, "grad_norm": 7.086148350401072, "learning_rate": 1.7241379310344828e-05, "loss": 1.3272, "step": 550 }, { "epoch": 0.07780288054221972, "grad_norm": 5.543735541393911, "learning_rate": 1.7272727272727274e-05, "loss": 0.9808, "step": 551 }, { "epoch": 0.07794408359220559, "grad_norm": 7.0843576897384315, "learning_rate": 1.730407523510972e-05, "loss": 1.5222, "step": 552 }, { "epoch": 0.07808528664219147, "grad_norm": 4.838105665253886, "learning_rate": 1.7335423197492165e-05, "loss": 0.9439, "step": 553 }, { "epoch": 0.07822648969217735, "grad_norm": 5.403135721254861, "learning_rate": 1.7366771159874608e-05, "loss": 1.1423, "step": 554 }, { "epoch": 0.07836769274216324, "grad_norm": 5.558285600533167, "learning_rate": 1.7398119122257057e-05, "loss": 1.0969, "step": 555 }, { "epoch": 0.0785088957921491, "grad_norm": 6.578773247578049, "learning_rate": 1.74294670846395e-05, "loss": 1.1369, "step": 556 }, { "epoch": 0.07865009884213499, "grad_norm": 5.480957390246865, "learning_rate": 1.7460815047021945e-05, "loss": 1.1984, "step": 557 }, { "epoch": 0.07879130189212087, "grad_norm": 5.843736131099421, "learning_rate": 1.749216300940439e-05, "loss": 1.0589, "step": 558 }, { "epoch": 0.07893250494210675, "grad_norm": 5.8052981592197765, "learning_rate": 1.7523510971786836e-05, "loss": 1.0831, "step": 559 }, { "epoch": 0.07907370799209262, "grad_norm": 6.241678234286253, "learning_rate": 1.7554858934169282e-05, "loss": 1.122, "step": 560 }, { "epoch": 0.0792149110420785, "grad_norm": 5.849170627484907, "learning_rate": 1.7586206896551724e-05, "loss": 1.0995, "step": 561 }, { "epoch": 0.07935611409206439, "grad_norm": 5.884474410784104, "learning_rate": 1.7617554858934173e-05, "loss": 0.9374, "step": 562 }, { "epoch": 0.07949731714205027, "grad_norm": 5.832116996080886, "learning_rate": 1.7648902821316615e-05, "loss": 1.1393, "step": 563 }, { "epoch": 0.07963852019203614, "grad_norm": 5.320624700179814, "learning_rate": 1.768025078369906e-05, "loss": 1.0044, "step": 564 }, { "epoch": 0.07977972324202202, "grad_norm": 6.911040887792742, "learning_rate": 1.7711598746081507e-05, "loss": 1.1676, "step": 565 }, { "epoch": 0.07992092629200791, "grad_norm": 6.853399074295397, "learning_rate": 1.774294670846395e-05, "loss": 1.132, "step": 566 }, { "epoch": 0.08006212934199379, "grad_norm": 6.898205190949468, "learning_rate": 1.7774294670846398e-05, "loss": 1.2241, "step": 567 }, { "epoch": 0.08020333239197967, "grad_norm": 5.576836953504434, "learning_rate": 1.780564263322884e-05, "loss": 1.075, "step": 568 }, { "epoch": 0.08034453544196554, "grad_norm": 8.443395424117693, "learning_rate": 1.7836990595611286e-05, "loss": 1.4261, "step": 569 }, { "epoch": 0.08048573849195143, "grad_norm": 5.0152196245214276, "learning_rate": 1.7868338557993732e-05, "loss": 0.9979, "step": 570 }, { "epoch": 0.08062694154193731, "grad_norm": 6.270426407981272, "learning_rate": 1.7899686520376178e-05, "loss": 1.3631, "step": 571 }, { "epoch": 0.08076814459192319, "grad_norm": 5.736030805487781, "learning_rate": 1.7931034482758623e-05, "loss": 1.0218, "step": 572 }, { "epoch": 0.08090934764190906, "grad_norm": 5.852691561823155, "learning_rate": 1.7962382445141066e-05, "loss": 1.2907, "step": 573 }, { "epoch": 0.08105055069189494, "grad_norm": 5.848770184732747, "learning_rate": 1.7993730407523515e-05, "loss": 1.1728, "step": 574 }, { "epoch": 0.08119175374188083, "grad_norm": 5.421475194093066, "learning_rate": 1.8025078369905957e-05, "loss": 1.1785, "step": 575 }, { "epoch": 0.08133295679186671, "grad_norm": 5.6427491867124075, "learning_rate": 1.8056426332288403e-05, "loss": 1.1839, "step": 576 }, { "epoch": 0.08147415984185258, "grad_norm": 5.507774894364867, "learning_rate": 1.808777429467085e-05, "loss": 1.0556, "step": 577 }, { "epoch": 0.08161536289183846, "grad_norm": 5.716432649942122, "learning_rate": 1.811912225705329e-05, "loss": 1.0405, "step": 578 }, { "epoch": 0.08175656594182434, "grad_norm": 5.837884875128244, "learning_rate": 1.815047021943574e-05, "loss": 1.1678, "step": 579 }, { "epoch": 0.08189776899181023, "grad_norm": 5.300377434600539, "learning_rate": 1.8181818181818182e-05, "loss": 0.9418, "step": 580 }, { "epoch": 0.0820389720417961, "grad_norm": 6.725320147590814, "learning_rate": 1.8213166144200628e-05, "loss": 1.232, "step": 581 }, { "epoch": 0.08218017509178198, "grad_norm": 6.099267875865595, "learning_rate": 1.8244514106583073e-05, "loss": 1.1636, "step": 582 }, { "epoch": 0.08232137814176786, "grad_norm": 7.996285191743562, "learning_rate": 1.827586206896552e-05, "loss": 1.3492, "step": 583 }, { "epoch": 0.08246258119175374, "grad_norm": 5.016637946543329, "learning_rate": 1.8307210031347965e-05, "loss": 0.9026, "step": 584 }, { "epoch": 0.08260378424173963, "grad_norm": 5.495762050588071, "learning_rate": 1.8338557993730407e-05, "loss": 0.9803, "step": 585 }, { "epoch": 0.0827449872917255, "grad_norm": 6.3922637350643265, "learning_rate": 1.8369905956112856e-05, "loss": 1.0984, "step": 586 }, { "epoch": 0.08288619034171138, "grad_norm": 5.392388211200259, "learning_rate": 1.84012539184953e-05, "loss": 0.894, "step": 587 }, { "epoch": 0.08302739339169726, "grad_norm": 6.690298945925227, "learning_rate": 1.8432601880877744e-05, "loss": 1.2125, "step": 588 }, { "epoch": 0.08316859644168315, "grad_norm": 5.671717685738797, "learning_rate": 1.846394984326019e-05, "loss": 0.9612, "step": 589 }, { "epoch": 0.08330979949166901, "grad_norm": 5.067388868181858, "learning_rate": 1.8495297805642636e-05, "loss": 1.0224, "step": 590 }, { "epoch": 0.0834510025416549, "grad_norm": 5.623803728888419, "learning_rate": 1.852664576802508e-05, "loss": 0.9653, "step": 591 }, { "epoch": 0.08359220559164078, "grad_norm": 6.601627551141263, "learning_rate": 1.8557993730407524e-05, "loss": 1.1374, "step": 592 }, { "epoch": 0.08373340864162666, "grad_norm": 6.6027030820492545, "learning_rate": 1.858934169278997e-05, "loss": 1.1728, "step": 593 }, { "epoch": 0.08387461169161253, "grad_norm": 5.333670417374938, "learning_rate": 1.8620689655172415e-05, "loss": 0.9548, "step": 594 }, { "epoch": 0.08401581474159842, "grad_norm": 6.198321222438251, "learning_rate": 1.865203761755486e-05, "loss": 1.2297, "step": 595 }, { "epoch": 0.0841570177915843, "grad_norm": 5.9431558207548925, "learning_rate": 1.8683385579937306e-05, "loss": 1.014, "step": 596 }, { "epoch": 0.08429822084157018, "grad_norm": 6.710774989517947, "learning_rate": 1.8714733542319752e-05, "loss": 1.249, "step": 597 }, { "epoch": 0.08443942389155605, "grad_norm": 5.379538607412715, "learning_rate": 1.8746081504702194e-05, "loss": 0.957, "step": 598 }, { "epoch": 0.08458062694154193, "grad_norm": 7.303180959939755, "learning_rate": 1.877742946708464e-05, "loss": 1.2566, "step": 599 }, { "epoch": 0.08472182999152782, "grad_norm": 5.86211539459515, "learning_rate": 1.8808777429467086e-05, "loss": 1.0868, "step": 600 }, { "epoch": 0.0848630330415137, "grad_norm": 5.239541128255798, "learning_rate": 1.884012539184953e-05, "loss": 1.1452, "step": 601 }, { "epoch": 0.08500423609149958, "grad_norm": 6.397091785620323, "learning_rate": 1.8871473354231977e-05, "loss": 1.1904, "step": 602 }, { "epoch": 0.08514543914148545, "grad_norm": 6.520562809731438, "learning_rate": 1.8902821316614423e-05, "loss": 1.3573, "step": 603 }, { "epoch": 0.08528664219147133, "grad_norm": 4.590438272201157, "learning_rate": 1.893416927899687e-05, "loss": 0.8604, "step": 604 }, { "epoch": 0.08542784524145722, "grad_norm": 5.576165116459939, "learning_rate": 1.896551724137931e-05, "loss": 1.0791, "step": 605 }, { "epoch": 0.0855690482914431, "grad_norm": 5.920010495599947, "learning_rate": 1.8996865203761756e-05, "loss": 1.1234, "step": 606 }, { "epoch": 0.08571025134142897, "grad_norm": 6.190645354123114, "learning_rate": 1.9028213166144202e-05, "loss": 1.1197, "step": 607 }, { "epoch": 0.08585145439141485, "grad_norm": 6.495354708349006, "learning_rate": 1.9059561128526648e-05, "loss": 1.3107, "step": 608 }, { "epoch": 0.08599265744140074, "grad_norm": 6.182901350359474, "learning_rate": 1.9090909090909094e-05, "loss": 1.062, "step": 609 }, { "epoch": 0.08613386049138662, "grad_norm": 5.3296862461077215, "learning_rate": 1.9122257053291536e-05, "loss": 0.9507, "step": 610 }, { "epoch": 0.08627506354137249, "grad_norm": 5.766012786864861, "learning_rate": 1.915360501567398e-05, "loss": 0.996, "step": 611 }, { "epoch": 0.08641626659135837, "grad_norm": 5.058032189774383, "learning_rate": 1.9184952978056427e-05, "loss": 1.0008, "step": 612 }, { "epoch": 0.08655746964134425, "grad_norm": 5.4571204911719144, "learning_rate": 1.9216300940438873e-05, "loss": 1.1685, "step": 613 }, { "epoch": 0.08669867269133014, "grad_norm": 7.885139131692505, "learning_rate": 1.924764890282132e-05, "loss": 1.3801, "step": 614 }, { "epoch": 0.086839875741316, "grad_norm": 5.70905121788261, "learning_rate": 1.9278996865203764e-05, "loss": 1.07, "step": 615 }, { "epoch": 0.08698107879130189, "grad_norm": 5.789647737306275, "learning_rate": 1.931034482758621e-05, "loss": 1.1097, "step": 616 }, { "epoch": 0.08712228184128777, "grad_norm": 5.333824466358461, "learning_rate": 1.9341692789968652e-05, "loss": 1.0032, "step": 617 }, { "epoch": 0.08726348489127365, "grad_norm": 5.279567562817471, "learning_rate": 1.9373040752351098e-05, "loss": 0.9815, "step": 618 }, { "epoch": 0.08740468794125954, "grad_norm": 5.946563250288684, "learning_rate": 1.9404388714733544e-05, "loss": 1.2617, "step": 619 }, { "epoch": 0.0875458909912454, "grad_norm": 5.945772051557553, "learning_rate": 1.943573667711599e-05, "loss": 1.0628, "step": 620 }, { "epoch": 0.08768709404123129, "grad_norm": 6.66789076667815, "learning_rate": 1.9467084639498435e-05, "loss": 1.3254, "step": 621 }, { "epoch": 0.08782829709121717, "grad_norm": 6.229491991717858, "learning_rate": 1.9498432601880877e-05, "loss": 1.147, "step": 622 }, { "epoch": 0.08796950014120306, "grad_norm": 5.2543706086447735, "learning_rate": 1.9529780564263326e-05, "loss": 1.0044, "step": 623 }, { "epoch": 0.08811070319118892, "grad_norm": 4.64518538823868, "learning_rate": 1.956112852664577e-05, "loss": 0.8507, "step": 624 }, { "epoch": 0.08825190624117481, "grad_norm": 5.87949355033456, "learning_rate": 1.9592476489028214e-05, "loss": 1.1571, "step": 625 }, { "epoch": 0.08839310929116069, "grad_norm": 5.857411145763842, "learning_rate": 1.962382445141066e-05, "loss": 1.1866, "step": 626 }, { "epoch": 0.08853431234114657, "grad_norm": 6.893548328142488, "learning_rate": 1.9655172413793106e-05, "loss": 1.2687, "step": 627 }, { "epoch": 0.08867551539113244, "grad_norm": 5.831663304735709, "learning_rate": 1.968652037617555e-05, "loss": 1.1951, "step": 628 }, { "epoch": 0.08881671844111833, "grad_norm": 4.973545107047332, "learning_rate": 1.9717868338557994e-05, "loss": 1.123, "step": 629 }, { "epoch": 0.08895792149110421, "grad_norm": 6.500495792637582, "learning_rate": 1.9749216300940443e-05, "loss": 1.0234, "step": 630 }, { "epoch": 0.08909912454109009, "grad_norm": 5.3743364068975366, "learning_rate": 1.9780564263322885e-05, "loss": 1.0438, "step": 631 }, { "epoch": 0.08924032759107597, "grad_norm": 5.121731384137619, "learning_rate": 1.981191222570533e-05, "loss": 1.1276, "step": 632 }, { "epoch": 0.08938153064106184, "grad_norm": 6.311695481617017, "learning_rate": 1.9843260188087777e-05, "loss": 1.2687, "step": 633 }, { "epoch": 0.08952273369104773, "grad_norm": 6.615594164050313, "learning_rate": 1.987460815047022e-05, "loss": 1.2163, "step": 634 }, { "epoch": 0.08966393674103361, "grad_norm": 5.524936353885061, "learning_rate": 1.9905956112852668e-05, "loss": 1.1537, "step": 635 }, { "epoch": 0.08980513979101949, "grad_norm": 4.519873766037404, "learning_rate": 1.993730407523511e-05, "loss": 0.9077, "step": 636 }, { "epoch": 0.08994634284100536, "grad_norm": 6.09576205867513, "learning_rate": 1.9968652037617556e-05, "loss": 1.0986, "step": 637 }, { "epoch": 0.09008754589099124, "grad_norm": 5.324926342669233, "learning_rate": 2e-05, "loss": 1.0757, "step": 638 }, { "epoch": 0.09022874894097713, "grad_norm": 5.666949160368743, "learning_rate": 1.9999999883802162e-05, "loss": 0.971, "step": 639 }, { "epoch": 0.09036995199096301, "grad_norm": 7.1896339702510526, "learning_rate": 1.999999953520864e-05, "loss": 1.1724, "step": 640 }, { "epoch": 0.09051115504094888, "grad_norm": 5.770082078125777, "learning_rate": 1.999999895421945e-05, "loss": 1.1214, "step": 641 }, { "epoch": 0.09065235809093476, "grad_norm": 6.744688401394613, "learning_rate": 1.9999998140834606e-05, "loss": 1.1188, "step": 642 }, { "epoch": 0.09079356114092065, "grad_norm": 5.364338641891255, "learning_rate": 1.999999709505412e-05, "loss": 1.0897, "step": 643 }, { "epoch": 0.09093476419090653, "grad_norm": 5.535805071291531, "learning_rate": 1.9999995816878023e-05, "loss": 1.1295, "step": 644 }, { "epoch": 0.0910759672408924, "grad_norm": 5.078259395818064, "learning_rate": 1.999999430630634e-05, "loss": 1.0883, "step": 645 }, { "epoch": 0.09121717029087828, "grad_norm": 5.607273524163378, "learning_rate": 1.9999992563339106e-05, "loss": 1.1246, "step": 646 }, { "epoch": 0.09135837334086416, "grad_norm": 5.9616108866529, "learning_rate": 1.9999990587976364e-05, "loss": 1.2165, "step": 647 }, { "epoch": 0.09149957639085005, "grad_norm": 5.428490845993007, "learning_rate": 1.9999988380218164e-05, "loss": 1.0671, "step": 648 }, { "epoch": 0.09164077944083593, "grad_norm": 6.0269452495645375, "learning_rate": 1.9999985940064552e-05, "loss": 1.1748, "step": 649 }, { "epoch": 0.0917819824908218, "grad_norm": 5.824579105718113, "learning_rate": 1.9999983267515578e-05, "loss": 1.1847, "step": 650 }, { "epoch": 0.09192318554080768, "grad_norm": 6.158700459715279, "learning_rate": 1.9999980362571318e-05, "loss": 1.0948, "step": 651 }, { "epoch": 0.09206438859079356, "grad_norm": 5.703276946645952, "learning_rate": 1.9999977225231833e-05, "loss": 1.2464, "step": 652 }, { "epoch": 0.09220559164077945, "grad_norm": 5.826786775307854, "learning_rate": 1.9999973855497196e-05, "loss": 1.3517, "step": 653 }, { "epoch": 0.09234679469076532, "grad_norm": 5.894383308645578, "learning_rate": 1.999997025336748e-05, "loss": 1.2618, "step": 654 }, { "epoch": 0.0924879977407512, "grad_norm": 5.570146909107198, "learning_rate": 1.9999966418842778e-05, "loss": 1.092, "step": 655 }, { "epoch": 0.09262920079073708, "grad_norm": 5.985116919433037, "learning_rate": 1.9999962351923176e-05, "loss": 1.0396, "step": 656 }, { "epoch": 0.09277040384072296, "grad_norm": 5.386635594530617, "learning_rate": 1.9999958052608766e-05, "loss": 1.2431, "step": 657 }, { "epoch": 0.09291160689070883, "grad_norm": 6.207369455383297, "learning_rate": 1.999995352089965e-05, "loss": 1.2984, "step": 658 }, { "epoch": 0.09305280994069472, "grad_norm": 6.132660019806295, "learning_rate": 1.999994875679593e-05, "loss": 1.3616, "step": 659 }, { "epoch": 0.0931940129906806, "grad_norm": 6.481054938157247, "learning_rate": 1.9999943760297725e-05, "loss": 1.0988, "step": 660 }, { "epoch": 0.09333521604066648, "grad_norm": 5.7613169535325985, "learning_rate": 1.9999938531405142e-05, "loss": 1.0736, "step": 661 }, { "epoch": 0.09347641909065235, "grad_norm": 5.255227266215469, "learning_rate": 1.999993307011831e-05, "loss": 0.9656, "step": 662 }, { "epoch": 0.09361762214063823, "grad_norm": 6.189745974331651, "learning_rate": 1.9999927376437348e-05, "loss": 1.135, "step": 663 }, { "epoch": 0.09375882519062412, "grad_norm": 5.81189681277316, "learning_rate": 1.9999921450362392e-05, "loss": 1.1131, "step": 664 }, { "epoch": 0.09390002824061, "grad_norm": 5.503829112292691, "learning_rate": 1.9999915291893584e-05, "loss": 1.1699, "step": 665 }, { "epoch": 0.09404123129059588, "grad_norm": 5.478567498765631, "learning_rate": 1.999990890103106e-05, "loss": 1.1486, "step": 666 }, { "epoch": 0.09418243434058175, "grad_norm": 6.067551237003436, "learning_rate": 1.9999902277774975e-05, "loss": 1.2134, "step": 667 }, { "epoch": 0.09432363739056764, "grad_norm": 4.849793656947368, "learning_rate": 1.9999895422125477e-05, "loss": 0.9784, "step": 668 }, { "epoch": 0.09446484044055352, "grad_norm": 5.139092118609053, "learning_rate": 1.999988833408273e-05, "loss": 1.0161, "step": 669 }, { "epoch": 0.0946060434905394, "grad_norm": 5.953497430636807, "learning_rate": 1.9999881013646893e-05, "loss": 1.2735, "step": 670 }, { "epoch": 0.09474724654052527, "grad_norm": 5.739455802347055, "learning_rate": 1.9999873460818142e-05, "loss": 1.2257, "step": 671 }, { "epoch": 0.09488844959051115, "grad_norm": 6.210078095732943, "learning_rate": 1.999986567559665e-05, "loss": 1.0667, "step": 672 }, { "epoch": 0.09502965264049704, "grad_norm": 6.9847891440486665, "learning_rate": 1.9999857657982597e-05, "loss": 1.2512, "step": 673 }, { "epoch": 0.09517085569048292, "grad_norm": 6.024046312113741, "learning_rate": 1.9999849407976176e-05, "loss": 1.1704, "step": 674 }, { "epoch": 0.09531205874046879, "grad_norm": 4.820104959128535, "learning_rate": 1.9999840925577568e-05, "loss": 1.1433, "step": 675 }, { "epoch": 0.09545326179045467, "grad_norm": 5.115622896516326, "learning_rate": 1.9999832210786977e-05, "loss": 1.0019, "step": 676 }, { "epoch": 0.09559446484044055, "grad_norm": 5.77858269741972, "learning_rate": 1.9999823263604606e-05, "loss": 1.143, "step": 677 }, { "epoch": 0.09573566789042644, "grad_norm": 5.110085448833619, "learning_rate": 1.999981408403066e-05, "loss": 0.9884, "step": 678 }, { "epoch": 0.0958768709404123, "grad_norm": 5.614887421941373, "learning_rate": 1.999980467206535e-05, "loss": 1.2248, "step": 679 }, { "epoch": 0.09601807399039819, "grad_norm": 5.611956138773773, "learning_rate": 1.99997950277089e-05, "loss": 1.276, "step": 680 }, { "epoch": 0.09615927704038407, "grad_norm": 5.3752249888791495, "learning_rate": 1.9999785150961536e-05, "loss": 1.1671, "step": 681 }, { "epoch": 0.09630048009036996, "grad_norm": 5.109802752343885, "learning_rate": 1.9999775041823478e-05, "loss": 0.9753, "step": 682 }, { "epoch": 0.09644168314035584, "grad_norm": 4.659592444779131, "learning_rate": 1.999976470029497e-05, "loss": 0.8942, "step": 683 }, { "epoch": 0.09658288619034171, "grad_norm": 5.405217109466532, "learning_rate": 1.9999754126376247e-05, "loss": 1.169, "step": 684 }, { "epoch": 0.09672408924032759, "grad_norm": 5.5637464006244945, "learning_rate": 1.9999743320067556e-05, "loss": 1.229, "step": 685 }, { "epoch": 0.09686529229031347, "grad_norm": 5.6768403523268915, "learning_rate": 1.999973228136915e-05, "loss": 1.1861, "step": 686 }, { "epoch": 0.09700649534029936, "grad_norm": 6.620644939123791, "learning_rate": 1.9999721010281288e-05, "loss": 1.1918, "step": 687 }, { "epoch": 0.09714769839028523, "grad_norm": 5.209816864875043, "learning_rate": 1.9999709506804223e-05, "loss": 1.1157, "step": 688 }, { "epoch": 0.09728890144027111, "grad_norm": 5.409822280823739, "learning_rate": 1.9999697770938227e-05, "loss": 1.1351, "step": 689 }, { "epoch": 0.09743010449025699, "grad_norm": 4.931291740088819, "learning_rate": 1.999968580268358e-05, "loss": 1.0615, "step": 690 }, { "epoch": 0.09757130754024287, "grad_norm": 5.990438985640952, "learning_rate": 1.999967360204055e-05, "loss": 1.2046, "step": 691 }, { "epoch": 0.09771251059022874, "grad_norm": 4.288297098821375, "learning_rate": 1.999966116900942e-05, "loss": 1.0324, "step": 692 }, { "epoch": 0.09785371364021463, "grad_norm": 4.950699747566915, "learning_rate": 1.9999648503590488e-05, "loss": 1.1933, "step": 693 }, { "epoch": 0.09799491669020051, "grad_norm": 5.767445833191182, "learning_rate": 1.9999635605784042e-05, "loss": 1.3355, "step": 694 }, { "epoch": 0.09813611974018639, "grad_norm": 5.218928818543916, "learning_rate": 1.9999622475590383e-05, "loss": 0.9571, "step": 695 }, { "epoch": 0.09827732279017226, "grad_norm": 4.802705337559807, "learning_rate": 1.9999609113009817e-05, "loss": 0.9388, "step": 696 }, { "epoch": 0.09841852584015814, "grad_norm": 5.388105902279214, "learning_rate": 1.999959551804265e-05, "loss": 0.9834, "step": 697 }, { "epoch": 0.09855972889014403, "grad_norm": 5.2169316246723065, "learning_rate": 1.9999581690689204e-05, "loss": 1.0518, "step": 698 }, { "epoch": 0.09870093194012991, "grad_norm": 4.8462983840891765, "learning_rate": 1.9999567630949798e-05, "loss": 1.0323, "step": 699 }, { "epoch": 0.0988421349901158, "grad_norm": 4.979942240728192, "learning_rate": 1.999955333882476e-05, "loss": 0.9719, "step": 700 }, { "epoch": 0.09898333804010166, "grad_norm": 5.563624313268988, "learning_rate": 1.9999538814314417e-05, "loss": 1.0766, "step": 701 }, { "epoch": 0.09912454109008755, "grad_norm": 5.7666428702306645, "learning_rate": 1.9999524057419116e-05, "loss": 1.2518, "step": 702 }, { "epoch": 0.09926574414007343, "grad_norm": 5.249095854501378, "learning_rate": 1.9999509068139195e-05, "loss": 1.1129, "step": 703 }, { "epoch": 0.09940694719005931, "grad_norm": 5.262058498054311, "learning_rate": 1.9999493846474995e-05, "loss": 1.2255, "step": 704 }, { "epoch": 0.09954815024004518, "grad_norm": 5.751945010797834, "learning_rate": 1.9999478392426882e-05, "loss": 1.3891, "step": 705 }, { "epoch": 0.09968935329003106, "grad_norm": 4.751815582453203, "learning_rate": 1.9999462705995206e-05, "loss": 1.0502, "step": 706 }, { "epoch": 0.09983055634001695, "grad_norm": 4.785141541279744, "learning_rate": 1.9999446787180338e-05, "loss": 0.993, "step": 707 }, { "epoch": 0.09997175939000283, "grad_norm": 4.709430265877194, "learning_rate": 1.9999430635982643e-05, "loss": 1.0911, "step": 708 }, { "epoch": 0.1001129624399887, "grad_norm": 5.1236130544956975, "learning_rate": 1.9999414252402498e-05, "loss": 1.149, "step": 709 }, { "epoch": 0.10025416548997458, "grad_norm": 6.132400682455835, "learning_rate": 1.9999397636440284e-05, "loss": 1.1057, "step": 710 }, { "epoch": 0.10039536853996046, "grad_norm": 5.828726603762334, "learning_rate": 1.999938078809639e-05, "loss": 1.2996, "step": 711 }, { "epoch": 0.10053657158994635, "grad_norm": 5.987293666494228, "learning_rate": 1.9999363707371203e-05, "loss": 1.0761, "step": 712 }, { "epoch": 0.10067777463993222, "grad_norm": 6.581807055331371, "learning_rate": 1.9999346394265122e-05, "loss": 1.1585, "step": 713 }, { "epoch": 0.1008189776899181, "grad_norm": 6.539521831297887, "learning_rate": 1.999932884877855e-05, "loss": 1.1535, "step": 714 }, { "epoch": 0.10096018073990398, "grad_norm": 5.4732360427182964, "learning_rate": 1.9999311070911894e-05, "loss": 1.1875, "step": 715 }, { "epoch": 0.10110138378988986, "grad_norm": 5.784025575986175, "learning_rate": 1.9999293060665565e-05, "loss": 1.0287, "step": 716 }, { "epoch": 0.10124258683987575, "grad_norm": 5.133736164167682, "learning_rate": 1.9999274818039988e-05, "loss": 1.1005, "step": 717 }, { "epoch": 0.10138378988986162, "grad_norm": 6.510978354880005, "learning_rate": 1.9999256343035577e-05, "loss": 1.1927, "step": 718 }, { "epoch": 0.1015249929398475, "grad_norm": 5.500852274034257, "learning_rate": 1.999923763565277e-05, "loss": 1.0421, "step": 719 }, { "epoch": 0.10166619598983338, "grad_norm": 6.198347709223239, "learning_rate": 1.9999218695892e-05, "loss": 1.2198, "step": 720 }, { "epoch": 0.10180739903981927, "grad_norm": 4.873935303878561, "learning_rate": 1.9999199523753703e-05, "loss": 1.0713, "step": 721 }, { "epoch": 0.10194860208980513, "grad_norm": 5.816079592242595, "learning_rate": 1.9999180119238327e-05, "loss": 1.2243, "step": 722 }, { "epoch": 0.10208980513979102, "grad_norm": 5.568323616759773, "learning_rate": 1.9999160482346328e-05, "loss": 1.2409, "step": 723 }, { "epoch": 0.1022310081897769, "grad_norm": 6.036377036205735, "learning_rate": 1.9999140613078154e-05, "loss": 1.2706, "step": 724 }, { "epoch": 0.10237221123976278, "grad_norm": 5.012922828562304, "learning_rate": 1.999912051143427e-05, "loss": 1.2475, "step": 725 }, { "epoch": 0.10251341428974865, "grad_norm": 4.598627902633691, "learning_rate": 1.9999100177415143e-05, "loss": 1.0442, "step": 726 }, { "epoch": 0.10265461733973454, "grad_norm": 4.914847279973792, "learning_rate": 1.999907961102125e-05, "loss": 0.9987, "step": 727 }, { "epoch": 0.10279582038972042, "grad_norm": 5.307894371058464, "learning_rate": 1.999905881225306e-05, "loss": 1.1264, "step": 728 }, { "epoch": 0.1029370234397063, "grad_norm": 5.810319439068577, "learning_rate": 1.9999037781111067e-05, "loss": 1.1089, "step": 729 }, { "epoch": 0.10307822648969217, "grad_norm": 4.786968652577345, "learning_rate": 1.9999016517595752e-05, "loss": 1.2382, "step": 730 }, { "epoch": 0.10321942953967805, "grad_norm": 6.1084215923413385, "learning_rate": 1.9998995021707614e-05, "loss": 1.3161, "step": 731 }, { "epoch": 0.10336063258966394, "grad_norm": 5.056769533270197, "learning_rate": 1.9998973293447143e-05, "loss": 0.9979, "step": 732 }, { "epoch": 0.10350183563964982, "grad_norm": 5.704456283407406, "learning_rate": 1.9998951332814856e-05, "loss": 1.095, "step": 733 }, { "epoch": 0.1036430386896357, "grad_norm": 6.043167917541847, "learning_rate": 1.9998929139811257e-05, "loss": 1.3111, "step": 734 }, { "epoch": 0.10378424173962157, "grad_norm": 5.020497520449919, "learning_rate": 1.9998906714436864e-05, "loss": 1.1345, "step": 735 }, { "epoch": 0.10392544478960745, "grad_norm": 4.85025193275784, "learning_rate": 1.9998884056692195e-05, "loss": 1.0362, "step": 736 }, { "epoch": 0.10406664783959334, "grad_norm": 5.428213127714124, "learning_rate": 1.999886116657778e-05, "loss": 1.1797, "step": 737 }, { "epoch": 0.10420785088957922, "grad_norm": 5.807489463471432, "learning_rate": 1.999883804409415e-05, "loss": 0.9791, "step": 738 }, { "epoch": 0.10434905393956509, "grad_norm": 5.642919201101373, "learning_rate": 1.999881468924184e-05, "loss": 1.2226, "step": 739 }, { "epoch": 0.10449025698955097, "grad_norm": 5.271063846020795, "learning_rate": 1.9998791102021396e-05, "loss": 1.0923, "step": 740 }, { "epoch": 0.10463146003953686, "grad_norm": 4.885668846840746, "learning_rate": 1.9998767282433367e-05, "loss": 0.8912, "step": 741 }, { "epoch": 0.10477266308952274, "grad_norm": 4.900010257787578, "learning_rate": 1.99987432304783e-05, "loss": 1.0789, "step": 742 }, { "epoch": 0.10491386613950861, "grad_norm": 5.652034656827883, "learning_rate": 1.9998718946156766e-05, "loss": 1.1742, "step": 743 }, { "epoch": 0.10505506918949449, "grad_norm": 5.040623538663468, "learning_rate": 1.9998694429469315e-05, "loss": 1.1059, "step": 744 }, { "epoch": 0.10519627223948037, "grad_norm": 6.464269122005671, "learning_rate": 1.9998669680416526e-05, "loss": 1.2625, "step": 745 }, { "epoch": 0.10533747528946626, "grad_norm": 5.624258426420456, "learning_rate": 1.999864469899897e-05, "loss": 1.0741, "step": 746 }, { "epoch": 0.10547867833945213, "grad_norm": 6.572251059482539, "learning_rate": 1.999861948521723e-05, "loss": 1.2783, "step": 747 }, { "epoch": 0.10561988138943801, "grad_norm": 5.625549589571555, "learning_rate": 1.9998594039071894e-05, "loss": 1.2335, "step": 748 }, { "epoch": 0.10576108443942389, "grad_norm": 5.328741266989122, "learning_rate": 1.999856836056355e-05, "loss": 1.0974, "step": 749 }, { "epoch": 0.10590228748940977, "grad_norm": 5.341384349114137, "learning_rate": 1.9998542449692794e-05, "loss": 0.8541, "step": 750 }, { "epoch": 0.10604349053939566, "grad_norm": 5.736007555215294, "learning_rate": 1.9998516306460226e-05, "loss": 1.1804, "step": 751 }, { "epoch": 0.10618469358938153, "grad_norm": 4.897172795523395, "learning_rate": 1.999848993086646e-05, "loss": 1.1651, "step": 752 }, { "epoch": 0.10632589663936741, "grad_norm": 4.533177272969853, "learning_rate": 1.999846332291211e-05, "loss": 0.9911, "step": 753 }, { "epoch": 0.10646709968935329, "grad_norm": 5.231367703326481, "learning_rate": 1.9998436482597784e-05, "loss": 1.0903, "step": 754 }, { "epoch": 0.10660830273933918, "grad_norm": 5.521757687458886, "learning_rate": 1.9998409409924115e-05, "loss": 1.1007, "step": 755 }, { "epoch": 0.10674950578932504, "grad_norm": 5.4929736416325685, "learning_rate": 1.9998382104891728e-05, "loss": 1.0937, "step": 756 }, { "epoch": 0.10689070883931093, "grad_norm": 5.903541946031825, "learning_rate": 1.9998354567501262e-05, "loss": 1.2178, "step": 757 }, { "epoch": 0.10703191188929681, "grad_norm": 5.701560028675846, "learning_rate": 1.9998326797753352e-05, "loss": 1.4536, "step": 758 }, { "epoch": 0.1071731149392827, "grad_norm": 5.247477347574576, "learning_rate": 1.9998298795648645e-05, "loss": 1.1055, "step": 759 }, { "epoch": 0.10731431798926856, "grad_norm": 4.6012264045742475, "learning_rate": 1.999827056118779e-05, "loss": 0.8451, "step": 760 }, { "epoch": 0.10745552103925445, "grad_norm": 5.101505642100971, "learning_rate": 1.999824209437145e-05, "loss": 0.9903, "step": 761 }, { "epoch": 0.10759672408924033, "grad_norm": 5.538563910263696, "learning_rate": 1.9998213395200277e-05, "loss": 1.1274, "step": 762 }, { "epoch": 0.10773792713922621, "grad_norm": 4.756257820240059, "learning_rate": 1.9998184463674947e-05, "loss": 1.0958, "step": 763 }, { "epoch": 0.10787913018921208, "grad_norm": 4.965508053130454, "learning_rate": 1.9998155299796122e-05, "loss": 1.0715, "step": 764 }, { "epoch": 0.10802033323919796, "grad_norm": 5.4704516094316356, "learning_rate": 1.999812590356449e-05, "loss": 1.3287, "step": 765 }, { "epoch": 0.10816153628918385, "grad_norm": 5.377616350364168, "learning_rate": 1.999809627498073e-05, "loss": 1.1041, "step": 766 }, { "epoch": 0.10830273933916973, "grad_norm": 5.654675297637625, "learning_rate": 1.999806641404553e-05, "loss": 1.0549, "step": 767 }, { "epoch": 0.10844394238915561, "grad_norm": 6.544690276680263, "learning_rate": 1.9998036320759582e-05, "loss": 1.3446, "step": 768 }, { "epoch": 0.10858514543914148, "grad_norm": 5.625968326355665, "learning_rate": 1.999800599512359e-05, "loss": 1.2314, "step": 769 }, { "epoch": 0.10872634848912736, "grad_norm": 6.1398232285014895, "learning_rate": 1.9997975437138256e-05, "loss": 1.2515, "step": 770 }, { "epoch": 0.10886755153911325, "grad_norm": 6.13454450174957, "learning_rate": 1.999794464680429e-05, "loss": 1.0589, "step": 771 }, { "epoch": 0.10900875458909913, "grad_norm": 4.6366569770462895, "learning_rate": 1.999791362412241e-05, "loss": 1.0316, "step": 772 }, { "epoch": 0.109149957639085, "grad_norm": 4.923060372990307, "learning_rate": 1.9997882369093334e-05, "loss": 1.1058, "step": 773 }, { "epoch": 0.10929116068907088, "grad_norm": 4.984834473524601, "learning_rate": 1.9997850881717788e-05, "loss": 1.1754, "step": 774 }, { "epoch": 0.10943236373905676, "grad_norm": 5.035565565394427, "learning_rate": 1.9997819161996507e-05, "loss": 1.1307, "step": 775 }, { "epoch": 0.10957356678904265, "grad_norm": 5.040610041872462, "learning_rate": 1.9997787209930222e-05, "loss": 1.0287, "step": 776 }, { "epoch": 0.10971476983902852, "grad_norm": 6.159249400645923, "learning_rate": 1.9997755025519687e-05, "loss": 1.0095, "step": 777 }, { "epoch": 0.1098559728890144, "grad_norm": 5.5764104616174075, "learning_rate": 1.999772260876564e-05, "loss": 1.3386, "step": 778 }, { "epoch": 0.10999717593900028, "grad_norm": 5.392220379789989, "learning_rate": 1.9997689959668837e-05, "loss": 1.1628, "step": 779 }, { "epoch": 0.11013837898898617, "grad_norm": 6.082124062495701, "learning_rate": 1.9997657078230034e-05, "loss": 1.2274, "step": 780 }, { "epoch": 0.11027958203897203, "grad_norm": 5.4564933215479305, "learning_rate": 1.9997623964450002e-05, "loss": 1.2284, "step": 781 }, { "epoch": 0.11042078508895792, "grad_norm": 5.405189961559795, "learning_rate": 1.9997590618329507e-05, "loss": 1.0186, "step": 782 }, { "epoch": 0.1105619881389438, "grad_norm": 5.0645601975123835, "learning_rate": 1.9997557039869325e-05, "loss": 1.0671, "step": 783 }, { "epoch": 0.11070319118892968, "grad_norm": 5.86063819787201, "learning_rate": 1.999752322907023e-05, "loss": 1.3963, "step": 784 }, { "epoch": 0.11084439423891557, "grad_norm": 5.642557978601218, "learning_rate": 1.9997489185933015e-05, "loss": 1.2447, "step": 785 }, { "epoch": 0.11098559728890144, "grad_norm": 5.104626697522372, "learning_rate": 1.999745491045847e-05, "loss": 1.0064, "step": 786 }, { "epoch": 0.11112680033888732, "grad_norm": 4.268100775435925, "learning_rate": 1.999742040264739e-05, "loss": 1.0119, "step": 787 }, { "epoch": 0.1112680033888732, "grad_norm": 7.139410754305002, "learning_rate": 1.9997385662500577e-05, "loss": 1.3604, "step": 788 }, { "epoch": 0.11140920643885908, "grad_norm": 4.946506825408107, "learning_rate": 1.999735069001884e-05, "loss": 0.818, "step": 789 }, { "epoch": 0.11155040948884495, "grad_norm": 5.780895871394449, "learning_rate": 1.999731548520299e-05, "loss": 1.0843, "step": 790 }, { "epoch": 0.11169161253883084, "grad_norm": 5.799587764253103, "learning_rate": 1.999728004805384e-05, "loss": 1.0935, "step": 791 }, { "epoch": 0.11183281558881672, "grad_norm": 6.748946933770958, "learning_rate": 1.9997244378572227e-05, "loss": 1.3733, "step": 792 }, { "epoch": 0.1119740186388026, "grad_norm": 6.52900877438131, "learning_rate": 1.999720847675897e-05, "loss": 1.1624, "step": 793 }, { "epoch": 0.11211522168878847, "grad_norm": 6.025993306476371, "learning_rate": 1.9997172342614902e-05, "loss": 0.8829, "step": 794 }, { "epoch": 0.11225642473877435, "grad_norm": 5.018883638760266, "learning_rate": 1.999713597614087e-05, "loss": 1.0535, "step": 795 }, { "epoch": 0.11239762778876024, "grad_norm": 5.828023063069, "learning_rate": 1.9997099377337714e-05, "loss": 1.1162, "step": 796 }, { "epoch": 0.11253883083874612, "grad_norm": 6.116322971079314, "learning_rate": 1.9997062546206287e-05, "loss": 1.3535, "step": 797 }, { "epoch": 0.112680033888732, "grad_norm": 5.443814569332978, "learning_rate": 1.999702548274744e-05, "loss": 0.9972, "step": 798 }, { "epoch": 0.11282123693871787, "grad_norm": 5.877595197960374, "learning_rate": 1.9996988186962044e-05, "loss": 0.9868, "step": 799 }, { "epoch": 0.11296243998870376, "grad_norm": 4.453449245801207, "learning_rate": 1.9996950658850956e-05, "loss": 0.9016, "step": 800 }, { "epoch": 0.11310364303868964, "grad_norm": 6.649263652676453, "learning_rate": 1.9996912898415055e-05, "loss": 1.3181, "step": 801 }, { "epoch": 0.11324484608867552, "grad_norm": 4.548357238389129, "learning_rate": 1.9996874905655214e-05, "loss": 0.9035, "step": 802 }, { "epoch": 0.11338604913866139, "grad_norm": 5.666277269697033, "learning_rate": 1.9996836680572315e-05, "loss": 1.2294, "step": 803 }, { "epoch": 0.11352725218864727, "grad_norm": 5.607580893143515, "learning_rate": 1.999679822316725e-05, "loss": 1.2376, "step": 804 }, { "epoch": 0.11366845523863316, "grad_norm": 5.618724506035481, "learning_rate": 1.9996759533440916e-05, "loss": 1.0262, "step": 805 }, { "epoch": 0.11380965828861904, "grad_norm": 5.000879783261625, "learning_rate": 1.99967206113942e-05, "loss": 1.1771, "step": 806 }, { "epoch": 0.11395086133860491, "grad_norm": 6.430629249895955, "learning_rate": 1.9996681457028024e-05, "loss": 1.2216, "step": 807 }, { "epoch": 0.11409206438859079, "grad_norm": 5.297120763561583, "learning_rate": 1.999664207034328e-05, "loss": 1.155, "step": 808 }, { "epoch": 0.11423326743857667, "grad_norm": 5.751556472820045, "learning_rate": 1.9996602451340892e-05, "loss": 1.1242, "step": 809 }, { "epoch": 0.11437447048856256, "grad_norm": 5.721804381703796, "learning_rate": 1.9996562600021786e-05, "loss": 1.2767, "step": 810 }, { "epoch": 0.11451567353854843, "grad_norm": 5.893988920391293, "learning_rate": 1.9996522516386875e-05, "loss": 1.1995, "step": 811 }, { "epoch": 0.11465687658853431, "grad_norm": 5.9031098736613705, "learning_rate": 1.9996482200437104e-05, "loss": 1.1383, "step": 812 }, { "epoch": 0.11479807963852019, "grad_norm": 5.678267009884401, "learning_rate": 1.9996441652173403e-05, "loss": 1.044, "step": 813 }, { "epoch": 0.11493928268850608, "grad_norm": 5.464529351162833, "learning_rate": 1.999640087159671e-05, "loss": 1.1106, "step": 814 }, { "epoch": 0.11508048573849196, "grad_norm": 5.094443793906905, "learning_rate": 1.9996359858707984e-05, "loss": 1.0449, "step": 815 }, { "epoch": 0.11522168878847783, "grad_norm": 6.511428641399698, "learning_rate": 1.999631861350817e-05, "loss": 1.4996, "step": 816 }, { "epoch": 0.11536289183846371, "grad_norm": 5.385932380290889, "learning_rate": 1.999627713599823e-05, "loss": 1.1581, "step": 817 }, { "epoch": 0.1155040948884496, "grad_norm": 5.233641318862517, "learning_rate": 1.999623542617912e-05, "loss": 1.0391, "step": 818 }, { "epoch": 0.11564529793843548, "grad_norm": 5.655396083274696, "learning_rate": 1.9996193484051822e-05, "loss": 1.1819, "step": 819 }, { "epoch": 0.11578650098842135, "grad_norm": 6.204063907496864, "learning_rate": 1.99961513096173e-05, "loss": 1.4043, "step": 820 }, { "epoch": 0.11592770403840723, "grad_norm": 5.160519482087256, "learning_rate": 1.9996108902876544e-05, "loss": 1.2475, "step": 821 }, { "epoch": 0.11606890708839311, "grad_norm": 5.033055551242292, "learning_rate": 1.9996066263830533e-05, "loss": 0.9497, "step": 822 }, { "epoch": 0.116210110138379, "grad_norm": 5.4801078556293, "learning_rate": 1.9996023392480254e-05, "loss": 1.1818, "step": 823 }, { "epoch": 0.11635131318836486, "grad_norm": 5.3761131553378965, "learning_rate": 1.9995980288826714e-05, "loss": 1.2519, "step": 824 }, { "epoch": 0.11649251623835075, "grad_norm": 5.405889625421241, "learning_rate": 1.9995936952870905e-05, "loss": 1.0568, "step": 825 }, { "epoch": 0.11663371928833663, "grad_norm": 5.3021710412145575, "learning_rate": 1.999589338461384e-05, "loss": 1.023, "step": 826 }, { "epoch": 0.11677492233832251, "grad_norm": 5.785930181095784, "learning_rate": 1.999584958405653e-05, "loss": 1.2078, "step": 827 }, { "epoch": 0.11691612538830838, "grad_norm": 5.5250382539465726, "learning_rate": 1.9995805551199988e-05, "loss": 1.1602, "step": 828 }, { "epoch": 0.11705732843829426, "grad_norm": 5.16493805362523, "learning_rate": 1.9995761286045248e-05, "loss": 1.1591, "step": 829 }, { "epoch": 0.11719853148828015, "grad_norm": 5.133249324233252, "learning_rate": 1.999571678859333e-05, "loss": 1.0251, "step": 830 }, { "epoch": 0.11733973453826603, "grad_norm": 5.239574428739896, "learning_rate": 1.9995672058845272e-05, "loss": 1.2703, "step": 831 }, { "epoch": 0.11748093758825191, "grad_norm": 5.625553408129529, "learning_rate": 1.999562709680211e-05, "loss": 1.193, "step": 832 }, { "epoch": 0.11762214063823778, "grad_norm": 4.342673750607401, "learning_rate": 1.9995581902464894e-05, "loss": 0.9556, "step": 833 }, { "epoch": 0.11776334368822367, "grad_norm": 5.052398275117559, "learning_rate": 1.9995536475834667e-05, "loss": 1.0436, "step": 834 }, { "epoch": 0.11790454673820955, "grad_norm": 5.991484523628505, "learning_rate": 1.9995490816912494e-05, "loss": 1.3851, "step": 835 }, { "epoch": 0.11804574978819543, "grad_norm": 5.714464558199651, "learning_rate": 1.9995444925699427e-05, "loss": 1.1418, "step": 836 }, { "epoch": 0.1181869528381813, "grad_norm": 5.027890337363048, "learning_rate": 1.999539880219654e-05, "loss": 1.317, "step": 837 }, { "epoch": 0.11832815588816718, "grad_norm": 4.437889276305247, "learning_rate": 1.99953524464049e-05, "loss": 0.892, "step": 838 }, { "epoch": 0.11846935893815307, "grad_norm": 5.1281825633815545, "learning_rate": 1.9995305858325586e-05, "loss": 1.0215, "step": 839 }, { "epoch": 0.11861056198813895, "grad_norm": 5.557320859358658, "learning_rate": 1.999525903795968e-05, "loss": 1.2308, "step": 840 }, { "epoch": 0.11875176503812482, "grad_norm": 5.352457908193261, "learning_rate": 1.999521198530827e-05, "loss": 1.2149, "step": 841 }, { "epoch": 0.1188929680881107, "grad_norm": 5.049151601059812, "learning_rate": 1.9995164700372448e-05, "loss": 1.043, "step": 842 }, { "epoch": 0.11903417113809658, "grad_norm": 6.812611063105615, "learning_rate": 1.999511718315332e-05, "loss": 1.1816, "step": 843 }, { "epoch": 0.11917537418808247, "grad_norm": 5.033233034273857, "learning_rate": 1.9995069433651985e-05, "loss": 1.1476, "step": 844 }, { "epoch": 0.11931657723806834, "grad_norm": 5.099808940755445, "learning_rate": 1.9995021451869548e-05, "loss": 1.1959, "step": 845 }, { "epoch": 0.11945778028805422, "grad_norm": 4.772748515716294, "learning_rate": 1.9994973237807133e-05, "loss": 1.0424, "step": 846 }, { "epoch": 0.1195989833380401, "grad_norm": 5.122110864730668, "learning_rate": 1.9994924791465854e-05, "loss": 1.0853, "step": 847 }, { "epoch": 0.11974018638802598, "grad_norm": 5.502888437833528, "learning_rate": 1.999487611284684e-05, "loss": 1.3397, "step": 848 }, { "epoch": 0.11988138943801187, "grad_norm": 5.207542648884867, "learning_rate": 1.9994827201951223e-05, "loss": 1.1872, "step": 849 }, { "epoch": 0.12002259248799774, "grad_norm": 5.293792420635834, "learning_rate": 1.9994778058780136e-05, "loss": 1.0728, "step": 850 }, { "epoch": 0.12016379553798362, "grad_norm": 5.2355440932748465, "learning_rate": 1.9994728683334726e-05, "loss": 1.0075, "step": 851 }, { "epoch": 0.1203049985879695, "grad_norm": 4.93290987765116, "learning_rate": 1.999467907561614e-05, "loss": 1.1079, "step": 852 }, { "epoch": 0.12044620163795539, "grad_norm": 5.021314451083187, "learning_rate": 1.9994629235625524e-05, "loss": 0.9773, "step": 853 }, { "epoch": 0.12058740468794125, "grad_norm": 5.2567430786820735, "learning_rate": 1.999457916336404e-05, "loss": 1.0051, "step": 854 }, { "epoch": 0.12072860773792714, "grad_norm": 5.04435006227269, "learning_rate": 1.9994528858832854e-05, "loss": 1.1814, "step": 855 }, { "epoch": 0.12086981078791302, "grad_norm": 5.6355051650302554, "learning_rate": 1.9994478322033134e-05, "loss": 1.0268, "step": 856 }, { "epoch": 0.1210110138378989, "grad_norm": 4.933867335534344, "learning_rate": 1.9994427552966056e-05, "loss": 1.1164, "step": 857 }, { "epoch": 0.12115221688788477, "grad_norm": 4.370951102868625, "learning_rate": 1.9994376551632796e-05, "loss": 0.8646, "step": 858 }, { "epoch": 0.12129341993787066, "grad_norm": 4.460584837298375, "learning_rate": 1.999432531803454e-05, "loss": 0.9945, "step": 859 }, { "epoch": 0.12143462298785654, "grad_norm": 5.0714085073396795, "learning_rate": 1.9994273852172484e-05, "loss": 1.1131, "step": 860 }, { "epoch": 0.12157582603784242, "grad_norm": 4.7831668929163245, "learning_rate": 1.9994222154047815e-05, "loss": 1.0351, "step": 861 }, { "epoch": 0.12171702908782829, "grad_norm": 5.617787569481514, "learning_rate": 1.999417022366174e-05, "loss": 1.2635, "step": 862 }, { "epoch": 0.12185823213781417, "grad_norm": 7.9309459021833275, "learning_rate": 1.9994118061015468e-05, "loss": 1.215, "step": 863 }, { "epoch": 0.12199943518780006, "grad_norm": 5.5743713530017605, "learning_rate": 1.9994065666110206e-05, "loss": 1.0707, "step": 864 }, { "epoch": 0.12214063823778594, "grad_norm": 5.09601293123909, "learning_rate": 1.999401303894718e-05, "loss": 1.2704, "step": 865 }, { "epoch": 0.12228184128777182, "grad_norm": 4.7956150724392, "learning_rate": 1.99939601795276e-05, "loss": 1.2585, "step": 866 }, { "epoch": 0.12242304433775769, "grad_norm": 5.571223991807083, "learning_rate": 1.9993907087852703e-05, "loss": 1.156, "step": 867 }, { "epoch": 0.12256424738774357, "grad_norm": 5.1033386277020245, "learning_rate": 1.9993853763923724e-05, "loss": 1.1006, "step": 868 }, { "epoch": 0.12270545043772946, "grad_norm": 5.753998714877866, "learning_rate": 1.9993800207741896e-05, "loss": 1.3855, "step": 869 }, { "epoch": 0.12284665348771534, "grad_norm": 4.382155954718282, "learning_rate": 1.999374641930847e-05, "loss": 1.0522, "step": 870 }, { "epoch": 0.12298785653770121, "grad_norm": 5.270368299827882, "learning_rate": 1.999369239862469e-05, "loss": 1.2041, "step": 871 }, { "epoch": 0.12312905958768709, "grad_norm": 5.1403083748212905, "learning_rate": 1.9993638145691817e-05, "loss": 1.2324, "step": 872 }, { "epoch": 0.12327026263767298, "grad_norm": 6.08074670417796, "learning_rate": 1.999358366051111e-05, "loss": 1.1363, "step": 873 }, { "epoch": 0.12341146568765886, "grad_norm": 4.2939366533801975, "learning_rate": 1.999352894308383e-05, "loss": 1.0829, "step": 874 }, { "epoch": 0.12355266873764473, "grad_norm": 5.272779825078832, "learning_rate": 1.999347399341126e-05, "loss": 1.1646, "step": 875 }, { "epoch": 0.12369387178763061, "grad_norm": 5.2176497161857585, "learning_rate": 1.9993418811494663e-05, "loss": 1.2705, "step": 876 }, { "epoch": 0.1238350748376165, "grad_norm": 5.442022399731426, "learning_rate": 1.9993363397335335e-05, "loss": 1.1217, "step": 877 }, { "epoch": 0.12397627788760238, "grad_norm": 6.38768066327861, "learning_rate": 1.9993307750934555e-05, "loss": 1.3341, "step": 878 }, { "epoch": 0.12411748093758825, "grad_norm": 4.973903989077026, "learning_rate": 1.9993251872293618e-05, "loss": 1.0029, "step": 879 }, { "epoch": 0.12425868398757413, "grad_norm": 5.268164174800898, "learning_rate": 1.9993195761413823e-05, "loss": 0.9974, "step": 880 }, { "epoch": 0.12439988703756001, "grad_norm": 5.21981647274676, "learning_rate": 1.9993139418296477e-05, "loss": 1.104, "step": 881 }, { "epoch": 0.1245410900875459, "grad_norm": 5.670791790579677, "learning_rate": 1.9993082842942883e-05, "loss": 1.285, "step": 882 }, { "epoch": 0.12468229313753178, "grad_norm": 5.40065965184563, "learning_rate": 1.999302603535436e-05, "loss": 1.1038, "step": 883 }, { "epoch": 0.12482349618751765, "grad_norm": 6.098120544259875, "learning_rate": 1.9992968995532228e-05, "loss": 1.0751, "step": 884 }, { "epoch": 0.12496469923750353, "grad_norm": 5.6945610815131955, "learning_rate": 1.9992911723477816e-05, "loss": 1.2666, "step": 885 }, { "epoch": 0.1251059022874894, "grad_norm": 6.417872362780822, "learning_rate": 1.999285421919245e-05, "loss": 1.4128, "step": 886 }, { "epoch": 0.1252471053374753, "grad_norm": 5.643168660643183, "learning_rate": 1.9992796482677465e-05, "loss": 1.2438, "step": 887 }, { "epoch": 0.12538830838746118, "grad_norm": 6.200068370802321, "learning_rate": 1.9992738513934205e-05, "loss": 1.187, "step": 888 }, { "epoch": 0.12552951143744706, "grad_norm": 5.334047649322766, "learning_rate": 1.9992680312964018e-05, "loss": 1.15, "step": 889 }, { "epoch": 0.12567071448743292, "grad_norm": 5.644828267533013, "learning_rate": 1.9992621879768256e-05, "loss": 1.2009, "step": 890 }, { "epoch": 0.1258119175374188, "grad_norm": 5.888255855521819, "learning_rate": 1.999256321434828e-05, "loss": 1.3039, "step": 891 }, { "epoch": 0.12595312058740468, "grad_norm": 5.348523442219035, "learning_rate": 1.9992504316705446e-05, "loss": 1.1741, "step": 892 }, { "epoch": 0.12609432363739057, "grad_norm": 4.881682036266078, "learning_rate": 1.9992445186841128e-05, "loss": 1.1224, "step": 893 }, { "epoch": 0.12623552668737645, "grad_norm": 5.203730259820256, "learning_rate": 1.99923858247567e-05, "loss": 1.0781, "step": 894 }, { "epoch": 0.12637672973736233, "grad_norm": 4.774378450311665, "learning_rate": 1.9992326230453544e-05, "loss": 0.9942, "step": 895 }, { "epoch": 0.12651793278734821, "grad_norm": 5.248190466095374, "learning_rate": 1.9992266403933037e-05, "loss": 1.0486, "step": 896 }, { "epoch": 0.1266591358373341, "grad_norm": 5.102677308848711, "learning_rate": 1.9992206345196576e-05, "loss": 1.1761, "step": 897 }, { "epoch": 0.12680033888731998, "grad_norm": 5.495026680667837, "learning_rate": 1.9992146054245552e-05, "loss": 1.2891, "step": 898 }, { "epoch": 0.12694154193730584, "grad_norm": 5.084583161279488, "learning_rate": 1.9992085531081374e-05, "loss": 1.127, "step": 899 }, { "epoch": 0.12708274498729172, "grad_norm": 5.617561940825893, "learning_rate": 1.999202477570544e-05, "loss": 1.1415, "step": 900 }, { "epoch": 0.1272239480372776, "grad_norm": 5.2426936350995375, "learning_rate": 1.9991963788119166e-05, "loss": 1.1545, "step": 901 }, { "epoch": 0.12736515108726348, "grad_norm": 4.209370947675331, "learning_rate": 1.9991902568323972e-05, "loss": 0.9188, "step": 902 }, { "epoch": 0.12750635413724937, "grad_norm": 4.754577076981472, "learning_rate": 1.9991841116321272e-05, "loss": 1.0862, "step": 903 }, { "epoch": 0.12764755718723525, "grad_norm": 5.583435340630613, "learning_rate": 1.9991779432112503e-05, "loss": 1.0693, "step": 904 }, { "epoch": 0.12778876023722113, "grad_norm": 5.380354323379033, "learning_rate": 1.9991717515699097e-05, "loss": 1.3403, "step": 905 }, { "epoch": 0.12792996328720702, "grad_norm": 5.143434917955193, "learning_rate": 1.9991655367082487e-05, "loss": 1.0408, "step": 906 }, { "epoch": 0.12807116633719287, "grad_norm": 5.056272133552769, "learning_rate": 1.9991592986264122e-05, "loss": 1.0496, "step": 907 }, { "epoch": 0.12821236938717875, "grad_norm": 5.489078876073065, "learning_rate": 1.999153037324545e-05, "loss": 1.1358, "step": 908 }, { "epoch": 0.12835357243716464, "grad_norm": 5.269679865296767, "learning_rate": 1.999146752802793e-05, "loss": 1.2032, "step": 909 }, { "epoch": 0.12849477548715052, "grad_norm": 5.6333335099989, "learning_rate": 1.999140445061302e-05, "loss": 1.066, "step": 910 }, { "epoch": 0.1286359785371364, "grad_norm": 5.041319873023169, "learning_rate": 1.9991341141002185e-05, "loss": 1.1441, "step": 911 }, { "epoch": 0.12877718158712229, "grad_norm": 5.367812889439325, "learning_rate": 1.9991277599196897e-05, "loss": 0.9939, "step": 912 }, { "epoch": 0.12891838463710817, "grad_norm": 5.078656371145898, "learning_rate": 1.999121382519863e-05, "loss": 1.0985, "step": 913 }, { "epoch": 0.12905958768709405, "grad_norm": 4.946862885553495, "learning_rate": 1.999114981900887e-05, "loss": 1.061, "step": 914 }, { "epoch": 0.12920079073707993, "grad_norm": 5.180711918536928, "learning_rate": 1.9991085580629103e-05, "loss": 1.0385, "step": 915 }, { "epoch": 0.1293419937870658, "grad_norm": 4.780963636377455, "learning_rate": 1.9991021110060825e-05, "loss": 1.0859, "step": 916 }, { "epoch": 0.12948319683705167, "grad_norm": 4.735577384078431, "learning_rate": 1.9990956407305525e-05, "loss": 0.8622, "step": 917 }, { "epoch": 0.12962439988703756, "grad_norm": 5.5036610425201, "learning_rate": 1.999089147236472e-05, "loss": 1.1861, "step": 918 }, { "epoch": 0.12976560293702344, "grad_norm": 4.773065186470791, "learning_rate": 1.9990826305239906e-05, "loss": 1.0559, "step": 919 }, { "epoch": 0.12990680598700932, "grad_norm": 5.345862295352198, "learning_rate": 1.9990760905932605e-05, "loss": 1.2572, "step": 920 }, { "epoch": 0.1300480090369952, "grad_norm": 4.559381469417303, "learning_rate": 1.9990695274444338e-05, "loss": 1.0719, "step": 921 }, { "epoch": 0.1301892120869811, "grad_norm": 5.665120960887968, "learning_rate": 1.9990629410776624e-05, "loss": 1.3774, "step": 922 }, { "epoch": 0.13033041513696697, "grad_norm": 5.172174638994669, "learning_rate": 1.9990563314930997e-05, "loss": 1.015, "step": 923 }, { "epoch": 0.13047161818695283, "grad_norm": 5.499930974785996, "learning_rate": 1.9990496986908994e-05, "loss": 1.3084, "step": 924 }, { "epoch": 0.1306128212369387, "grad_norm": 5.197619345991682, "learning_rate": 1.9990430426712156e-05, "loss": 1.218, "step": 925 }, { "epoch": 0.1307540242869246, "grad_norm": 6.460598377532906, "learning_rate": 1.9990363634342032e-05, "loss": 1.2714, "step": 926 }, { "epoch": 0.13089522733691047, "grad_norm": 5.379512170317948, "learning_rate": 1.9990296609800167e-05, "loss": 1.0327, "step": 927 }, { "epoch": 0.13103643038689636, "grad_norm": 5.388522971829105, "learning_rate": 1.9990229353088123e-05, "loss": 1.3501, "step": 928 }, { "epoch": 0.13117763343688224, "grad_norm": 5.224493527208887, "learning_rate": 1.9990161864207466e-05, "loss": 1.2567, "step": 929 }, { "epoch": 0.13131883648686812, "grad_norm": 4.6317562806682675, "learning_rate": 1.9990094143159763e-05, "loss": 0.9792, "step": 930 }, { "epoch": 0.131460039536854, "grad_norm": 4.761843862735333, "learning_rate": 1.9990026189946584e-05, "loss": 1.1116, "step": 931 }, { "epoch": 0.1316012425868399, "grad_norm": 4.9942741246143125, "learning_rate": 1.9989958004569514e-05, "loss": 1.2391, "step": 932 }, { "epoch": 0.13174244563682574, "grad_norm": 5.331731136576249, "learning_rate": 1.998988958703013e-05, "loss": 1.1209, "step": 933 }, { "epoch": 0.13188364868681163, "grad_norm": 6.039204116458024, "learning_rate": 1.9989820937330028e-05, "loss": 1.4043, "step": 934 }, { "epoch": 0.1320248517367975, "grad_norm": 5.120888595994629, "learning_rate": 1.99897520554708e-05, "loss": 1.0491, "step": 935 }, { "epoch": 0.1321660547867834, "grad_norm": 4.754581901564395, "learning_rate": 1.998968294145405e-05, "loss": 1.1176, "step": 936 }, { "epoch": 0.13230725783676928, "grad_norm": 5.195528747894677, "learning_rate": 1.9989613595281384e-05, "loss": 0.963, "step": 937 }, { "epoch": 0.13244846088675516, "grad_norm": 5.6163949327877365, "learning_rate": 1.9989544016954414e-05, "loss": 1.406, "step": 938 }, { "epoch": 0.13258966393674104, "grad_norm": 5.192517204151998, "learning_rate": 1.998947420647475e-05, "loss": 0.9567, "step": 939 }, { "epoch": 0.13273086698672693, "grad_norm": 4.939326278914683, "learning_rate": 1.9989404163844026e-05, "loss": 1.0999, "step": 940 }, { "epoch": 0.13287207003671278, "grad_norm": 4.4363610143865, "learning_rate": 1.9989333889063856e-05, "loss": 0.9798, "step": 941 }, { "epoch": 0.13301327308669866, "grad_norm": 4.8234058295639795, "learning_rate": 1.9989263382135882e-05, "loss": 0.9433, "step": 942 }, { "epoch": 0.13315447613668455, "grad_norm": 5.2426868079866225, "learning_rate": 1.9989192643061745e-05, "loss": 1.183, "step": 943 }, { "epoch": 0.13329567918667043, "grad_norm": 5.63390095990124, "learning_rate": 1.998912167184308e-05, "loss": 1.3909, "step": 944 }, { "epoch": 0.1334368822366563, "grad_norm": 4.7800251715278606, "learning_rate": 1.9989050468481544e-05, "loss": 1.0927, "step": 945 }, { "epoch": 0.1335780852866422, "grad_norm": 5.059596253692547, "learning_rate": 1.998897903297879e-05, "loss": 1.1203, "step": 946 }, { "epoch": 0.13371928833662808, "grad_norm": 5.462436930269701, "learning_rate": 1.9988907365336478e-05, "loss": 1.0374, "step": 947 }, { "epoch": 0.13386049138661396, "grad_norm": 5.383252482072445, "learning_rate": 1.998883546555627e-05, "loss": 1.2318, "step": 948 }, { "epoch": 0.13400169443659984, "grad_norm": 4.691362619313088, "learning_rate": 1.9988763333639843e-05, "loss": 1.1946, "step": 949 }, { "epoch": 0.1341428974865857, "grad_norm": 4.928880192779834, "learning_rate": 1.9988690969588867e-05, "loss": 1.0886, "step": 950 }, { "epoch": 0.13428410053657158, "grad_norm": 4.3687379111943745, "learning_rate": 1.9988618373405025e-05, "loss": 0.903, "step": 951 }, { "epoch": 0.13442530358655747, "grad_norm": 5.125574725864024, "learning_rate": 1.998854554509001e-05, "loss": 1.2622, "step": 952 }, { "epoch": 0.13456650663654335, "grad_norm": 4.614140815104223, "learning_rate": 1.998847248464551e-05, "loss": 1.1588, "step": 953 }, { "epoch": 0.13470770968652923, "grad_norm": 4.128127436169854, "learning_rate": 1.998839919207322e-05, "loss": 1.0075, "step": 954 }, { "epoch": 0.13484891273651511, "grad_norm": 4.394625209204543, "learning_rate": 1.9988325667374848e-05, "loss": 1.1011, "step": 955 }, { "epoch": 0.134990115786501, "grad_norm": 4.9631838776976425, "learning_rate": 1.9988251910552103e-05, "loss": 1.2591, "step": 956 }, { "epoch": 0.13513131883648688, "grad_norm": 5.0553501367064415, "learning_rate": 1.998817792160669e-05, "loss": 1.2266, "step": 957 }, { "epoch": 0.13527252188647274, "grad_norm": 4.965229263011208, "learning_rate": 1.9988103700540345e-05, "loss": 1.0737, "step": 958 }, { "epoch": 0.13541372493645862, "grad_norm": 4.823682018357297, "learning_rate": 1.9988029247354778e-05, "loss": 1.2301, "step": 959 }, { "epoch": 0.1355549279864445, "grad_norm": 4.993761047777017, "learning_rate": 1.9987954562051724e-05, "loss": 1.0941, "step": 960 }, { "epoch": 0.13569613103643038, "grad_norm": 5.579508206335968, "learning_rate": 1.998787964463292e-05, "loss": 1.1355, "step": 961 }, { "epoch": 0.13583733408641627, "grad_norm": 5.927369532705596, "learning_rate": 1.9987804495100112e-05, "loss": 1.1492, "step": 962 }, { "epoch": 0.13597853713640215, "grad_norm": 4.344311792671202, "learning_rate": 1.9987729113455036e-05, "loss": 1.0834, "step": 963 }, { "epoch": 0.13611974018638803, "grad_norm": 5.393506333320731, "learning_rate": 1.998765349969945e-05, "loss": 1.0183, "step": 964 }, { "epoch": 0.13626094323637392, "grad_norm": 4.877987090652875, "learning_rate": 1.998757765383511e-05, "loss": 1.0941, "step": 965 }, { "epoch": 0.1364021462863598, "grad_norm": 5.249919120932146, "learning_rate": 1.9987501575863776e-05, "loss": 1.1639, "step": 966 }, { "epoch": 0.13654334933634565, "grad_norm": 5.798203926989949, "learning_rate": 1.9987425265787222e-05, "loss": 1.2088, "step": 967 }, { "epoch": 0.13668455238633154, "grad_norm": 6.22724404473044, "learning_rate": 1.9987348723607218e-05, "loss": 1.2981, "step": 968 }, { "epoch": 0.13682575543631742, "grad_norm": 5.448868754906509, "learning_rate": 1.9987271949325543e-05, "loss": 1.0919, "step": 969 }, { "epoch": 0.1369669584863033, "grad_norm": 4.71134074672123, "learning_rate": 1.998719494294398e-05, "loss": 1.062, "step": 970 }, { "epoch": 0.13710816153628919, "grad_norm": 4.841239063395571, "learning_rate": 1.998711770446432e-05, "loss": 1.0859, "step": 971 }, { "epoch": 0.13724936458627507, "grad_norm": 5.146262195727475, "learning_rate": 1.9987040233888355e-05, "loss": 1.2393, "step": 972 }, { "epoch": 0.13739056763626095, "grad_norm": 5.114334190310484, "learning_rate": 1.998696253121789e-05, "loss": 1.1822, "step": 973 }, { "epoch": 0.13753177068624683, "grad_norm": 4.794117383524985, "learning_rate": 1.998688459645473e-05, "loss": 1.238, "step": 974 }, { "epoch": 0.1376729737362327, "grad_norm": 4.287235871492822, "learning_rate": 1.9986806429600684e-05, "loss": 1.0582, "step": 975 }, { "epoch": 0.13781417678621857, "grad_norm": 5.86960034355485, "learning_rate": 1.998672803065757e-05, "loss": 1.1929, "step": 976 }, { "epoch": 0.13795537983620446, "grad_norm": 4.519127485751294, "learning_rate": 1.998664939962721e-05, "loss": 1.2046, "step": 977 }, { "epoch": 0.13809658288619034, "grad_norm": 4.67139988786646, "learning_rate": 1.998657053651143e-05, "loss": 1.1493, "step": 978 }, { "epoch": 0.13823778593617622, "grad_norm": 5.299227266277092, "learning_rate": 1.9986491441312064e-05, "loss": 1.2819, "step": 979 }, { "epoch": 0.1383789889861621, "grad_norm": 4.706529851370187, "learning_rate": 1.998641211403095e-05, "loss": 1.1905, "step": 980 }, { "epoch": 0.138520192036148, "grad_norm": 5.103760687920829, "learning_rate": 1.998633255466993e-05, "loss": 0.9903, "step": 981 }, { "epoch": 0.13866139508613387, "grad_norm": 4.803457248565893, "learning_rate": 1.9986252763230856e-05, "loss": 1.0923, "step": 982 }, { "epoch": 0.13880259813611975, "grad_norm": 5.68141816842776, "learning_rate": 1.998617273971558e-05, "loss": 1.2047, "step": 983 }, { "epoch": 0.1389438011861056, "grad_norm": 4.826803219845096, "learning_rate": 1.998609248412596e-05, "loss": 0.9226, "step": 984 }, { "epoch": 0.1390850042360915, "grad_norm": 5.220878720177872, "learning_rate": 1.9986011996463865e-05, "loss": 1.2218, "step": 985 }, { "epoch": 0.13922620728607737, "grad_norm": 5.4265888114952485, "learning_rate": 1.9985931276731162e-05, "loss": 1.1696, "step": 986 }, { "epoch": 0.13936741033606326, "grad_norm": 5.902399316085023, "learning_rate": 1.998585032492973e-05, "loss": 1.1828, "step": 987 }, { "epoch": 0.13950861338604914, "grad_norm": 4.663341571879512, "learning_rate": 1.998576914106145e-05, "loss": 1.1453, "step": 988 }, { "epoch": 0.13964981643603502, "grad_norm": 5.1009069846630535, "learning_rate": 1.9985687725128208e-05, "loss": 1.1245, "step": 989 }, { "epoch": 0.1397910194860209, "grad_norm": 5.193301142220486, "learning_rate": 1.9985606077131895e-05, "loss": 1.2048, "step": 990 }, { "epoch": 0.1399322225360068, "grad_norm": 4.837039022757927, "learning_rate": 1.9985524197074412e-05, "loss": 1.0964, "step": 991 }, { "epoch": 0.14007342558599264, "grad_norm": 5.303187748834104, "learning_rate": 1.9985442084957657e-05, "loss": 1.0946, "step": 992 }, { "epoch": 0.14021462863597853, "grad_norm": 5.031949316489782, "learning_rate": 1.9985359740783538e-05, "loss": 0.9783, "step": 993 }, { "epoch": 0.1403558316859644, "grad_norm": 6.128873408688188, "learning_rate": 1.9985277164553972e-05, "loss": 1.2135, "step": 994 }, { "epoch": 0.1404970347359503, "grad_norm": 5.2262161075540225, "learning_rate": 1.9985194356270877e-05, "loss": 1.2272, "step": 995 }, { "epoch": 0.14063823778593618, "grad_norm": 5.4524975848746005, "learning_rate": 1.9985111315936177e-05, "loss": 1.086, "step": 996 }, { "epoch": 0.14077944083592206, "grad_norm": 4.7661745359885215, "learning_rate": 1.9985028043551804e-05, "loss": 1.1571, "step": 997 }, { "epoch": 0.14092064388590794, "grad_norm": 5.342198708993227, "learning_rate": 1.998494453911969e-05, "loss": 1.1877, "step": 998 }, { "epoch": 0.14106184693589383, "grad_norm": 5.801971382713709, "learning_rate": 1.998486080264178e-05, "loss": 1.3002, "step": 999 }, { "epoch": 0.1412030499858797, "grad_norm": 5.454876186146265, "learning_rate": 1.9984776834120015e-05, "loss": 1.1621, "step": 1000 }, { "epoch": 0.14134425303586556, "grad_norm": 4.813788366382928, "learning_rate": 1.998469263355635e-05, "loss": 0.9452, "step": 1001 }, { "epoch": 0.14148545608585145, "grad_norm": 5.624256910985407, "learning_rate": 1.9984608200952736e-05, "loss": 1.0643, "step": 1002 }, { "epoch": 0.14162665913583733, "grad_norm": 4.719082834886343, "learning_rate": 1.9984523536311143e-05, "loss": 0.9886, "step": 1003 }, { "epoch": 0.1417678621858232, "grad_norm": 4.741326545885201, "learning_rate": 1.9984438639633534e-05, "loss": 1.0628, "step": 1004 }, { "epoch": 0.1419090652358091, "grad_norm": 6.709670649714525, "learning_rate": 1.998435351092188e-05, "loss": 1.2321, "step": 1005 }, { "epoch": 0.14205026828579498, "grad_norm": 4.747265385337215, "learning_rate": 1.998426815017817e-05, "loss": 1.1405, "step": 1006 }, { "epoch": 0.14219147133578086, "grad_norm": 4.955184824852184, "learning_rate": 1.998418255740437e-05, "loss": 1.1816, "step": 1007 }, { "epoch": 0.14233267438576674, "grad_norm": 4.948725132811143, "learning_rate": 1.9984096732602485e-05, "loss": 1.1544, "step": 1008 }, { "epoch": 0.1424738774357526, "grad_norm": 5.005213721677638, "learning_rate": 1.9984010675774504e-05, "loss": 1.3052, "step": 1009 }, { "epoch": 0.14261508048573848, "grad_norm": 4.985660029015741, "learning_rate": 1.9983924386922427e-05, "loss": 1.0762, "step": 1010 }, { "epoch": 0.14275628353572437, "grad_norm": 4.927306637627592, "learning_rate": 1.998383786604826e-05, "loss": 1.1855, "step": 1011 }, { "epoch": 0.14289748658571025, "grad_norm": 4.409473994268329, "learning_rate": 1.998375111315401e-05, "loss": 1.0704, "step": 1012 }, { "epoch": 0.14303868963569613, "grad_norm": 4.739180242718452, "learning_rate": 1.9983664128241694e-05, "loss": 1.1632, "step": 1013 }, { "epoch": 0.14317989268568201, "grad_norm": 5.03655566069825, "learning_rate": 1.998357691131334e-05, "loss": 1.275, "step": 1014 }, { "epoch": 0.1433210957356679, "grad_norm": 4.923270709707517, "learning_rate": 1.998348946237097e-05, "loss": 1.0444, "step": 1015 }, { "epoch": 0.14346229878565378, "grad_norm": 5.7277676659111165, "learning_rate": 1.998340178141661e-05, "loss": 1.0806, "step": 1016 }, { "epoch": 0.14360350183563966, "grad_norm": 5.334532052127538, "learning_rate": 1.998331386845231e-05, "loss": 1.1856, "step": 1017 }, { "epoch": 0.14374470488562552, "grad_norm": 4.974947805606638, "learning_rate": 1.9983225723480104e-05, "loss": 0.9759, "step": 1018 }, { "epoch": 0.1438859079356114, "grad_norm": 5.465656018484248, "learning_rate": 1.9983137346502046e-05, "loss": 1.0451, "step": 1019 }, { "epoch": 0.14402711098559728, "grad_norm": 4.889948656561684, "learning_rate": 1.9983048737520186e-05, "loss": 1.068, "step": 1020 }, { "epoch": 0.14416831403558317, "grad_norm": 5.804464843060851, "learning_rate": 1.9982959896536588e-05, "loss": 1.1558, "step": 1021 }, { "epoch": 0.14430951708556905, "grad_norm": 4.819953150693295, "learning_rate": 1.998287082355331e-05, "loss": 1.0911, "step": 1022 }, { "epoch": 0.14445072013555493, "grad_norm": 4.27568049613761, "learning_rate": 1.9982781518572424e-05, "loss": 0.9496, "step": 1023 }, { "epoch": 0.14459192318554082, "grad_norm": 5.155309453242915, "learning_rate": 1.998269198159601e-05, "loss": 1.074, "step": 1024 }, { "epoch": 0.1447331262355267, "grad_norm": 5.09547458259276, "learning_rate": 1.9982602212626144e-05, "loss": 1.2055, "step": 1025 }, { "epoch": 0.14487432928551255, "grad_norm": 5.4946745608607985, "learning_rate": 1.9982512211664914e-05, "loss": 1.1038, "step": 1026 }, { "epoch": 0.14501553233549844, "grad_norm": 4.844896733765715, "learning_rate": 1.998242197871441e-05, "loss": 0.9975, "step": 1027 }, { "epoch": 0.14515673538548432, "grad_norm": 5.9709915805171745, "learning_rate": 1.9982331513776733e-05, "loss": 1.4021, "step": 1028 }, { "epoch": 0.1452979384354702, "grad_norm": 5.475157761291001, "learning_rate": 1.9982240816853983e-05, "loss": 1.2389, "step": 1029 }, { "epoch": 0.14543914148545609, "grad_norm": 4.8643699153987, "learning_rate": 1.9982149887948264e-05, "loss": 0.9482, "step": 1030 }, { "epoch": 0.14558034453544197, "grad_norm": 4.365670520441248, "learning_rate": 1.9982058727061692e-05, "loss": 0.9553, "step": 1031 }, { "epoch": 0.14572154758542785, "grad_norm": 5.20439111539994, "learning_rate": 1.998196733419639e-05, "loss": 1.0998, "step": 1032 }, { "epoch": 0.14586275063541373, "grad_norm": 4.683631406535726, "learning_rate": 1.9981875709354478e-05, "loss": 1.234, "step": 1033 }, { "epoch": 0.14600395368539962, "grad_norm": 4.816067248130286, "learning_rate": 1.998178385253808e-05, "loss": 1.2131, "step": 1034 }, { "epoch": 0.14614515673538547, "grad_norm": 5.18864985856515, "learning_rate": 1.998169176374934e-05, "loss": 1.1972, "step": 1035 }, { "epoch": 0.14628635978537136, "grad_norm": 4.8246602370808755, "learning_rate": 1.9981599442990397e-05, "loss": 0.9288, "step": 1036 }, { "epoch": 0.14642756283535724, "grad_norm": 4.208838681132935, "learning_rate": 1.998150689026339e-05, "loss": 0.9546, "step": 1037 }, { "epoch": 0.14656876588534312, "grad_norm": 4.955894406623324, "learning_rate": 1.9981414105570473e-05, "loss": 1.0658, "step": 1038 }, { "epoch": 0.146709968935329, "grad_norm": 4.68067412598076, "learning_rate": 1.9981321088913806e-05, "loss": 1.0163, "step": 1039 }, { "epoch": 0.1468511719853149, "grad_norm": 4.645297238272814, "learning_rate": 1.9981227840295544e-05, "loss": 1.0526, "step": 1040 }, { "epoch": 0.14699237503530077, "grad_norm": 4.7469964863101834, "learning_rate": 1.998113435971786e-05, "loss": 1.0682, "step": 1041 }, { "epoch": 0.14713357808528665, "grad_norm": 5.000522242682167, "learning_rate": 1.9981040647182923e-05, "loss": 0.9545, "step": 1042 }, { "epoch": 0.1472747811352725, "grad_norm": 5.042863383463851, "learning_rate": 1.998094670269291e-05, "loss": 1.1831, "step": 1043 }, { "epoch": 0.1474159841852584, "grad_norm": 4.275336733477554, "learning_rate": 1.998085252625001e-05, "loss": 0.8503, "step": 1044 }, { "epoch": 0.14755718723524427, "grad_norm": 4.550947022277699, "learning_rate": 1.9980758117856403e-05, "loss": 1.1488, "step": 1045 }, { "epoch": 0.14769839028523016, "grad_norm": 5.06680443540758, "learning_rate": 1.9980663477514294e-05, "loss": 1.1539, "step": 1046 }, { "epoch": 0.14783959333521604, "grad_norm": 5.713185343523915, "learning_rate": 1.998056860522587e-05, "loss": 1.2296, "step": 1047 }, { "epoch": 0.14798079638520192, "grad_norm": 5.230852325398315, "learning_rate": 1.9980473500993346e-05, "loss": 1.1401, "step": 1048 }, { "epoch": 0.1481219994351878, "grad_norm": 3.9325487650392126, "learning_rate": 1.9980378164818926e-05, "loss": 1.0065, "step": 1049 }, { "epoch": 0.1482632024851737, "grad_norm": 4.674932051075275, "learning_rate": 1.9980282596704828e-05, "loss": 1.2407, "step": 1050 }, { "epoch": 0.14840440553515957, "grad_norm": 4.694106343490261, "learning_rate": 1.998018679665327e-05, "loss": 1.1867, "step": 1051 }, { "epoch": 0.14854560858514543, "grad_norm": 5.383007842176398, "learning_rate": 1.9980090764666486e-05, "loss": 1.193, "step": 1052 }, { "epoch": 0.1486868116351313, "grad_norm": 4.840170984239393, "learning_rate": 1.9979994500746697e-05, "loss": 1.0282, "step": 1053 }, { "epoch": 0.1488280146851172, "grad_norm": 5.101448686942334, "learning_rate": 1.997989800489615e-05, "loss": 1.1701, "step": 1054 }, { "epoch": 0.14896921773510308, "grad_norm": 4.815828081728059, "learning_rate": 1.9979801277117082e-05, "loss": 1.1164, "step": 1055 }, { "epoch": 0.14911042078508896, "grad_norm": 4.783152538415447, "learning_rate": 1.9979704317411745e-05, "loss": 1.3569, "step": 1056 }, { "epoch": 0.14925162383507484, "grad_norm": 4.879324034262992, "learning_rate": 1.9979607125782387e-05, "loss": 1.1476, "step": 1057 }, { "epoch": 0.14939282688506073, "grad_norm": 4.548052852951136, "learning_rate": 1.997950970223127e-05, "loss": 1.2485, "step": 1058 }, { "epoch": 0.1495340299350466, "grad_norm": 5.058620602455277, "learning_rate": 1.9979412046760657e-05, "loss": 1.2437, "step": 1059 }, { "epoch": 0.14967523298503246, "grad_norm": 4.165882301703776, "learning_rate": 1.9979314159372815e-05, "loss": 1.1035, "step": 1060 }, { "epoch": 0.14981643603501835, "grad_norm": 4.231504717921089, "learning_rate": 1.9979216040070026e-05, "loss": 1.045, "step": 1061 }, { "epoch": 0.14995763908500423, "grad_norm": 4.500094370397637, "learning_rate": 1.9979117688854565e-05, "loss": 1.1506, "step": 1062 }, { "epoch": 0.1500988421349901, "grad_norm": 5.022187789468775, "learning_rate": 1.9979019105728717e-05, "loss": 1.0845, "step": 1063 }, { "epoch": 0.150240045184976, "grad_norm": 4.388156715038051, "learning_rate": 1.9978920290694776e-05, "loss": 1.0058, "step": 1064 }, { "epoch": 0.15038124823496188, "grad_norm": 4.49583224495091, "learning_rate": 1.9978821243755035e-05, "loss": 1.0972, "step": 1065 }, { "epoch": 0.15052245128494776, "grad_norm": 5.150998331665217, "learning_rate": 1.99787219649118e-05, "loss": 1.2336, "step": 1066 }, { "epoch": 0.15066365433493364, "grad_norm": 4.5903759486498155, "learning_rate": 1.9978622454167376e-05, "loss": 1.0926, "step": 1067 }, { "epoch": 0.15080485738491953, "grad_norm": 5.901878520313315, "learning_rate": 1.9978522711524076e-05, "loss": 1.2474, "step": 1068 }, { "epoch": 0.15094606043490538, "grad_norm": 5.524109606050706, "learning_rate": 1.9978422736984216e-05, "loss": 1.0601, "step": 1069 }, { "epoch": 0.15108726348489127, "grad_norm": 5.9551718005021606, "learning_rate": 1.997832253055012e-05, "loss": 1.2725, "step": 1070 }, { "epoch": 0.15122846653487715, "grad_norm": 4.989541845809095, "learning_rate": 1.997822209222412e-05, "loss": 1.2202, "step": 1071 }, { "epoch": 0.15136966958486303, "grad_norm": 5.276690780495154, "learning_rate": 1.9978121422008547e-05, "loss": 1.2666, "step": 1072 }, { "epoch": 0.15151087263484891, "grad_norm": 4.789907089313249, "learning_rate": 1.9978020519905742e-05, "loss": 1.1662, "step": 1073 }, { "epoch": 0.1516520756848348, "grad_norm": 7.601629566967476, "learning_rate": 1.997791938591805e-05, "loss": 1.1854, "step": 1074 }, { "epoch": 0.15179327873482068, "grad_norm": 6.0086804548275285, "learning_rate": 1.9977818020047816e-05, "loss": 1.4079, "step": 1075 }, { "epoch": 0.15193448178480656, "grad_norm": 4.326979328470599, "learning_rate": 1.9977716422297404e-05, "loss": 1.0989, "step": 1076 }, { "epoch": 0.15207568483479242, "grad_norm": 5.097828981266428, "learning_rate": 1.997761459266917e-05, "loss": 1.1963, "step": 1077 }, { "epoch": 0.1522168878847783, "grad_norm": 4.7327716238405335, "learning_rate": 1.9977512531165484e-05, "loss": 1.3076, "step": 1078 }, { "epoch": 0.15235809093476418, "grad_norm": 4.041750148542854, "learning_rate": 1.9977410237788715e-05, "loss": 0.9088, "step": 1079 }, { "epoch": 0.15249929398475007, "grad_norm": 5.703759166864815, "learning_rate": 1.997730771254124e-05, "loss": 1.2683, "step": 1080 }, { "epoch": 0.15264049703473595, "grad_norm": 4.953506936126508, "learning_rate": 1.9977204955425443e-05, "loss": 1.0767, "step": 1081 }, { "epoch": 0.15278170008472183, "grad_norm": 4.829695878897943, "learning_rate": 1.9977101966443713e-05, "loss": 1.146, "step": 1082 }, { "epoch": 0.15292290313470772, "grad_norm": 4.620173504468806, "learning_rate": 1.997699874559844e-05, "loss": 1.0712, "step": 1083 }, { "epoch": 0.1530641061846936, "grad_norm": 4.977907880105714, "learning_rate": 1.9976895292892028e-05, "loss": 1.0521, "step": 1084 }, { "epoch": 0.15320530923467948, "grad_norm": 5.566691571022359, "learning_rate": 1.9976791608326876e-05, "loss": 1.4203, "step": 1085 }, { "epoch": 0.15334651228466534, "grad_norm": 4.78538990233984, "learning_rate": 1.9976687691905394e-05, "loss": 0.9861, "step": 1086 }, { "epoch": 0.15348771533465122, "grad_norm": 5.695630859346557, "learning_rate": 1.997658354363e-05, "loss": 1.2845, "step": 1087 }, { "epoch": 0.1536289183846371, "grad_norm": 5.890929271497539, "learning_rate": 1.9976479163503112e-05, "loss": 1.1156, "step": 1088 }, { "epoch": 0.15377012143462299, "grad_norm": 5.596829149931767, "learning_rate": 1.9976374551527156e-05, "loss": 1.2213, "step": 1089 }, { "epoch": 0.15391132448460887, "grad_norm": 4.313428149548073, "learning_rate": 1.997626970770457e-05, "loss": 1.1139, "step": 1090 }, { "epoch": 0.15405252753459475, "grad_norm": 5.2196992029204266, "learning_rate": 1.997616463203778e-05, "loss": 1.0761, "step": 1091 }, { "epoch": 0.15419373058458063, "grad_norm": 4.786288220678975, "learning_rate": 1.997605932452923e-05, "loss": 1.1584, "step": 1092 }, { "epoch": 0.15433493363456652, "grad_norm": 4.1452058579049895, "learning_rate": 1.9975953785181373e-05, "loss": 1.1304, "step": 1093 }, { "epoch": 0.15447613668455237, "grad_norm": 5.520870451267546, "learning_rate": 1.9975848013996658e-05, "loss": 1.313, "step": 1094 }, { "epoch": 0.15461733973453826, "grad_norm": 5.327997661783379, "learning_rate": 1.9975742010977544e-05, "loss": 1.3395, "step": 1095 }, { "epoch": 0.15475854278452414, "grad_norm": 5.023970991530368, "learning_rate": 1.9975635776126492e-05, "loss": 1.2949, "step": 1096 }, { "epoch": 0.15489974583451002, "grad_norm": 4.581170786574883, "learning_rate": 1.9975529309445972e-05, "loss": 1.0555, "step": 1097 }, { "epoch": 0.1550409488844959, "grad_norm": 5.300415428719428, "learning_rate": 1.9975422610938463e-05, "loss": 1.1372, "step": 1098 }, { "epoch": 0.1551821519344818, "grad_norm": 5.146995178214368, "learning_rate": 1.9975315680606436e-05, "loss": 1.1885, "step": 1099 }, { "epoch": 0.15532335498446767, "grad_norm": 4.3958111063798295, "learning_rate": 1.9975208518452384e-05, "loss": 1.0613, "step": 1100 }, { "epoch": 0.15546455803445355, "grad_norm": 4.496637765971624, "learning_rate": 1.9975101124478794e-05, "loss": 1.0108, "step": 1101 }, { "epoch": 0.15560576108443944, "grad_norm": 5.195064630750615, "learning_rate": 1.997499349868816e-05, "loss": 1.2129, "step": 1102 }, { "epoch": 0.1557469641344253, "grad_norm": 6.025793699254552, "learning_rate": 1.997488564108298e-05, "loss": 1.2281, "step": 1103 }, { "epoch": 0.15588816718441117, "grad_norm": 4.2671990513538605, "learning_rate": 1.9974777551665773e-05, "loss": 0.9592, "step": 1104 }, { "epoch": 0.15602937023439706, "grad_norm": 5.099057981736808, "learning_rate": 1.9974669230439043e-05, "loss": 1.0767, "step": 1105 }, { "epoch": 0.15617057328438294, "grad_norm": 4.43202272681864, "learning_rate": 1.9974560677405307e-05, "loss": 0.993, "step": 1106 }, { "epoch": 0.15631177633436882, "grad_norm": 4.220344609414598, "learning_rate": 1.9974451892567086e-05, "loss": 0.9865, "step": 1107 }, { "epoch": 0.1564529793843547, "grad_norm": 5.143021275270448, "learning_rate": 1.9974342875926913e-05, "loss": 1.5237, "step": 1108 }, { "epoch": 0.1565941824343406, "grad_norm": 4.738749555939336, "learning_rate": 1.997423362748732e-05, "loss": 1.1025, "step": 1109 }, { "epoch": 0.15673538548432647, "grad_norm": 5.05114651810557, "learning_rate": 1.997412414725084e-05, "loss": 1.2272, "step": 1110 }, { "epoch": 0.15687658853431233, "grad_norm": 4.591643963880788, "learning_rate": 1.9974014435220027e-05, "loss": 1.0934, "step": 1111 }, { "epoch": 0.1570177915842982, "grad_norm": 5.062834204920063, "learning_rate": 1.9973904491397426e-05, "loss": 1.174, "step": 1112 }, { "epoch": 0.1571589946342841, "grad_norm": 4.678185160298189, "learning_rate": 1.9973794315785587e-05, "loss": 1.0912, "step": 1113 }, { "epoch": 0.15730019768426998, "grad_norm": 4.19405399220832, "learning_rate": 1.997368390838708e-05, "loss": 0.8624, "step": 1114 }, { "epoch": 0.15744140073425586, "grad_norm": 5.145176571186488, "learning_rate": 1.9973573269204466e-05, "loss": 1.1022, "step": 1115 }, { "epoch": 0.15758260378424174, "grad_norm": 4.930982695701197, "learning_rate": 1.9973462398240316e-05, "loss": 0.9899, "step": 1116 }, { "epoch": 0.15772380683422763, "grad_norm": 4.837922958625512, "learning_rate": 1.997335129549721e-05, "loss": 1.1217, "step": 1117 }, { "epoch": 0.1578650098842135, "grad_norm": 6.508780539481619, "learning_rate": 1.997323996097772e-05, "loss": 1.2971, "step": 1118 }, { "epoch": 0.1580062129341994, "grad_norm": 5.974016503996492, "learning_rate": 1.9973128394684448e-05, "loss": 1.2914, "step": 1119 }, { "epoch": 0.15814741598418525, "grad_norm": 4.603512649117855, "learning_rate": 1.9973016596619973e-05, "loss": 1.0364, "step": 1120 }, { "epoch": 0.15828861903417113, "grad_norm": 4.522512186367647, "learning_rate": 1.9972904566786903e-05, "loss": 0.8583, "step": 1121 }, { "epoch": 0.158429822084157, "grad_norm": 5.649589325592691, "learning_rate": 1.997279230518784e-05, "loss": 1.197, "step": 1122 }, { "epoch": 0.1585710251341429, "grad_norm": 4.552292209312444, "learning_rate": 1.9972679811825384e-05, "loss": 1.1165, "step": 1123 }, { "epoch": 0.15871222818412878, "grad_norm": 6.400696905377842, "learning_rate": 1.9972567086702163e-05, "loss": 1.2852, "step": 1124 }, { "epoch": 0.15885343123411466, "grad_norm": 4.825929786172961, "learning_rate": 1.9972454129820786e-05, "loss": 1.2004, "step": 1125 }, { "epoch": 0.15899463428410054, "grad_norm": 5.270462972565587, "learning_rate": 1.997234094118388e-05, "loss": 1.039, "step": 1126 }, { "epoch": 0.15913583733408643, "grad_norm": 4.117872923603862, "learning_rate": 1.997222752079408e-05, "loss": 0.8892, "step": 1127 }, { "epoch": 0.15927704038407228, "grad_norm": 4.944608927893909, "learning_rate": 1.9972113868654016e-05, "loss": 1.2317, "step": 1128 }, { "epoch": 0.15941824343405817, "grad_norm": 4.8139383272839105, "learning_rate": 1.9971999984766337e-05, "loss": 1.1767, "step": 1129 }, { "epoch": 0.15955944648404405, "grad_norm": 4.459582085684538, "learning_rate": 1.9971885869133683e-05, "loss": 1.1833, "step": 1130 }, { "epoch": 0.15970064953402993, "grad_norm": 4.958432377251127, "learning_rate": 1.9971771521758707e-05, "loss": 1.2543, "step": 1131 }, { "epoch": 0.15984185258401581, "grad_norm": 4.996952775789927, "learning_rate": 1.9971656942644068e-05, "loss": 1.1167, "step": 1132 }, { "epoch": 0.1599830556340017, "grad_norm": 5.470769268862384, "learning_rate": 1.997154213179243e-05, "loss": 1.3827, "step": 1133 }, { "epoch": 0.16012425868398758, "grad_norm": 5.240352881796299, "learning_rate": 1.9971427089206458e-05, "loss": 1.1858, "step": 1134 }, { "epoch": 0.16026546173397346, "grad_norm": 4.41487799239553, "learning_rate": 1.9971311814888823e-05, "loss": 0.9955, "step": 1135 }, { "epoch": 0.16040666478395935, "grad_norm": 4.629404400225735, "learning_rate": 1.997119630884221e-05, "loss": 1.0966, "step": 1136 }, { "epoch": 0.1605478678339452, "grad_norm": 4.735890302037721, "learning_rate": 1.9971080571069303e-05, "loss": 1.0419, "step": 1137 }, { "epoch": 0.16068907088393108, "grad_norm": 5.46902641288739, "learning_rate": 1.9970964601572788e-05, "loss": 1.0326, "step": 1138 }, { "epoch": 0.16083027393391697, "grad_norm": 4.921918149183649, "learning_rate": 1.9970848400355364e-05, "loss": 1.0783, "step": 1139 }, { "epoch": 0.16097147698390285, "grad_norm": 4.5532972752435095, "learning_rate": 1.9970731967419724e-05, "loss": 1.0725, "step": 1140 }, { "epoch": 0.16111268003388873, "grad_norm": 4.656200162759009, "learning_rate": 1.9970615302768586e-05, "loss": 1.147, "step": 1141 }, { "epoch": 0.16125388308387462, "grad_norm": 6.030633098850729, "learning_rate": 1.997049840640465e-05, "loss": 1.3959, "step": 1142 }, { "epoch": 0.1613950861338605, "grad_norm": 5.7090161370207255, "learning_rate": 1.9970381278330638e-05, "loss": 1.3324, "step": 1143 }, { "epoch": 0.16153628918384638, "grad_norm": 3.5516607599886356, "learning_rate": 1.9970263918549274e-05, "loss": 0.858, "step": 1144 }, { "epoch": 0.16167749223383224, "grad_norm": 5.072042007315354, "learning_rate": 1.9970146327063276e-05, "loss": 1.2115, "step": 1145 }, { "epoch": 0.16181869528381812, "grad_norm": 5.9786361230868375, "learning_rate": 1.9970028503875387e-05, "loss": 1.3568, "step": 1146 }, { "epoch": 0.161959898333804, "grad_norm": 4.82441265177681, "learning_rate": 1.996991044898834e-05, "loss": 1.1887, "step": 1147 }, { "epoch": 0.1621011013837899, "grad_norm": 4.156615645020029, "learning_rate": 1.996979216240488e-05, "loss": 0.9075, "step": 1148 }, { "epoch": 0.16224230443377577, "grad_norm": 5.295656274068003, "learning_rate": 1.996967364412776e-05, "loss": 1.0972, "step": 1149 }, { "epoch": 0.16238350748376165, "grad_norm": 4.992705844666568, "learning_rate": 1.9969554894159723e-05, "loss": 1.0895, "step": 1150 }, { "epoch": 0.16252471053374754, "grad_norm": 4.381287639430764, "learning_rate": 1.996943591250354e-05, "loss": 1.2319, "step": 1151 }, { "epoch": 0.16266591358373342, "grad_norm": 4.882908726919229, "learning_rate": 1.996931669916197e-05, "loss": 1.125, "step": 1152 }, { "epoch": 0.1628071166337193, "grad_norm": 4.248085213469589, "learning_rate": 1.9969197254137786e-05, "loss": 1.0392, "step": 1153 }, { "epoch": 0.16294831968370516, "grad_norm": 5.018658833090311, "learning_rate": 1.9969077577433763e-05, "loss": 1.0778, "step": 1154 }, { "epoch": 0.16308952273369104, "grad_norm": 3.9889336820297325, "learning_rate": 1.9968957669052687e-05, "loss": 0.9441, "step": 1155 }, { "epoch": 0.16323072578367692, "grad_norm": 6.153782683434731, "learning_rate": 1.9968837528997333e-05, "loss": 1.2275, "step": 1156 }, { "epoch": 0.1633719288336628, "grad_norm": 5.634623491013958, "learning_rate": 1.9968717157270502e-05, "loss": 1.2337, "step": 1157 }, { "epoch": 0.1635131318836487, "grad_norm": 4.422171742775482, "learning_rate": 1.9968596553874993e-05, "loss": 0.9681, "step": 1158 }, { "epoch": 0.16365433493363457, "grad_norm": 5.157700831331247, "learning_rate": 1.99684757188136e-05, "loss": 1.1761, "step": 1159 }, { "epoch": 0.16379553798362045, "grad_norm": 5.992449393622337, "learning_rate": 1.9968354652089142e-05, "loss": 1.1279, "step": 1160 }, { "epoch": 0.16393674103360634, "grad_norm": 4.916631406032586, "learning_rate": 1.996823335370442e-05, "loss": 1.1057, "step": 1161 }, { "epoch": 0.1640779440835922, "grad_norm": 5.608265764309042, "learning_rate": 1.9968111823662268e-05, "loss": 1.102, "step": 1162 }, { "epoch": 0.16421914713357808, "grad_norm": 4.426081705158, "learning_rate": 1.9967990061965497e-05, "loss": 1.2122, "step": 1163 }, { "epoch": 0.16436035018356396, "grad_norm": 5.34358330556191, "learning_rate": 1.996786806861694e-05, "loss": 1.2047, "step": 1164 }, { "epoch": 0.16450155323354984, "grad_norm": 4.50661736599926, "learning_rate": 1.996774584361944e-05, "loss": 1.0098, "step": 1165 }, { "epoch": 0.16464275628353572, "grad_norm": 4.909272011581947, "learning_rate": 1.9967623386975826e-05, "loss": 1.1622, "step": 1166 }, { "epoch": 0.1647839593335216, "grad_norm": 4.780828325639308, "learning_rate": 1.996750069868895e-05, "loss": 1.0284, "step": 1167 }, { "epoch": 0.1649251623835075, "grad_norm": 5.718198109586679, "learning_rate": 1.9967377778761667e-05, "loss": 1.0667, "step": 1168 }, { "epoch": 0.16506636543349337, "grad_norm": 4.781998095392276, "learning_rate": 1.9967254627196823e-05, "loss": 1.1192, "step": 1169 }, { "epoch": 0.16520756848347926, "grad_norm": 4.158974695290931, "learning_rate": 1.996713124399729e-05, "loss": 0.9703, "step": 1170 }, { "epoch": 0.1653487715334651, "grad_norm": 4.774755573005021, "learning_rate": 1.996700762916593e-05, "loss": 1.1202, "step": 1171 }, { "epoch": 0.165489974583451, "grad_norm": 4.602605691369339, "learning_rate": 1.9966883782705617e-05, "loss": 1.0959, "step": 1172 }, { "epoch": 0.16563117763343688, "grad_norm": 4.470840325697791, "learning_rate": 1.996675970461923e-05, "loss": 1.105, "step": 1173 }, { "epoch": 0.16577238068342276, "grad_norm": 5.155838892675553, "learning_rate": 1.996663539490965e-05, "loss": 1.1275, "step": 1174 }, { "epoch": 0.16591358373340864, "grad_norm": 4.493211582275356, "learning_rate": 1.9966510853579767e-05, "loss": 1.1345, "step": 1175 }, { "epoch": 0.16605478678339453, "grad_norm": 5.103456949154394, "learning_rate": 1.996638608063248e-05, "loss": 1.0466, "step": 1176 }, { "epoch": 0.1661959898333804, "grad_norm": 4.664538175951263, "learning_rate": 1.996626107607068e-05, "loss": 0.9554, "step": 1177 }, { "epoch": 0.1663371928833663, "grad_norm": 4.860685825944272, "learning_rate": 1.996613583989728e-05, "loss": 1.161, "step": 1178 }, { "epoch": 0.16647839593335215, "grad_norm": 4.638587678007743, "learning_rate": 1.9966010372115186e-05, "loss": 1.0527, "step": 1179 }, { "epoch": 0.16661959898333803, "grad_norm": 5.2318313887716945, "learning_rate": 1.9965884672727313e-05, "loss": 1.2447, "step": 1180 }, { "epoch": 0.1667608020333239, "grad_norm": 4.671149961172834, "learning_rate": 1.9965758741736587e-05, "loss": 1.0432, "step": 1181 }, { "epoch": 0.1669020050833098, "grad_norm": 3.892770963817775, "learning_rate": 1.996563257914593e-05, "loss": 0.8379, "step": 1182 }, { "epoch": 0.16704320813329568, "grad_norm": 4.972522787507057, "learning_rate": 1.9965506184958277e-05, "loss": 1.1638, "step": 1183 }, { "epoch": 0.16718441118328156, "grad_norm": 5.236804181725235, "learning_rate": 1.9965379559176562e-05, "loss": 1.2589, "step": 1184 }, { "epoch": 0.16732561423326744, "grad_norm": 5.159160002524021, "learning_rate": 1.9965252701803733e-05, "loss": 1.0586, "step": 1185 }, { "epoch": 0.16746681728325333, "grad_norm": 5.193207175371085, "learning_rate": 1.996512561284273e-05, "loss": 1.0912, "step": 1186 }, { "epoch": 0.1676080203332392, "grad_norm": 5.088875793184004, "learning_rate": 1.996499829229652e-05, "loss": 1.208, "step": 1187 }, { "epoch": 0.16774922338322507, "grad_norm": 4.1768913153132, "learning_rate": 1.9964870740168046e-05, "loss": 1.1677, "step": 1188 }, { "epoch": 0.16789042643321095, "grad_norm": 4.720334313188313, "learning_rate": 1.996474295646028e-05, "loss": 1.0759, "step": 1189 }, { "epoch": 0.16803162948319683, "grad_norm": 4.463351861093201, "learning_rate": 1.9964614941176194e-05, "loss": 1.0423, "step": 1190 }, { "epoch": 0.16817283253318271, "grad_norm": 4.6140940656716145, "learning_rate": 1.9964486694318758e-05, "loss": 1.1759, "step": 1191 }, { "epoch": 0.1683140355831686, "grad_norm": 4.860198883725041, "learning_rate": 1.9964358215890955e-05, "loss": 1.0864, "step": 1192 }, { "epoch": 0.16845523863315448, "grad_norm": 5.676714072235111, "learning_rate": 1.9964229505895768e-05, "loss": 1.2349, "step": 1193 }, { "epoch": 0.16859644168314036, "grad_norm": 4.325881150491777, "learning_rate": 1.9964100564336197e-05, "loss": 0.9979, "step": 1194 }, { "epoch": 0.16873764473312625, "grad_norm": 5.135060794473869, "learning_rate": 1.9963971391215226e-05, "loss": 1.1304, "step": 1195 }, { "epoch": 0.1688788477831121, "grad_norm": 4.43911971813641, "learning_rate": 1.9963841986535864e-05, "loss": 1.0748, "step": 1196 }, { "epoch": 0.16902005083309798, "grad_norm": 4.536829356448458, "learning_rate": 1.996371235030112e-05, "loss": 1.3027, "step": 1197 }, { "epoch": 0.16916125388308387, "grad_norm": 5.955702804116423, "learning_rate": 1.9963582482514003e-05, "loss": 1.0215, "step": 1198 }, { "epoch": 0.16930245693306975, "grad_norm": 4.4124999689209865, "learning_rate": 1.996345238317753e-05, "loss": 0.9698, "step": 1199 }, { "epoch": 0.16944365998305563, "grad_norm": 5.3875102071102585, "learning_rate": 1.996332205229473e-05, "loss": 1.2229, "step": 1200 }, { "epoch": 0.16958486303304152, "grad_norm": 4.623236606991806, "learning_rate": 1.9963191489868624e-05, "loss": 0.9886, "step": 1201 }, { "epoch": 0.1697260660830274, "grad_norm": 4.334738100442265, "learning_rate": 1.996306069590225e-05, "loss": 1.0788, "step": 1202 }, { "epoch": 0.16986726913301328, "grad_norm": 4.796742345483506, "learning_rate": 1.996292967039865e-05, "loss": 1.1337, "step": 1203 }, { "epoch": 0.17000847218299917, "grad_norm": 4.839822157330316, "learning_rate": 1.996279841336087e-05, "loss": 1.0515, "step": 1204 }, { "epoch": 0.17014967523298502, "grad_norm": 5.264923200250912, "learning_rate": 1.996266692479196e-05, "loss": 1.1155, "step": 1205 }, { "epoch": 0.1702908782829709, "grad_norm": 3.5624932963091394, "learning_rate": 1.9962535204694964e-05, "loss": 0.9227, "step": 1206 }, { "epoch": 0.1704320813329568, "grad_norm": 4.818957797529313, "learning_rate": 1.996240325307296e-05, "loss": 1.0933, "step": 1207 }, { "epoch": 0.17057328438294267, "grad_norm": 5.15095130623406, "learning_rate": 1.9962271069929e-05, "loss": 1.4231, "step": 1208 }, { "epoch": 0.17071448743292855, "grad_norm": 4.4859995335573135, "learning_rate": 1.996213865526617e-05, "loss": 0.9422, "step": 1209 }, { "epoch": 0.17085569048291444, "grad_norm": 4.965232098293266, "learning_rate": 1.9962006009087535e-05, "loss": 1.0033, "step": 1210 }, { "epoch": 0.17099689353290032, "grad_norm": 4.429831296158428, "learning_rate": 1.9961873131396185e-05, "loss": 1.0522, "step": 1211 }, { "epoch": 0.1711380965828862, "grad_norm": 4.2902686003066846, "learning_rate": 1.9961740022195202e-05, "loss": 1.0232, "step": 1212 }, { "epoch": 0.17127929963287206, "grad_norm": 4.637063025708788, "learning_rate": 1.9961606681487685e-05, "loss": 1.0737, "step": 1213 }, { "epoch": 0.17142050268285794, "grad_norm": 4.9649691032247985, "learning_rate": 1.9961473109276735e-05, "loss": 1.2268, "step": 1214 }, { "epoch": 0.17156170573284382, "grad_norm": 5.346696112627389, "learning_rate": 1.9961339305565447e-05, "loss": 1.128, "step": 1215 }, { "epoch": 0.1717029087828297, "grad_norm": 5.051514125497087, "learning_rate": 1.9961205270356937e-05, "loss": 1.2226, "step": 1216 }, { "epoch": 0.1718441118328156, "grad_norm": 4.3335417860770855, "learning_rate": 1.996107100365432e-05, "loss": 0.9637, "step": 1217 }, { "epoch": 0.17198531488280147, "grad_norm": 4.314035724736223, "learning_rate": 1.996093650546071e-05, "loss": 0.8998, "step": 1218 }, { "epoch": 0.17212651793278735, "grad_norm": 4.462396544399696, "learning_rate": 1.996080177577924e-05, "loss": 1.1061, "step": 1219 }, { "epoch": 0.17226772098277324, "grad_norm": 4.554901065008981, "learning_rate": 1.9960666814613043e-05, "loss": 1.1939, "step": 1220 }, { "epoch": 0.17240892403275912, "grad_norm": 5.451877302293623, "learning_rate": 1.9960531621965246e-05, "loss": 1.0017, "step": 1221 }, { "epoch": 0.17255012708274498, "grad_norm": 4.649639678605315, "learning_rate": 1.9960396197838997e-05, "loss": 1.2605, "step": 1222 }, { "epoch": 0.17269133013273086, "grad_norm": 4.879921813476367, "learning_rate": 1.9960260542237438e-05, "loss": 1.1538, "step": 1223 }, { "epoch": 0.17283253318271674, "grad_norm": 4.3017424577008105, "learning_rate": 1.996012465516373e-05, "loss": 1.0651, "step": 1224 }, { "epoch": 0.17297373623270262, "grad_norm": 4.692640802351377, "learning_rate": 1.9959988536621026e-05, "loss": 1.1819, "step": 1225 }, { "epoch": 0.1731149392826885, "grad_norm": 4.242932972997732, "learning_rate": 1.9959852186612492e-05, "loss": 1.2351, "step": 1226 }, { "epoch": 0.1732561423326744, "grad_norm": 4.199971960819896, "learning_rate": 1.995971560514129e-05, "loss": 0.93, "step": 1227 }, { "epoch": 0.17339734538266027, "grad_norm": 5.544480469175915, "learning_rate": 1.9959578792210604e-05, "loss": 1.2355, "step": 1228 }, { "epoch": 0.17353854843264616, "grad_norm": 4.230432303297978, "learning_rate": 1.9959441747823603e-05, "loss": 0.8558, "step": 1229 }, { "epoch": 0.173679751482632, "grad_norm": 3.86338852818337, "learning_rate": 1.9959304471983477e-05, "loss": 0.7717, "step": 1230 }, { "epoch": 0.1738209545326179, "grad_norm": 5.192953040524915, "learning_rate": 1.9959166964693417e-05, "loss": 1.3743, "step": 1231 }, { "epoch": 0.17396215758260378, "grad_norm": 4.870700849269062, "learning_rate": 1.995902922595662e-05, "loss": 1.2319, "step": 1232 }, { "epoch": 0.17410336063258966, "grad_norm": 4.533206029446299, "learning_rate": 1.9958891255776284e-05, "loss": 1.0641, "step": 1233 }, { "epoch": 0.17424456368257554, "grad_norm": 4.8705735075571726, "learning_rate": 1.995875305415561e-05, "loss": 1.1214, "step": 1234 }, { "epoch": 0.17438576673256143, "grad_norm": 5.073610120108678, "learning_rate": 1.9958614621097826e-05, "loss": 1.3175, "step": 1235 }, { "epoch": 0.1745269697825473, "grad_norm": 5.303017697231334, "learning_rate": 1.9958475956606133e-05, "loss": 1.2074, "step": 1236 }, { "epoch": 0.1746681728325332, "grad_norm": 5.169586306770147, "learning_rate": 1.995833706068376e-05, "loss": 1.2317, "step": 1237 }, { "epoch": 0.17480937588251907, "grad_norm": 4.919302063301337, "learning_rate": 1.995819793333394e-05, "loss": 1.2109, "step": 1238 }, { "epoch": 0.17495057893250493, "grad_norm": 4.609082575319201, "learning_rate": 1.9958058574559893e-05, "loss": 1.0625, "step": 1239 }, { "epoch": 0.1750917819824908, "grad_norm": 4.6944231225839586, "learning_rate": 1.995791898436487e-05, "loss": 1.014, "step": 1240 }, { "epoch": 0.1752329850324767, "grad_norm": 4.6586299999591905, "learning_rate": 1.9957779162752105e-05, "loss": 1.079, "step": 1241 }, { "epoch": 0.17537418808246258, "grad_norm": 4.470951048820083, "learning_rate": 1.9957639109724858e-05, "loss": 1.1884, "step": 1242 }, { "epoch": 0.17551539113244846, "grad_norm": 4.310139549114599, "learning_rate": 1.9957498825286374e-05, "loss": 0.9912, "step": 1243 }, { "epoch": 0.17565659418243434, "grad_norm": 5.447714868694457, "learning_rate": 1.995735830943992e-05, "loss": 1.2984, "step": 1244 }, { "epoch": 0.17579779723242023, "grad_norm": 4.500836203978812, "learning_rate": 1.9957217562188763e-05, "loss": 1.0362, "step": 1245 }, { "epoch": 0.1759390002824061, "grad_norm": 4.989481499385495, "learning_rate": 1.9957076583536166e-05, "loss": 1.1578, "step": 1246 }, { "epoch": 0.176080203332392, "grad_norm": 4.532421271108147, "learning_rate": 1.9956935373485406e-05, "loss": 1.0343, "step": 1247 }, { "epoch": 0.17622140638237785, "grad_norm": 4.872221326425936, "learning_rate": 1.9956793932039774e-05, "loss": 1.1878, "step": 1248 }, { "epoch": 0.17636260943236373, "grad_norm": 4.2504954569185145, "learning_rate": 1.9956652259202548e-05, "loss": 1.0258, "step": 1249 }, { "epoch": 0.17650381248234961, "grad_norm": 4.941910920948673, "learning_rate": 1.9956510354977022e-05, "loss": 1.1637, "step": 1250 }, { "epoch": 0.1766450155323355, "grad_norm": 5.084381829744562, "learning_rate": 1.99563682193665e-05, "loss": 1.1971, "step": 1251 }, { "epoch": 0.17678621858232138, "grad_norm": 4.105186947940933, "learning_rate": 1.9956225852374275e-05, "loss": 0.8818, "step": 1252 }, { "epoch": 0.17692742163230726, "grad_norm": 4.071406483525072, "learning_rate": 1.9956083254003667e-05, "loss": 0.8964, "step": 1253 }, { "epoch": 0.17706862468229315, "grad_norm": 4.62430982559045, "learning_rate": 1.995594042425798e-05, "loss": 1.0882, "step": 1254 }, { "epoch": 0.17720982773227903, "grad_norm": 5.699316862550439, "learning_rate": 1.9955797363140536e-05, "loss": 1.2869, "step": 1255 }, { "epoch": 0.17735103078226488, "grad_norm": 4.6441839273500625, "learning_rate": 1.9955654070654664e-05, "loss": 1.2183, "step": 1256 }, { "epoch": 0.17749223383225077, "grad_norm": 5.798970056058559, "learning_rate": 1.995551054680369e-05, "loss": 1.2501, "step": 1257 }, { "epoch": 0.17763343688223665, "grad_norm": 4.527253270046071, "learning_rate": 1.995536679159095e-05, "loss": 0.9936, "step": 1258 }, { "epoch": 0.17777463993222253, "grad_norm": 4.633787085996182, "learning_rate": 1.9955222805019786e-05, "loss": 1.1555, "step": 1259 }, { "epoch": 0.17791584298220842, "grad_norm": 4.589039110290919, "learning_rate": 1.995507858709354e-05, "loss": 0.9787, "step": 1260 }, { "epoch": 0.1780570460321943, "grad_norm": 5.2790049115244635, "learning_rate": 1.9954934137815568e-05, "loss": 1.1364, "step": 1261 }, { "epoch": 0.17819824908218018, "grad_norm": 4.534838459934027, "learning_rate": 1.995478945718923e-05, "loss": 1.2378, "step": 1262 }, { "epoch": 0.17833945213216607, "grad_norm": 4.9154977673551645, "learning_rate": 1.995464454521788e-05, "loss": 1.2292, "step": 1263 }, { "epoch": 0.17848065518215195, "grad_norm": 3.98779088434465, "learning_rate": 1.9954499401904893e-05, "loss": 0.8715, "step": 1264 }, { "epoch": 0.1786218582321378, "grad_norm": 5.142652047003303, "learning_rate": 1.9954354027253635e-05, "loss": 1.4004, "step": 1265 }, { "epoch": 0.1787630612821237, "grad_norm": 5.004151334023995, "learning_rate": 1.995420842126749e-05, "loss": 0.9918, "step": 1266 }, { "epoch": 0.17890426433210957, "grad_norm": 4.361361301082337, "learning_rate": 1.9954062583949842e-05, "loss": 1.0382, "step": 1267 }, { "epoch": 0.17904546738209545, "grad_norm": 4.7034889403971825, "learning_rate": 1.9953916515304077e-05, "loss": 1.2482, "step": 1268 }, { "epoch": 0.17918667043208134, "grad_norm": 6.232633458423675, "learning_rate": 1.9953770215333593e-05, "loss": 1.2757, "step": 1269 }, { "epoch": 0.17932787348206722, "grad_norm": 5.040964662575827, "learning_rate": 1.9953623684041786e-05, "loss": 1.0132, "step": 1270 }, { "epoch": 0.1794690765320531, "grad_norm": 5.044052772782454, "learning_rate": 1.995347692143206e-05, "loss": 1.1451, "step": 1271 }, { "epoch": 0.17961027958203898, "grad_norm": 5.20990423713552, "learning_rate": 1.9953329927507833e-05, "loss": 1.3044, "step": 1272 }, { "epoch": 0.17975148263202484, "grad_norm": 4.254999650470814, "learning_rate": 1.9953182702272514e-05, "loss": 0.9758, "step": 1273 }, { "epoch": 0.17989268568201072, "grad_norm": 6.026971312928143, "learning_rate": 1.995303524572953e-05, "loss": 1.3211, "step": 1274 }, { "epoch": 0.1800338887319966, "grad_norm": 4.595563752221218, "learning_rate": 1.9952887557882304e-05, "loss": 1.1039, "step": 1275 }, { "epoch": 0.1801750917819825, "grad_norm": 4.496045674765187, "learning_rate": 1.995273963873427e-05, "loss": 1.2038, "step": 1276 }, { "epoch": 0.18031629483196837, "grad_norm": 4.655447861840876, "learning_rate": 1.9952591488288868e-05, "loss": 0.974, "step": 1277 }, { "epoch": 0.18045749788195425, "grad_norm": 4.15308438518539, "learning_rate": 1.9952443106549535e-05, "loss": 1.099, "step": 1278 }, { "epoch": 0.18059870093194014, "grad_norm": 6.190142892845365, "learning_rate": 1.995229449351972e-05, "loss": 1.289, "step": 1279 }, { "epoch": 0.18073990398192602, "grad_norm": 5.358189285530809, "learning_rate": 1.9952145649202885e-05, "loss": 1.1363, "step": 1280 }, { "epoch": 0.1808811070319119, "grad_norm": 5.1677095117282414, "learning_rate": 1.9951996573602476e-05, "loss": 1.2044, "step": 1281 }, { "epoch": 0.18102231008189776, "grad_norm": 5.009382872473553, "learning_rate": 1.995184726672197e-05, "loss": 1.066, "step": 1282 }, { "epoch": 0.18116351313188364, "grad_norm": 4.693743290558771, "learning_rate": 1.995169772856483e-05, "loss": 1.1668, "step": 1283 }, { "epoch": 0.18130471618186952, "grad_norm": 4.80827540853195, "learning_rate": 1.9951547959134535e-05, "loss": 1.1819, "step": 1284 }, { "epoch": 0.1814459192318554, "grad_norm": 4.298919866577047, "learning_rate": 1.9951397958434556e-05, "loss": 0.9337, "step": 1285 }, { "epoch": 0.1815871222818413, "grad_norm": 4.375509210194846, "learning_rate": 1.9951247726468393e-05, "loss": 1.0347, "step": 1286 }, { "epoch": 0.18172832533182717, "grad_norm": 4.196236919986424, "learning_rate": 1.995109726323953e-05, "loss": 0.9568, "step": 1287 }, { "epoch": 0.18186952838181306, "grad_norm": 4.492160719422738, "learning_rate": 1.9950946568751458e-05, "loss": 0.9087, "step": 1288 }, { "epoch": 0.18201073143179894, "grad_norm": 4.682166600476827, "learning_rate": 1.995079564300769e-05, "loss": 1.1577, "step": 1289 }, { "epoch": 0.1821519344817848, "grad_norm": 4.767562446301597, "learning_rate": 1.995064448601173e-05, "loss": 1.2279, "step": 1290 }, { "epoch": 0.18229313753177068, "grad_norm": 4.563497077376157, "learning_rate": 1.9950493097767086e-05, "loss": 1.1248, "step": 1291 }, { "epoch": 0.18243434058175656, "grad_norm": 4.719289082275079, "learning_rate": 1.9950341478277282e-05, "loss": 1.4356, "step": 1292 }, { "epoch": 0.18257554363174244, "grad_norm": 4.016594252551786, "learning_rate": 1.9950189627545837e-05, "loss": 0.8753, "step": 1293 }, { "epoch": 0.18271674668172833, "grad_norm": 4.894590333283925, "learning_rate": 1.9950037545576288e-05, "loss": 1.1642, "step": 1294 }, { "epoch": 0.1828579497317142, "grad_norm": 4.415399911984575, "learning_rate": 1.994988523237216e-05, "loss": 1.1803, "step": 1295 }, { "epoch": 0.1829991527817001, "grad_norm": 4.1549054885743235, "learning_rate": 1.9949732687936992e-05, "loss": 0.9826, "step": 1296 }, { "epoch": 0.18314035583168597, "grad_norm": 5.0533212810800725, "learning_rate": 1.9949579912274337e-05, "loss": 1.1916, "step": 1297 }, { "epoch": 0.18328155888167186, "grad_norm": 5.533817274245665, "learning_rate": 1.994942690538774e-05, "loss": 1.165, "step": 1298 }, { "epoch": 0.1834227619316577, "grad_norm": 4.655854771267515, "learning_rate": 1.9949273667280764e-05, "loss": 1.0441, "step": 1299 }, { "epoch": 0.1835639649816436, "grad_norm": 3.778265624584894, "learning_rate": 1.9949120197956956e-05, "loss": 0.8498, "step": 1300 }, { "epoch": 0.18370516803162948, "grad_norm": 5.091137737813793, "learning_rate": 1.99489664974199e-05, "loss": 1.1958, "step": 1301 }, { "epoch": 0.18384637108161536, "grad_norm": 4.49639533349355, "learning_rate": 1.9948812565673153e-05, "loss": 0.997, "step": 1302 }, { "epoch": 0.18398757413160124, "grad_norm": 4.875794611584804, "learning_rate": 1.9948658402720303e-05, "loss": 1.279, "step": 1303 }, { "epoch": 0.18412877718158713, "grad_norm": 4.796974470574435, "learning_rate": 1.9948504008564928e-05, "loss": 1.2298, "step": 1304 }, { "epoch": 0.184269980231573, "grad_norm": 4.069420107108959, "learning_rate": 1.994834938321061e-05, "loss": 0.9937, "step": 1305 }, { "epoch": 0.1844111832815589, "grad_norm": 5.135791535008867, "learning_rate": 1.9948194526660956e-05, "loss": 1.3049, "step": 1306 }, { "epoch": 0.18455238633154475, "grad_norm": 4.713189212916467, "learning_rate": 1.9948039438919554e-05, "loss": 1.2788, "step": 1307 }, { "epoch": 0.18469358938153063, "grad_norm": 5.515181633149785, "learning_rate": 1.9947884119990012e-05, "loss": 1.3322, "step": 1308 }, { "epoch": 0.18483479243151651, "grad_norm": 5.402063884682159, "learning_rate": 1.9947728569875936e-05, "loss": 1.0901, "step": 1309 }, { "epoch": 0.1849759954815024, "grad_norm": 4.173255440490058, "learning_rate": 1.994757278858095e-05, "loss": 0.9413, "step": 1310 }, { "epoch": 0.18511719853148828, "grad_norm": 5.311433751864453, "learning_rate": 1.9947416776108665e-05, "loss": 1.2074, "step": 1311 }, { "epoch": 0.18525840158147416, "grad_norm": 4.6885898969286215, "learning_rate": 1.994726053246271e-05, "loss": 1.2217, "step": 1312 }, { "epoch": 0.18539960463146005, "grad_norm": 5.384508304446427, "learning_rate": 1.9947104057646716e-05, "loss": 1.1769, "step": 1313 }, { "epoch": 0.18554080768144593, "grad_norm": 4.938337074046915, "learning_rate": 1.9946947351664324e-05, "loss": 1.1609, "step": 1314 }, { "epoch": 0.1856820107314318, "grad_norm": 4.348731252736528, "learning_rate": 1.9946790414519167e-05, "loss": 0.9727, "step": 1315 }, { "epoch": 0.18582321378141767, "grad_norm": 4.345858913635626, "learning_rate": 1.99466332462149e-05, "loss": 1.161, "step": 1316 }, { "epoch": 0.18596441683140355, "grad_norm": 4.7088288301731085, "learning_rate": 1.9946475846755166e-05, "loss": 1.1086, "step": 1317 }, { "epoch": 0.18610561988138943, "grad_norm": 4.6905140970151304, "learning_rate": 1.9946318216143633e-05, "loss": 1.0715, "step": 1318 }, { "epoch": 0.18624682293137532, "grad_norm": 4.77404387780695, "learning_rate": 1.994616035438396e-05, "loss": 1.2865, "step": 1319 }, { "epoch": 0.1863880259813612, "grad_norm": 4.586524909983407, "learning_rate": 1.9946002261479817e-05, "loss": 1.1532, "step": 1320 }, { "epoch": 0.18652922903134708, "grad_norm": 4.30123606743428, "learning_rate": 1.9945843937434875e-05, "loss": 0.9626, "step": 1321 }, { "epoch": 0.18667043208133297, "grad_norm": 5.120312756155164, "learning_rate": 1.9945685382252816e-05, "loss": 1.1421, "step": 1322 }, { "epoch": 0.18681163513131885, "grad_norm": 5.058646208404037, "learning_rate": 1.994552659593732e-05, "loss": 1.2529, "step": 1323 }, { "epoch": 0.1869528381813047, "grad_norm": 4.608940549326115, "learning_rate": 1.9945367578492085e-05, "loss": 0.9914, "step": 1324 }, { "epoch": 0.1870940412312906, "grad_norm": 5.069868504857425, "learning_rate": 1.9945208329920802e-05, "loss": 1.153, "step": 1325 }, { "epoch": 0.18723524428127647, "grad_norm": 4.399188099857936, "learning_rate": 1.994504885022717e-05, "loss": 0.9999, "step": 1326 }, { "epoch": 0.18737644733126235, "grad_norm": 4.930740486047185, "learning_rate": 1.9944889139414904e-05, "loss": 1.1089, "step": 1327 }, { "epoch": 0.18751765038124824, "grad_norm": 5.3975213730845, "learning_rate": 1.9944729197487702e-05, "loss": 1.1878, "step": 1328 }, { "epoch": 0.18765885343123412, "grad_norm": 4.821977108353716, "learning_rate": 1.994456902444929e-05, "loss": 1.2141, "step": 1329 }, { "epoch": 0.18780005648122, "grad_norm": 5.018021630479794, "learning_rate": 1.9944408620303393e-05, "loss": 1.0732, "step": 1330 }, { "epoch": 0.18794125953120588, "grad_norm": 4.954923731787435, "learning_rate": 1.994424798505373e-05, "loss": 1.1086, "step": 1331 }, { "epoch": 0.18808246258119177, "grad_norm": 5.214749913426401, "learning_rate": 1.994408711870404e-05, "loss": 1.3411, "step": 1332 }, { "epoch": 0.18822366563117762, "grad_norm": 4.390996133137107, "learning_rate": 1.994392602125806e-05, "loss": 1.1271, "step": 1333 }, { "epoch": 0.1883648686811635, "grad_norm": 4.394339174489535, "learning_rate": 1.9943764692719535e-05, "loss": 0.9938, "step": 1334 }, { "epoch": 0.1885060717311494, "grad_norm": 4.210998121060827, "learning_rate": 1.9943603133092208e-05, "loss": 0.9607, "step": 1335 }, { "epoch": 0.18864727478113527, "grad_norm": 4.380465402443947, "learning_rate": 1.9943441342379843e-05, "loss": 0.9735, "step": 1336 }, { "epoch": 0.18878847783112115, "grad_norm": 4.38393020161247, "learning_rate": 1.9943279320586194e-05, "loss": 1.0468, "step": 1337 }, { "epoch": 0.18892968088110704, "grad_norm": 4.58730758032554, "learning_rate": 1.994311706771503e-05, "loss": 1.1235, "step": 1338 }, { "epoch": 0.18907088393109292, "grad_norm": 4.698073968088262, "learning_rate": 1.9942954583770116e-05, "loss": 1.0193, "step": 1339 }, { "epoch": 0.1892120869810788, "grad_norm": 4.403201393664041, "learning_rate": 1.9942791868755233e-05, "loss": 1.093, "step": 1340 }, { "epoch": 0.18935329003106466, "grad_norm": 5.162686923005779, "learning_rate": 1.994262892267416e-05, "loss": 1.3125, "step": 1341 }, { "epoch": 0.18949449308105054, "grad_norm": 5.377391322826577, "learning_rate": 1.9942465745530687e-05, "loss": 1.181, "step": 1342 }, { "epoch": 0.18963569613103642, "grad_norm": 4.564608012748201, "learning_rate": 1.99423023373286e-05, "loss": 1.0372, "step": 1343 }, { "epoch": 0.1897768991810223, "grad_norm": 5.1526342850790074, "learning_rate": 1.99421386980717e-05, "loss": 1.2143, "step": 1344 }, { "epoch": 0.1899181022310082, "grad_norm": 4.890502070791986, "learning_rate": 1.9941974827763797e-05, "loss": 1.0797, "step": 1345 }, { "epoch": 0.19005930528099407, "grad_norm": 4.296482830134802, "learning_rate": 1.994181072640869e-05, "loss": 0.9844, "step": 1346 }, { "epoch": 0.19020050833097996, "grad_norm": 4.3273981564878055, "learning_rate": 1.9941646394010194e-05, "loss": 1.1888, "step": 1347 }, { "epoch": 0.19034171138096584, "grad_norm": 5.159120481243468, "learning_rate": 1.9941481830572126e-05, "loss": 1.2561, "step": 1348 }, { "epoch": 0.19048291443095172, "grad_norm": 3.8535509932377887, "learning_rate": 1.994131703609832e-05, "loss": 0.9681, "step": 1349 }, { "epoch": 0.19062411748093758, "grad_norm": 5.636729918625148, "learning_rate": 1.9941152010592594e-05, "loss": 1.2005, "step": 1350 }, { "epoch": 0.19076532053092346, "grad_norm": 4.234432154714101, "learning_rate": 1.9940986754058792e-05, "loss": 1.0525, "step": 1351 }, { "epoch": 0.19090652358090934, "grad_norm": 4.45955658381661, "learning_rate": 1.994082126650075e-05, "loss": 1.0848, "step": 1352 }, { "epoch": 0.19104772663089523, "grad_norm": 4.495511389432425, "learning_rate": 1.9940655547922314e-05, "loss": 1.169, "step": 1353 }, { "epoch": 0.1911889296808811, "grad_norm": 3.966821731365161, "learning_rate": 1.9940489598327336e-05, "loss": 0.9496, "step": 1354 }, { "epoch": 0.191330132730867, "grad_norm": 4.988245421127051, "learning_rate": 1.9940323417719677e-05, "loss": 1.2218, "step": 1355 }, { "epoch": 0.19147133578085287, "grad_norm": 5.077921266949037, "learning_rate": 1.994015700610319e-05, "loss": 1.0569, "step": 1356 }, { "epoch": 0.19161253883083876, "grad_norm": 4.256113929917655, "learning_rate": 1.9939990363481748e-05, "loss": 1.0854, "step": 1357 }, { "epoch": 0.1917537418808246, "grad_norm": 4.380369823664638, "learning_rate": 1.9939823489859226e-05, "loss": 1.2463, "step": 1358 }, { "epoch": 0.1918949449308105, "grad_norm": 5.8405042242999725, "learning_rate": 1.9939656385239493e-05, "loss": 1.1355, "step": 1359 }, { "epoch": 0.19203614798079638, "grad_norm": 4.076259785178609, "learning_rate": 1.993948904962644e-05, "loss": 1.1287, "step": 1360 }, { "epoch": 0.19217735103078226, "grad_norm": 3.9368530576347145, "learning_rate": 1.9939321483023958e-05, "loss": 0.8798, "step": 1361 }, { "epoch": 0.19231855408076814, "grad_norm": 4.780702129696289, "learning_rate": 1.9939153685435932e-05, "loss": 1.1728, "step": 1362 }, { "epoch": 0.19245975713075403, "grad_norm": 5.237488985157414, "learning_rate": 1.993898565686627e-05, "loss": 1.2618, "step": 1363 }, { "epoch": 0.1926009601807399, "grad_norm": 4.486296177813556, "learning_rate": 1.9938817397318872e-05, "loss": 1.0266, "step": 1364 }, { "epoch": 0.1927421632307258, "grad_norm": 4.884647580743861, "learning_rate": 1.993864890679765e-05, "loss": 1.2699, "step": 1365 }, { "epoch": 0.19288336628071168, "grad_norm": 5.06937477857426, "learning_rate": 1.993848018530652e-05, "loss": 1.1504, "step": 1366 }, { "epoch": 0.19302456933069753, "grad_norm": 5.2310621854171435, "learning_rate": 1.9938311232849403e-05, "loss": 1.196, "step": 1367 }, { "epoch": 0.19316577238068341, "grad_norm": 4.445733762014927, "learning_rate": 1.9938142049430223e-05, "loss": 1.1346, "step": 1368 }, { "epoch": 0.1933069754306693, "grad_norm": 4.961584455490589, "learning_rate": 1.9937972635052918e-05, "loss": 1.1495, "step": 1369 }, { "epoch": 0.19344817848065518, "grad_norm": 4.6009485669165695, "learning_rate": 1.9937802989721417e-05, "loss": 1.1562, "step": 1370 }, { "epoch": 0.19358938153064106, "grad_norm": 4.786603149168164, "learning_rate": 1.9937633113439667e-05, "loss": 1.1402, "step": 1371 }, { "epoch": 0.19373058458062695, "grad_norm": 5.215685371857311, "learning_rate": 1.9937463006211617e-05, "loss": 1.2459, "step": 1372 }, { "epoch": 0.19387178763061283, "grad_norm": 4.219953271400533, "learning_rate": 1.9937292668041216e-05, "loss": 1.0828, "step": 1373 }, { "epoch": 0.1940129906805987, "grad_norm": 4.439440428967038, "learning_rate": 1.9937122098932428e-05, "loss": 0.9913, "step": 1374 }, { "epoch": 0.19415419373058457, "grad_norm": 4.157030725787838, "learning_rate": 1.993695129888921e-05, "loss": 1.0894, "step": 1375 }, { "epoch": 0.19429539678057045, "grad_norm": 34.570734153991495, "learning_rate": 1.993678026791554e-05, "loss": 1.2165, "step": 1376 }, { "epoch": 0.19443659983055633, "grad_norm": 4.359256269584173, "learning_rate": 1.9936609006015383e-05, "loss": 1.0305, "step": 1377 }, { "epoch": 0.19457780288054222, "grad_norm": 4.502427167387434, "learning_rate": 1.9936437513192728e-05, "loss": 1.1464, "step": 1378 }, { "epoch": 0.1947190059305281, "grad_norm": 4.256541823699716, "learning_rate": 1.9936265789451554e-05, "loss": 1.236, "step": 1379 }, { "epoch": 0.19486020898051398, "grad_norm": 4.698197044398401, "learning_rate": 1.9936093834795853e-05, "loss": 1.115, "step": 1380 }, { "epoch": 0.19500141203049987, "grad_norm": 4.587669651026083, "learning_rate": 1.9935921649229626e-05, "loss": 1.0151, "step": 1381 }, { "epoch": 0.19514261508048575, "grad_norm": 5.122064895287609, "learning_rate": 1.9935749232756868e-05, "loss": 1.152, "step": 1382 }, { "epoch": 0.19528381813047163, "grad_norm": 5.884767488767738, "learning_rate": 1.993557658538159e-05, "loss": 1.1704, "step": 1383 }, { "epoch": 0.1954250211804575, "grad_norm": 4.55416969934983, "learning_rate": 1.9935403707107804e-05, "loss": 0.9859, "step": 1384 }, { "epoch": 0.19556622423044337, "grad_norm": 4.093946759835486, "learning_rate": 1.9935230597939525e-05, "loss": 1.0983, "step": 1385 }, { "epoch": 0.19570742728042925, "grad_norm": 4.495323307741869, "learning_rate": 1.9935057257880777e-05, "loss": 0.9505, "step": 1386 }, { "epoch": 0.19584863033041514, "grad_norm": 5.834897892507581, "learning_rate": 1.993488368693559e-05, "loss": 1.3045, "step": 1387 }, { "epoch": 0.19598983338040102, "grad_norm": 4.338397488445801, "learning_rate": 1.9934709885108e-05, "loss": 1.1545, "step": 1388 }, { "epoch": 0.1961310364303869, "grad_norm": 4.810445045051072, "learning_rate": 1.9934535852402038e-05, "loss": 1.1318, "step": 1389 }, { "epoch": 0.19627223948037278, "grad_norm": 4.378292848894843, "learning_rate": 1.9934361588821757e-05, "loss": 1.0782, "step": 1390 }, { "epoch": 0.19641344253035867, "grad_norm": 5.746582632715619, "learning_rate": 1.9934187094371197e-05, "loss": 1.3311, "step": 1391 }, { "epoch": 0.19655464558034452, "grad_norm": 5.540869649696159, "learning_rate": 1.9934012369054422e-05, "loss": 1.2732, "step": 1392 }, { "epoch": 0.1966958486303304, "grad_norm": 5.174596563827367, "learning_rate": 1.9933837412875493e-05, "loss": 1.3111, "step": 1393 }, { "epoch": 0.1968370516803163, "grad_norm": 5.298528945248415, "learning_rate": 1.993366222583847e-05, "loss": 1.1612, "step": 1394 }, { "epoch": 0.19697825473030217, "grad_norm": 5.076095831294379, "learning_rate": 1.9933486807947425e-05, "loss": 1.1071, "step": 1395 }, { "epoch": 0.19711945778028805, "grad_norm": 4.6502752061493275, "learning_rate": 1.993331115920644e-05, "loss": 1.1118, "step": 1396 }, { "epoch": 0.19726066083027394, "grad_norm": 4.25801742623009, "learning_rate": 1.9933135279619592e-05, "loss": 0.9641, "step": 1397 }, { "epoch": 0.19740186388025982, "grad_norm": 3.800037628500738, "learning_rate": 1.993295916919097e-05, "loss": 0.9734, "step": 1398 }, { "epoch": 0.1975430669302457, "grad_norm": 4.459562974299471, "learning_rate": 1.9932782827924667e-05, "loss": 0.99, "step": 1399 }, { "epoch": 0.1976842699802316, "grad_norm": 4.9326287738430175, "learning_rate": 1.993260625582478e-05, "loss": 1.1598, "step": 1400 }, { "epoch": 0.19782547303021744, "grad_norm": 5.796397860227611, "learning_rate": 1.9932429452895413e-05, "loss": 1.2993, "step": 1401 }, { "epoch": 0.19796667608020332, "grad_norm": 4.433274851023001, "learning_rate": 1.9932252419140675e-05, "loss": 1.0181, "step": 1402 }, { "epoch": 0.1981078791301892, "grad_norm": 4.844598420948536, "learning_rate": 1.9932075154564682e-05, "loss": 1.0453, "step": 1403 }, { "epoch": 0.1982490821801751, "grad_norm": 4.465201641543277, "learning_rate": 1.993189765917155e-05, "loss": 1.054, "step": 1404 }, { "epoch": 0.19839028523016097, "grad_norm": 4.4696411377702105, "learning_rate": 1.9931719932965405e-05, "loss": 1.3337, "step": 1405 }, { "epoch": 0.19853148828014686, "grad_norm": 4.601386120181231, "learning_rate": 1.993154197595038e-05, "loss": 1.0814, "step": 1406 }, { "epoch": 0.19867269133013274, "grad_norm": 4.206286431512165, "learning_rate": 1.9931363788130607e-05, "loss": 0.9872, "step": 1407 }, { "epoch": 0.19881389438011862, "grad_norm": 3.859504779739252, "learning_rate": 1.9931185369510228e-05, "loss": 0.8377, "step": 1408 }, { "epoch": 0.19895509743010448, "grad_norm": 4.755391681682118, "learning_rate": 1.993100672009339e-05, "loss": 1.1062, "step": 1409 }, { "epoch": 0.19909630048009036, "grad_norm": 4.798617143925379, "learning_rate": 1.993082783988425e-05, "loss": 1.0188, "step": 1410 }, { "epoch": 0.19923750353007624, "grad_norm": 4.611536969524065, "learning_rate": 1.9930648728886952e-05, "loss": 1.0135, "step": 1411 }, { "epoch": 0.19937870658006213, "grad_norm": 5.150334462883534, "learning_rate": 1.993046938710567e-05, "loss": 1.0598, "step": 1412 }, { "epoch": 0.199519909630048, "grad_norm": 4.996195006898983, "learning_rate": 1.993028981454457e-05, "loss": 1.2022, "step": 1413 }, { "epoch": 0.1996611126800339, "grad_norm": 4.488016184218211, "learning_rate": 1.993011001120782e-05, "loss": 1.1398, "step": 1414 }, { "epoch": 0.19980231573001977, "grad_norm": 4.165576760566369, "learning_rate": 1.9929929977099603e-05, "loss": 1.1742, "step": 1415 }, { "epoch": 0.19994351878000566, "grad_norm": 6.499337496573019, "learning_rate": 1.9929749712224104e-05, "loss": 1.2797, "step": 1416 }, { "epoch": 0.20008472182999154, "grad_norm": 4.45437976331422, "learning_rate": 1.9929569216585506e-05, "loss": 1.0323, "step": 1417 }, { "epoch": 0.2002259248799774, "grad_norm": 4.607436291322566, "learning_rate": 1.992938849018801e-05, "loss": 1.0396, "step": 1418 }, { "epoch": 0.20036712792996328, "grad_norm": 4.8726097691886325, "learning_rate": 1.9929207533035812e-05, "loss": 0.995, "step": 1419 }, { "epoch": 0.20050833097994916, "grad_norm": 3.730693804058299, "learning_rate": 1.992902634513312e-05, "loss": 0.9677, "step": 1420 }, { "epoch": 0.20064953402993504, "grad_norm": 4.24225325135271, "learning_rate": 1.9928844926484145e-05, "loss": 1.0331, "step": 1421 }, { "epoch": 0.20079073707992093, "grad_norm": 4.519568897635765, "learning_rate": 1.99286632770931e-05, "loss": 1.1464, "step": 1422 }, { "epoch": 0.2009319401299068, "grad_norm": 4.969497536701674, "learning_rate": 1.992848139696421e-05, "loss": 1.2325, "step": 1423 }, { "epoch": 0.2010731431798927, "grad_norm": 5.022407721128527, "learning_rate": 1.9928299286101696e-05, "loss": 1.0214, "step": 1424 }, { "epoch": 0.20121434622987858, "grad_norm": 5.242440192214647, "learning_rate": 1.99281169445098e-05, "loss": 1.2644, "step": 1425 }, { "epoch": 0.20135554927986443, "grad_norm": 4.893282100137156, "learning_rate": 1.992793437219275e-05, "loss": 0.9517, "step": 1426 }, { "epoch": 0.20149675232985031, "grad_norm": 4.950594188754899, "learning_rate": 1.9927751569154795e-05, "loss": 1.2253, "step": 1427 }, { "epoch": 0.2016379553798362, "grad_norm": 4.173985614297457, "learning_rate": 1.992756853540018e-05, "loss": 1.015, "step": 1428 }, { "epoch": 0.20177915842982208, "grad_norm": 4.090141921959075, "learning_rate": 1.992738527093316e-05, "loss": 1.0505, "step": 1429 }, { "epoch": 0.20192036147980796, "grad_norm": 5.561530417477826, "learning_rate": 1.9927201775757995e-05, "loss": 1.2441, "step": 1430 }, { "epoch": 0.20206156452979385, "grad_norm": 4.655025890670126, "learning_rate": 1.9927018049878945e-05, "loss": 1.1776, "step": 1431 }, { "epoch": 0.20220276757977973, "grad_norm": 4.126348351315558, "learning_rate": 1.9926834093300283e-05, "loss": 1.0454, "step": 1432 }, { "epoch": 0.2023439706297656, "grad_norm": 5.280453087456826, "learning_rate": 1.9926649906026285e-05, "loss": 1.3948, "step": 1433 }, { "epoch": 0.2024851736797515, "grad_norm": 4.799321396686698, "learning_rate": 1.9926465488061232e-05, "loss": 1.2189, "step": 1434 }, { "epoch": 0.20262637672973735, "grad_norm": 4.050827720164738, "learning_rate": 1.9926280839409405e-05, "loss": 0.9316, "step": 1435 }, { "epoch": 0.20276757977972323, "grad_norm": 4.28213369161842, "learning_rate": 1.9926095960075097e-05, "loss": 1.1436, "step": 1436 }, { "epoch": 0.20290878282970912, "grad_norm": 4.570101208221942, "learning_rate": 1.9925910850062607e-05, "loss": 1.1577, "step": 1437 }, { "epoch": 0.203049985879695, "grad_norm": 4.011418407683016, "learning_rate": 1.9925725509376236e-05, "loss": 0.8617, "step": 1438 }, { "epoch": 0.20319118892968088, "grad_norm": 5.096557054767951, "learning_rate": 1.9925539938020292e-05, "loss": 1.1157, "step": 1439 }, { "epoch": 0.20333239197966677, "grad_norm": 4.40117843333325, "learning_rate": 1.9925354135999083e-05, "loss": 1.0635, "step": 1440 }, { "epoch": 0.20347359502965265, "grad_norm": 5.328924646195068, "learning_rate": 1.9925168103316938e-05, "loss": 1.1236, "step": 1441 }, { "epoch": 0.20361479807963853, "grad_norm": 5.081907097277156, "learning_rate": 1.9924981839978167e-05, "loss": 1.2163, "step": 1442 }, { "epoch": 0.2037560011296244, "grad_norm": 5.546581969134257, "learning_rate": 1.9924795345987103e-05, "loss": 1.1932, "step": 1443 }, { "epoch": 0.20389720417961027, "grad_norm": 5.389560198664573, "learning_rate": 1.992460862134808e-05, "loss": 1.149, "step": 1444 }, { "epoch": 0.20403840722959615, "grad_norm": 5.098083112279385, "learning_rate": 1.992442166606544e-05, "loss": 1.2768, "step": 1445 }, { "epoch": 0.20417961027958204, "grad_norm": 4.721295768714217, "learning_rate": 1.992423448014353e-05, "loss": 1.0414, "step": 1446 }, { "epoch": 0.20432081332956792, "grad_norm": 4.0322282072817766, "learning_rate": 1.9924047063586695e-05, "loss": 0.9314, "step": 1447 }, { "epoch": 0.2044620163795538, "grad_norm": 4.5603603854322445, "learning_rate": 1.9923859416399295e-05, "loss": 1.078, "step": 1448 }, { "epoch": 0.20460321942953968, "grad_norm": 4.1447334075637015, "learning_rate": 1.992367153858568e-05, "loss": 0.9364, "step": 1449 }, { "epoch": 0.20474442247952557, "grad_norm": 4.692272883159346, "learning_rate": 1.992348343015023e-05, "loss": 1.1447, "step": 1450 }, { "epoch": 0.20488562552951145, "grad_norm": 5.074080390468344, "learning_rate": 1.9923295091097312e-05, "loss": 0.9989, "step": 1451 }, { "epoch": 0.2050268285794973, "grad_norm": 4.49410017850021, "learning_rate": 1.9923106521431296e-05, "loss": 1.0541, "step": 1452 }, { "epoch": 0.2051680316294832, "grad_norm": 5.267474052231831, "learning_rate": 1.9922917721156575e-05, "loss": 1.3713, "step": 1453 }, { "epoch": 0.20530923467946907, "grad_norm": 4.605919090905005, "learning_rate": 1.9922728690277528e-05, "loss": 1.2021, "step": 1454 }, { "epoch": 0.20545043772945495, "grad_norm": 3.7376788048629033, "learning_rate": 1.9922539428798555e-05, "loss": 0.9375, "step": 1455 }, { "epoch": 0.20559164077944084, "grad_norm": 4.6501607230742, "learning_rate": 1.9922349936724048e-05, "loss": 1.0705, "step": 1456 }, { "epoch": 0.20573284382942672, "grad_norm": 5.373320219894962, "learning_rate": 1.9922160214058417e-05, "loss": 1.1918, "step": 1457 }, { "epoch": 0.2058740468794126, "grad_norm": 5.976873633998706, "learning_rate": 1.9921970260806064e-05, "loss": 1.371, "step": 1458 }, { "epoch": 0.2060152499293985, "grad_norm": 5.356476979599017, "learning_rate": 1.992178007697141e-05, "loss": 1.0875, "step": 1459 }, { "epoch": 0.20615645297938434, "grad_norm": 4.440129025654568, "learning_rate": 1.992158966255887e-05, "loss": 1.1862, "step": 1460 }, { "epoch": 0.20629765602937022, "grad_norm": 6.499771637881478, "learning_rate": 1.992139901757287e-05, "loss": 1.3755, "step": 1461 }, { "epoch": 0.2064388590793561, "grad_norm": 4.635436112066935, "learning_rate": 1.9921208142017844e-05, "loss": 1.0043, "step": 1462 }, { "epoch": 0.206580062129342, "grad_norm": 5.696350947923137, "learning_rate": 1.9921017035898226e-05, "loss": 1.038, "step": 1463 }, { "epoch": 0.20672126517932787, "grad_norm": 6.3100624273487265, "learning_rate": 1.9920825699218453e-05, "loss": 1.268, "step": 1464 }, { "epoch": 0.20686246822931376, "grad_norm": 4.30454260141745, "learning_rate": 1.9920634131982976e-05, "loss": 1.0673, "step": 1465 }, { "epoch": 0.20700367127929964, "grad_norm": 4.0361986779457375, "learning_rate": 1.9920442334196248e-05, "loss": 1.1551, "step": 1466 }, { "epoch": 0.20714487432928552, "grad_norm": 4.44560883748071, "learning_rate": 1.9920250305862723e-05, "loss": 1.0469, "step": 1467 }, { "epoch": 0.2072860773792714, "grad_norm": 4.5047295442522595, "learning_rate": 1.9920058046986867e-05, "loss": 1.1378, "step": 1468 }, { "epoch": 0.20742728042925726, "grad_norm": 4.603228962027549, "learning_rate": 1.9919865557573142e-05, "loss": 1.0567, "step": 1469 }, { "epoch": 0.20756848347924314, "grad_norm": 3.984572622435439, "learning_rate": 1.991967283762603e-05, "loss": 0.9173, "step": 1470 }, { "epoch": 0.20770968652922903, "grad_norm": 5.380944958497025, "learning_rate": 1.9919479887150007e-05, "loss": 0.9284, "step": 1471 }, { "epoch": 0.2078508895792149, "grad_norm": 4.132279338072665, "learning_rate": 1.991928670614955e-05, "loss": 1.0698, "step": 1472 }, { "epoch": 0.2079920926292008, "grad_norm": 5.629660767190314, "learning_rate": 1.9919093294629157e-05, "loss": 1.1172, "step": 1473 }, { "epoch": 0.20813329567918668, "grad_norm": 4.204390157487826, "learning_rate": 1.9918899652593316e-05, "loss": 0.9352, "step": 1474 }, { "epoch": 0.20827449872917256, "grad_norm": 4.4772005200888625, "learning_rate": 1.9918705780046536e-05, "loss": 0.8675, "step": 1475 }, { "epoch": 0.20841570177915844, "grad_norm": 5.626415393579422, "learning_rate": 1.9918511676993315e-05, "loss": 1.2055, "step": 1476 }, { "epoch": 0.2085569048291443, "grad_norm": 6.125437345751269, "learning_rate": 1.9918317343438164e-05, "loss": 1.469, "step": 1477 }, { "epoch": 0.20869810787913018, "grad_norm": 4.200591435048954, "learning_rate": 1.99181227793856e-05, "loss": 0.9065, "step": 1478 }, { "epoch": 0.20883931092911606, "grad_norm": 4.439866975875026, "learning_rate": 1.9917927984840154e-05, "loss": 1.1414, "step": 1479 }, { "epoch": 0.20898051397910195, "grad_norm": 4.4176848686859955, "learning_rate": 1.9917732959806336e-05, "loss": 0.906, "step": 1480 }, { "epoch": 0.20912171702908783, "grad_norm": 6.079682722341598, "learning_rate": 1.9917537704288693e-05, "loss": 1.1725, "step": 1481 }, { "epoch": 0.2092629200790737, "grad_norm": 4.274384577599541, "learning_rate": 1.9917342218291752e-05, "loss": 1.0346, "step": 1482 }, { "epoch": 0.2094041231290596, "grad_norm": 4.914864329118279, "learning_rate": 1.9917146501820067e-05, "loss": 0.9349, "step": 1483 }, { "epoch": 0.20954532617904548, "grad_norm": 4.9990494170716415, "learning_rate": 1.9916950554878178e-05, "loss": 1.0778, "step": 1484 }, { "epoch": 0.20968652922903136, "grad_norm": 5.079526362438078, "learning_rate": 1.9916754377470637e-05, "loss": 1.1046, "step": 1485 }, { "epoch": 0.20982773227901722, "grad_norm": 4.511611139455432, "learning_rate": 1.9916557969602007e-05, "loss": 1.0812, "step": 1486 }, { "epoch": 0.2099689353290031, "grad_norm": 4.536891383435789, "learning_rate": 1.9916361331276857e-05, "loss": 1.17, "step": 1487 }, { "epoch": 0.21011013837898898, "grad_norm": 4.353493985291046, "learning_rate": 1.991616446249975e-05, "loss": 0.9709, "step": 1488 }, { "epoch": 0.21025134142897486, "grad_norm": 4.616006887337012, "learning_rate": 1.9915967363275264e-05, "loss": 1.0103, "step": 1489 }, { "epoch": 0.21039254447896075, "grad_norm": 5.104775374154522, "learning_rate": 1.9915770033607978e-05, "loss": 1.262, "step": 1490 }, { "epoch": 0.21053374752894663, "grad_norm": 4.124448992722673, "learning_rate": 1.991557247350248e-05, "loss": 0.9985, "step": 1491 }, { "epoch": 0.2106749505789325, "grad_norm": 5.593889407889321, "learning_rate": 1.9915374682963358e-05, "loss": 1.1781, "step": 1492 }, { "epoch": 0.2108161536289184, "grad_norm": 4.188496778636398, "learning_rate": 1.991517666199521e-05, "loss": 1.1469, "step": 1493 }, { "epoch": 0.21095735667890425, "grad_norm": 3.68563527640135, "learning_rate": 1.991497841060264e-05, "loss": 0.8996, "step": 1494 }, { "epoch": 0.21109855972889013, "grad_norm": 4.255939108084237, "learning_rate": 1.9914779928790255e-05, "loss": 1.0845, "step": 1495 }, { "epoch": 0.21123976277887602, "grad_norm": 4.797282241781136, "learning_rate": 1.9914581216562664e-05, "loss": 1.1462, "step": 1496 }, { "epoch": 0.2113809658288619, "grad_norm": 4.041068469546502, "learning_rate": 1.991438227392449e-05, "loss": 1.0085, "step": 1497 }, { "epoch": 0.21152216887884778, "grad_norm": 4.355245823655319, "learning_rate": 1.991418310088035e-05, "loss": 1.136, "step": 1498 }, { "epoch": 0.21166337192883367, "grad_norm": 4.697790786814241, "learning_rate": 1.991398369743488e-05, "loss": 1.2309, "step": 1499 }, { "epoch": 0.21180457497881955, "grad_norm": 4.608504291540901, "learning_rate": 1.9913784063592708e-05, "loss": 1.1577, "step": 1500 }, { "epoch": 0.21194577802880543, "grad_norm": 4.396493283712529, "learning_rate": 1.9913584199358476e-05, "loss": 1.2312, "step": 1501 }, { "epoch": 0.21208698107879131, "grad_norm": 4.337194424116833, "learning_rate": 1.991338410473683e-05, "loss": 0.9917, "step": 1502 }, { "epoch": 0.21222818412877717, "grad_norm": 4.4367918720550925, "learning_rate": 1.9913183779732417e-05, "loss": 1.1141, "step": 1503 }, { "epoch": 0.21236938717876305, "grad_norm": 4.732995747219146, "learning_rate": 1.9912983224349894e-05, "loss": 1.2732, "step": 1504 }, { "epoch": 0.21251059022874894, "grad_norm": 4.106189987215339, "learning_rate": 1.9912782438593922e-05, "loss": 1.0514, "step": 1505 }, { "epoch": 0.21265179327873482, "grad_norm": 5.956079726298888, "learning_rate": 1.991258142246917e-05, "loss": 1.4877, "step": 1506 }, { "epoch": 0.2127929963287207, "grad_norm": 4.427212311994908, "learning_rate": 1.9912380175980305e-05, "loss": 1.0993, "step": 1507 }, { "epoch": 0.21293419937870658, "grad_norm": 4.686264403318897, "learning_rate": 1.9912178699132005e-05, "loss": 1.1904, "step": 1508 }, { "epoch": 0.21307540242869247, "grad_norm": 4.422309263030953, "learning_rate": 1.991197699192895e-05, "loss": 1.2044, "step": 1509 }, { "epoch": 0.21321660547867835, "grad_norm": 5.2389404849321215, "learning_rate": 1.9911775054375835e-05, "loss": 1.2236, "step": 1510 }, { "epoch": 0.2133578085286642, "grad_norm": 4.759276753928988, "learning_rate": 1.9911572886477345e-05, "loss": 1.1365, "step": 1511 }, { "epoch": 0.2134990115786501, "grad_norm": 4.933440300020826, "learning_rate": 1.9911370488238185e-05, "loss": 1.3686, "step": 1512 }, { "epoch": 0.21364021462863597, "grad_norm": 4.695924520308634, "learning_rate": 1.9911167859663055e-05, "loss": 0.9598, "step": 1513 }, { "epoch": 0.21378141767862185, "grad_norm": 3.7344504328232353, "learning_rate": 1.9910965000756662e-05, "loss": 0.8862, "step": 1514 }, { "epoch": 0.21392262072860774, "grad_norm": 4.432426426627992, "learning_rate": 1.991076191152372e-05, "loss": 1.0417, "step": 1515 }, { "epoch": 0.21406382377859362, "grad_norm": 4.597702040015849, "learning_rate": 1.9910558591968956e-05, "loss": 1.1174, "step": 1516 }, { "epoch": 0.2142050268285795, "grad_norm": 4.6744777669417745, "learning_rate": 1.991035504209709e-05, "loss": 1.1904, "step": 1517 }, { "epoch": 0.2143462298785654, "grad_norm": 5.458366886242176, "learning_rate": 1.991015126191285e-05, "loss": 1.4444, "step": 1518 }, { "epoch": 0.21448743292855127, "grad_norm": 3.962004808059215, "learning_rate": 1.9909947251420977e-05, "loss": 0.9747, "step": 1519 }, { "epoch": 0.21462863597853712, "grad_norm": 4.625214726607694, "learning_rate": 1.990974301062621e-05, "loss": 1.001, "step": 1520 }, { "epoch": 0.214769839028523, "grad_norm": 4.172237718183528, "learning_rate": 1.9909538539533293e-05, "loss": 1.1562, "step": 1521 }, { "epoch": 0.2149110420785089, "grad_norm": 4.3872925671729375, "learning_rate": 1.990933383814698e-05, "loss": 1.0332, "step": 1522 }, { "epoch": 0.21505224512849477, "grad_norm": 4.859260209205926, "learning_rate": 1.990912890647203e-05, "loss": 1.1799, "step": 1523 }, { "epoch": 0.21519344817848066, "grad_norm": 4.6710726461081, "learning_rate": 1.99089237445132e-05, "loss": 1.2096, "step": 1524 }, { "epoch": 0.21533465122846654, "grad_norm": 4.548794129724055, "learning_rate": 1.9908718352275267e-05, "loss": 1.0294, "step": 1525 }, { "epoch": 0.21547585427845242, "grad_norm": 4.500362508875807, "learning_rate": 1.9908512729762995e-05, "loss": 1.0094, "step": 1526 }, { "epoch": 0.2156170573284383, "grad_norm": 5.042199568145323, "learning_rate": 1.9908306876981166e-05, "loss": 1.1713, "step": 1527 }, { "epoch": 0.21575826037842416, "grad_norm": 4.825496254065435, "learning_rate": 1.9908100793934566e-05, "loss": 1.2199, "step": 1528 }, { "epoch": 0.21589946342841004, "grad_norm": 4.3534431572420065, "learning_rate": 1.990789448062798e-05, "loss": 0.9683, "step": 1529 }, { "epoch": 0.21604066647839593, "grad_norm": 3.847853603742493, "learning_rate": 1.990768793706621e-05, "loss": 0.9884, "step": 1530 }, { "epoch": 0.2161818695283818, "grad_norm": 4.1650735143090785, "learning_rate": 1.9907481163254047e-05, "loss": 0.995, "step": 1531 }, { "epoch": 0.2163230725783677, "grad_norm": 3.8020847374106697, "learning_rate": 1.9907274159196302e-05, "loss": 0.9169, "step": 1532 }, { "epoch": 0.21646427562835358, "grad_norm": 5.8000634516265945, "learning_rate": 1.9907066924897784e-05, "loss": 1.0506, "step": 1533 }, { "epoch": 0.21660547867833946, "grad_norm": 4.456617879701141, "learning_rate": 1.9906859460363307e-05, "loss": 1.2079, "step": 1534 }, { "epoch": 0.21674668172832534, "grad_norm": 4.714095787046404, "learning_rate": 1.99066517655977e-05, "loss": 1.0313, "step": 1535 }, { "epoch": 0.21688788477831122, "grad_norm": 5.158629324384314, "learning_rate": 1.990644384060578e-05, "loss": 1.3862, "step": 1536 }, { "epoch": 0.21702908782829708, "grad_norm": 4.676539847816805, "learning_rate": 1.9906235685392384e-05, "loss": 1.1049, "step": 1537 }, { "epoch": 0.21717029087828296, "grad_norm": 4.411957573916679, "learning_rate": 1.990602729996235e-05, "loss": 1.0721, "step": 1538 }, { "epoch": 0.21731149392826885, "grad_norm": 4.611149247724363, "learning_rate": 1.9905818684320518e-05, "loss": 1.2322, "step": 1539 }, { "epoch": 0.21745269697825473, "grad_norm": 5.2704085433679975, "learning_rate": 1.990560983847174e-05, "loss": 1.1648, "step": 1540 }, { "epoch": 0.2175939000282406, "grad_norm": 4.836683215434626, "learning_rate": 1.9905400762420862e-05, "loss": 1.3735, "step": 1541 }, { "epoch": 0.2177351030782265, "grad_norm": 5.308078643337155, "learning_rate": 1.9905191456172757e-05, "loss": 1.3188, "step": 1542 }, { "epoch": 0.21787630612821238, "grad_norm": 4.2899630934464446, "learning_rate": 1.9904981919732272e-05, "loss": 1.1812, "step": 1543 }, { "epoch": 0.21801750917819826, "grad_norm": 4.444702670080381, "learning_rate": 1.990477215310429e-05, "loss": 1.1512, "step": 1544 }, { "epoch": 0.21815871222818412, "grad_norm": 4.354822961166766, "learning_rate": 1.9904562156293678e-05, "loss": 1.1362, "step": 1545 }, { "epoch": 0.21829991527817, "grad_norm": 4.602600327130179, "learning_rate": 1.990435192930532e-05, "loss": 1.103, "step": 1546 }, { "epoch": 0.21844111832815588, "grad_norm": 4.241725068341514, "learning_rate": 1.99041414721441e-05, "loss": 0.9984, "step": 1547 }, { "epoch": 0.21858232137814176, "grad_norm": 4.664697190232162, "learning_rate": 1.9903930784814908e-05, "loss": 1.0443, "step": 1548 }, { "epoch": 0.21872352442812765, "grad_norm": 5.476567973922895, "learning_rate": 1.9903719867322645e-05, "loss": 1.4682, "step": 1549 }, { "epoch": 0.21886472747811353, "grad_norm": 3.9668953778314795, "learning_rate": 1.9903508719672208e-05, "loss": 1.0169, "step": 1550 }, { "epoch": 0.2190059305280994, "grad_norm": 4.497128929965146, "learning_rate": 1.9903297341868506e-05, "loss": 1.1391, "step": 1551 }, { "epoch": 0.2191471335780853, "grad_norm": 4.232371536835727, "learning_rate": 1.990308573391645e-05, "loss": 1.0663, "step": 1552 }, { "epoch": 0.21928833662807118, "grad_norm": 4.53944282642431, "learning_rate": 1.990287389582096e-05, "loss": 1.1959, "step": 1553 }, { "epoch": 0.21942953967805703, "grad_norm": 5.121249690152903, "learning_rate": 1.9902661827586953e-05, "loss": 1.0896, "step": 1554 }, { "epoch": 0.21957074272804292, "grad_norm": 4.762971631669555, "learning_rate": 1.9902449529219367e-05, "loss": 0.9744, "step": 1555 }, { "epoch": 0.2197119457780288, "grad_norm": 4.60961996644637, "learning_rate": 1.9902237000723127e-05, "loss": 1.0719, "step": 1556 }, { "epoch": 0.21985314882801468, "grad_norm": 4.3142455290782245, "learning_rate": 1.9902024242103174e-05, "loss": 1.116, "step": 1557 }, { "epoch": 0.21999435187800057, "grad_norm": 5.094831059601257, "learning_rate": 1.9901811253364458e-05, "loss": 1.2812, "step": 1558 }, { "epoch": 0.22013555492798645, "grad_norm": 3.9721541071523916, "learning_rate": 1.990159803451192e-05, "loss": 0.9889, "step": 1559 }, { "epoch": 0.22027675797797233, "grad_norm": 3.998532612690012, "learning_rate": 1.9901384585550527e-05, "loss": 0.9128, "step": 1560 }, { "epoch": 0.22041796102795821, "grad_norm": 3.438385046725496, "learning_rate": 1.9901170906485227e-05, "loss": 0.878, "step": 1561 }, { "epoch": 0.22055916407794407, "grad_norm": 5.087753206009238, "learning_rate": 1.990095699732099e-05, "loss": 1.0865, "step": 1562 }, { "epoch": 0.22070036712792995, "grad_norm": 5.168488996112628, "learning_rate": 1.9900742858062792e-05, "loss": 1.1686, "step": 1563 }, { "epoch": 0.22084157017791584, "grad_norm": 6.4865301478730295, "learning_rate": 1.990052848871561e-05, "loss": 1.0443, "step": 1564 }, { "epoch": 0.22098277322790172, "grad_norm": 3.904903801146498, "learning_rate": 1.9900313889284413e-05, "loss": 0.9511, "step": 1565 }, { "epoch": 0.2211239762778876, "grad_norm": 4.605190462916444, "learning_rate": 1.9900099059774197e-05, "loss": 1.0432, "step": 1566 }, { "epoch": 0.22126517932787348, "grad_norm": 4.0027389562042055, "learning_rate": 1.9899884000189958e-05, "loss": 0.9627, "step": 1567 }, { "epoch": 0.22140638237785937, "grad_norm": 5.773245037479774, "learning_rate": 1.989966871053669e-05, "loss": 1.42, "step": 1568 }, { "epoch": 0.22154758542784525, "grad_norm": 4.065406299580875, "learning_rate": 1.98994531908194e-05, "loss": 1.0858, "step": 1569 }, { "epoch": 0.22168878847783113, "grad_norm": 4.962583575556403, "learning_rate": 1.989923744104309e-05, "loss": 1.2689, "step": 1570 }, { "epoch": 0.221829991527817, "grad_norm": 4.093737009840619, "learning_rate": 1.989902146121277e-05, "loss": 0.9932, "step": 1571 }, { "epoch": 0.22197119457780287, "grad_norm": 3.9980750774423957, "learning_rate": 1.9898805251333477e-05, "loss": 0.909, "step": 1572 }, { "epoch": 0.22211239762778875, "grad_norm": 4.683772560445976, "learning_rate": 1.9898588811410218e-05, "loss": 1.2598, "step": 1573 }, { "epoch": 0.22225360067777464, "grad_norm": 5.011504456496572, "learning_rate": 1.9898372141448033e-05, "loss": 1.0185, "step": 1574 }, { "epoch": 0.22239480372776052, "grad_norm": 4.29085771167402, "learning_rate": 1.989815524145195e-05, "loss": 1.0056, "step": 1575 }, { "epoch": 0.2225360067777464, "grad_norm": 4.788502181666632, "learning_rate": 1.989793811142702e-05, "loss": 1.1437, "step": 1576 }, { "epoch": 0.2226772098277323, "grad_norm": 4.89009601638008, "learning_rate": 1.989772075137828e-05, "loss": 1.1972, "step": 1577 }, { "epoch": 0.22281841287771817, "grad_norm": 4.49598540136094, "learning_rate": 1.9897503161310786e-05, "loss": 1.0385, "step": 1578 }, { "epoch": 0.22295961592770402, "grad_norm": 4.122466128148814, "learning_rate": 1.989728534122959e-05, "loss": 0.9304, "step": 1579 }, { "epoch": 0.2231008189776899, "grad_norm": 4.769051863148613, "learning_rate": 1.989706729113976e-05, "loss": 1.238, "step": 1580 }, { "epoch": 0.2232420220276758, "grad_norm": 3.9472027050939538, "learning_rate": 1.9896849011046356e-05, "loss": 1.0991, "step": 1581 }, { "epoch": 0.22338322507766167, "grad_norm": 4.293727963057834, "learning_rate": 1.989663050095446e-05, "loss": 1.0645, "step": 1582 }, { "epoch": 0.22352442812764756, "grad_norm": 4.0812027797809565, "learning_rate": 1.9896411760869142e-05, "loss": 0.9768, "step": 1583 }, { "epoch": 0.22366563117763344, "grad_norm": 4.286286461908829, "learning_rate": 1.989619279079549e-05, "loss": 0.9328, "step": 1584 }, { "epoch": 0.22380683422761932, "grad_norm": 5.974064484809082, "learning_rate": 1.9895973590738592e-05, "loss": 1.5244, "step": 1585 }, { "epoch": 0.2239480372776052, "grad_norm": 4.773150924543597, "learning_rate": 1.989575416070354e-05, "loss": 1.1058, "step": 1586 }, { "epoch": 0.2240892403275911, "grad_norm": 5.452686610528497, "learning_rate": 1.9895534500695435e-05, "loss": 1.1389, "step": 1587 }, { "epoch": 0.22423044337757694, "grad_norm": 4.381928584108818, "learning_rate": 1.9895314610719382e-05, "loss": 1.0225, "step": 1588 }, { "epoch": 0.22437164642756283, "grad_norm": 4.13852941032075, "learning_rate": 1.989509449078049e-05, "loss": 1.0382, "step": 1589 }, { "epoch": 0.2245128494775487, "grad_norm": 4.092587861445075, "learning_rate": 1.9894874140883877e-05, "loss": 1.0752, "step": 1590 }, { "epoch": 0.2246540525275346, "grad_norm": 4.983711400258463, "learning_rate": 1.9894653561034664e-05, "loss": 1.1382, "step": 1591 }, { "epoch": 0.22479525557752048, "grad_norm": 4.87150613365761, "learning_rate": 1.9894432751237974e-05, "loss": 0.9787, "step": 1592 }, { "epoch": 0.22493645862750636, "grad_norm": 4.841579794591345, "learning_rate": 1.9894211711498938e-05, "loss": 1.3469, "step": 1593 }, { "epoch": 0.22507766167749224, "grad_norm": 4.070352136164378, "learning_rate": 1.9893990441822698e-05, "loss": 1.2348, "step": 1594 }, { "epoch": 0.22521886472747812, "grad_norm": 4.590809813785775, "learning_rate": 1.989376894221439e-05, "loss": 1.1557, "step": 1595 }, { "epoch": 0.225360067777464, "grad_norm": 4.589861481282709, "learning_rate": 1.9893547212679162e-05, "loss": 1.238, "step": 1596 }, { "epoch": 0.22550127082744986, "grad_norm": 4.1176045795267004, "learning_rate": 1.9893325253222177e-05, "loss": 1.0737, "step": 1597 }, { "epoch": 0.22564247387743575, "grad_norm": 4.7917550472321695, "learning_rate": 1.989310306384858e-05, "loss": 1.0873, "step": 1598 }, { "epoch": 0.22578367692742163, "grad_norm": 4.0297417752338225, "learning_rate": 1.9892880644563544e-05, "loss": 0.8758, "step": 1599 }, { "epoch": 0.2259248799774075, "grad_norm": 4.9923761745667266, "learning_rate": 1.9892657995372227e-05, "loss": 1.2517, "step": 1600 }, { "epoch": 0.2260660830273934, "grad_norm": 4.0530308591510105, "learning_rate": 1.989243511627982e-05, "loss": 0.9146, "step": 1601 }, { "epoch": 0.22620728607737928, "grad_norm": 4.706955425336845, "learning_rate": 1.989221200729149e-05, "loss": 1.1599, "step": 1602 }, { "epoch": 0.22634848912736516, "grad_norm": 3.9181733669419794, "learning_rate": 1.989198866841242e-05, "loss": 1.0262, "step": 1603 }, { "epoch": 0.22648969217735104, "grad_norm": 4.251451376510779, "learning_rate": 1.989176509964781e-05, "loss": 0.9518, "step": 1604 }, { "epoch": 0.2266308952273369, "grad_norm": 3.8329407864119522, "learning_rate": 1.989154130100285e-05, "loss": 0.8747, "step": 1605 }, { "epoch": 0.22677209827732278, "grad_norm": 3.608231562736672, "learning_rate": 1.9891317272482744e-05, "loss": 0.9787, "step": 1606 }, { "epoch": 0.22691330132730866, "grad_norm": 5.041225098011339, "learning_rate": 1.9891093014092695e-05, "loss": 1.1582, "step": 1607 }, { "epoch": 0.22705450437729455, "grad_norm": 3.7433228897161754, "learning_rate": 1.9890868525837917e-05, "loss": 0.8656, "step": 1608 }, { "epoch": 0.22719570742728043, "grad_norm": 3.8263966088984698, "learning_rate": 1.9890643807723622e-05, "loss": 0.9359, "step": 1609 }, { "epoch": 0.2273369104772663, "grad_norm": 4.840298786277609, "learning_rate": 1.989041885975504e-05, "loss": 1.1281, "step": 1610 }, { "epoch": 0.2274781135272522, "grad_norm": 4.025610919817758, "learning_rate": 1.9890193681937395e-05, "loss": 1.0443, "step": 1611 }, { "epoch": 0.22761931657723808, "grad_norm": 4.584359521097056, "learning_rate": 1.9889968274275916e-05, "loss": 1.1226, "step": 1612 }, { "epoch": 0.22776051962722396, "grad_norm": 4.203958313483456, "learning_rate": 1.988974263677585e-05, "loss": 1.0554, "step": 1613 }, { "epoch": 0.22790172267720982, "grad_norm": 5.48371195877179, "learning_rate": 1.9889516769442436e-05, "loss": 1.1697, "step": 1614 }, { "epoch": 0.2280429257271957, "grad_norm": 4.438725634337092, "learning_rate": 1.988929067228092e-05, "loss": 1.008, "step": 1615 }, { "epoch": 0.22818412877718158, "grad_norm": 6.411832488371095, "learning_rate": 1.9889064345296563e-05, "loss": 1.3004, "step": 1616 }, { "epoch": 0.22832533182716747, "grad_norm": 4.642232756329534, "learning_rate": 1.988883778849462e-05, "loss": 1.1189, "step": 1617 }, { "epoch": 0.22846653487715335, "grad_norm": 4.025352841483412, "learning_rate": 1.9888611001880357e-05, "loss": 0.9657, "step": 1618 }, { "epoch": 0.22860773792713923, "grad_norm": 5.279854436286271, "learning_rate": 1.9888383985459047e-05, "loss": 1.1961, "step": 1619 }, { "epoch": 0.22874894097712511, "grad_norm": 5.1586505585892555, "learning_rate": 1.988815673923596e-05, "loss": 1.2634, "step": 1620 }, { "epoch": 0.228890144027111, "grad_norm": 4.755881687895121, "learning_rate": 1.9887929263216382e-05, "loss": 1.1455, "step": 1621 }, { "epoch": 0.22903134707709685, "grad_norm": 4.239708742975153, "learning_rate": 1.9887701557405598e-05, "loss": 1.1488, "step": 1622 }, { "epoch": 0.22917255012708274, "grad_norm": 5.4964979493626, "learning_rate": 1.9887473621808904e-05, "loss": 1.4709, "step": 1623 }, { "epoch": 0.22931375317706862, "grad_norm": 4.752632069110938, "learning_rate": 1.988724545643159e-05, "loss": 1.2249, "step": 1624 }, { "epoch": 0.2294549562270545, "grad_norm": 5.763181318267491, "learning_rate": 1.9887017061278962e-05, "loss": 1.3164, "step": 1625 }, { "epoch": 0.22959615927704038, "grad_norm": 3.8139962099498668, "learning_rate": 1.9886788436356325e-05, "loss": 0.7099, "step": 1626 }, { "epoch": 0.22973736232702627, "grad_norm": 4.27628324361042, "learning_rate": 1.9886559581669e-05, "loss": 1.0002, "step": 1627 }, { "epoch": 0.22987856537701215, "grad_norm": 4.233865796128205, "learning_rate": 1.9886330497222294e-05, "loss": 1.0313, "step": 1628 }, { "epoch": 0.23001976842699803, "grad_norm": 4.172323111604617, "learning_rate": 1.988610118302154e-05, "loss": 1.2652, "step": 1629 }, { "epoch": 0.23016097147698392, "grad_norm": 4.405271108499988, "learning_rate": 1.988587163907206e-05, "loss": 1.0866, "step": 1630 }, { "epoch": 0.23030217452696977, "grad_norm": 4.4386995630694965, "learning_rate": 1.9885641865379197e-05, "loss": 1.0777, "step": 1631 }, { "epoch": 0.23044337757695565, "grad_norm": 4.498919369634765, "learning_rate": 1.9885411861948287e-05, "loss": 1.1183, "step": 1632 }, { "epoch": 0.23058458062694154, "grad_norm": 4.641683352066934, "learning_rate": 1.988518162878467e-05, "loss": 1.157, "step": 1633 }, { "epoch": 0.23072578367692742, "grad_norm": 4.615818095742444, "learning_rate": 1.9884951165893706e-05, "loss": 1.0426, "step": 1634 }, { "epoch": 0.2308669867269133, "grad_norm": 4.600505659773712, "learning_rate": 1.9884720473280744e-05, "loss": 0.9647, "step": 1635 }, { "epoch": 0.2310081897768992, "grad_norm": 4.539403666804867, "learning_rate": 1.9884489550951146e-05, "loss": 0.9634, "step": 1636 }, { "epoch": 0.23114939282688507, "grad_norm": 4.765487592744753, "learning_rate": 1.988425839891028e-05, "loss": 1.0634, "step": 1637 }, { "epoch": 0.23129059587687095, "grad_norm": 4.808267275996351, "learning_rate": 1.9884027017163515e-05, "loss": 1.2106, "step": 1638 }, { "epoch": 0.2314317989268568, "grad_norm": 4.32328230949736, "learning_rate": 1.9883795405716236e-05, "loss": 0.9145, "step": 1639 }, { "epoch": 0.2315730019768427, "grad_norm": 4.55498929764465, "learning_rate": 1.9883563564573815e-05, "loss": 1.1138, "step": 1640 }, { "epoch": 0.23171420502682857, "grad_norm": 4.246380775676594, "learning_rate": 1.9883331493741652e-05, "loss": 0.9501, "step": 1641 }, { "epoch": 0.23185540807681446, "grad_norm": 4.400183714381763, "learning_rate": 1.9883099193225125e-05, "loss": 1.0882, "step": 1642 }, { "epoch": 0.23199661112680034, "grad_norm": 4.30313156476879, "learning_rate": 1.9882866663029645e-05, "loss": 1.0138, "step": 1643 }, { "epoch": 0.23213781417678622, "grad_norm": 4.42083778849144, "learning_rate": 1.9882633903160612e-05, "loss": 0.9531, "step": 1644 }, { "epoch": 0.2322790172267721, "grad_norm": 4.350775836451018, "learning_rate": 1.9882400913623436e-05, "loss": 0.9756, "step": 1645 }, { "epoch": 0.232420220276758, "grad_norm": 4.484867879510931, "learning_rate": 1.988216769442353e-05, "loss": 0.9172, "step": 1646 }, { "epoch": 0.23256142332674387, "grad_norm": 4.517316634333912, "learning_rate": 1.9881934245566313e-05, "loss": 1.299, "step": 1647 }, { "epoch": 0.23270262637672973, "grad_norm": 4.36812790086071, "learning_rate": 1.9881700567057214e-05, "loss": 0.9679, "step": 1648 }, { "epoch": 0.2328438294267156, "grad_norm": 4.313217494495988, "learning_rate": 1.9881466658901664e-05, "loss": 1.1748, "step": 1649 }, { "epoch": 0.2329850324767015, "grad_norm": 4.920702194718632, "learning_rate": 1.988123252110509e-05, "loss": 1.1448, "step": 1650 }, { "epoch": 0.23312623552668738, "grad_norm": 4.162698486147186, "learning_rate": 1.9880998153672945e-05, "loss": 1.0029, "step": 1651 }, { "epoch": 0.23326743857667326, "grad_norm": 4.008862954302087, "learning_rate": 1.9880763556610666e-05, "loss": 1.0292, "step": 1652 }, { "epoch": 0.23340864162665914, "grad_norm": 5.696606464318699, "learning_rate": 1.988052872992371e-05, "loss": 1.3239, "step": 1653 }, { "epoch": 0.23354984467664502, "grad_norm": 5.6417345089900595, "learning_rate": 1.988029367361753e-05, "loss": 1.3931, "step": 1654 }, { "epoch": 0.2336910477266309, "grad_norm": 4.903798270941525, "learning_rate": 1.98800583876976e-05, "loss": 1.3083, "step": 1655 }, { "epoch": 0.23383225077661676, "grad_norm": 4.8103839701358515, "learning_rate": 1.9879822872169378e-05, "loss": 1.0626, "step": 1656 }, { "epoch": 0.23397345382660265, "grad_norm": 4.547335470661128, "learning_rate": 1.9879587127038333e-05, "loss": 1.1994, "step": 1657 }, { "epoch": 0.23411465687658853, "grad_norm": 4.348026670633358, "learning_rate": 1.9879351152309955e-05, "loss": 1.0391, "step": 1658 }, { "epoch": 0.2342558599265744, "grad_norm": 5.117209783414749, "learning_rate": 1.9879114947989723e-05, "loss": 1.1435, "step": 1659 }, { "epoch": 0.2343970629765603, "grad_norm": 4.165254928531248, "learning_rate": 1.9878878514083124e-05, "loss": 0.9047, "step": 1660 }, { "epoch": 0.23453826602654618, "grad_norm": 4.505105868756046, "learning_rate": 1.9878641850595658e-05, "loss": 1.0744, "step": 1661 }, { "epoch": 0.23467946907653206, "grad_norm": 4.705833125224752, "learning_rate": 1.9878404957532817e-05, "loss": 1.0001, "step": 1662 }, { "epoch": 0.23482067212651794, "grad_norm": 5.032335874207705, "learning_rate": 1.9878167834900114e-05, "loss": 1.288, "step": 1663 }, { "epoch": 0.23496187517650383, "grad_norm": 4.705290949900482, "learning_rate": 1.9877930482703057e-05, "loss": 1.1444, "step": 1664 }, { "epoch": 0.23510307822648968, "grad_norm": 4.939845541324191, "learning_rate": 1.9877692900947156e-05, "loss": 1.073, "step": 1665 }, { "epoch": 0.23524428127647556, "grad_norm": 4.85716042244654, "learning_rate": 1.9877455089637944e-05, "loss": 1.2671, "step": 1666 }, { "epoch": 0.23538548432646145, "grad_norm": 4.500251023508907, "learning_rate": 1.987721704878094e-05, "loss": 1.2238, "step": 1667 }, { "epoch": 0.23552668737644733, "grad_norm": 4.242874695954041, "learning_rate": 1.9876978778381675e-05, "loss": 1.0523, "step": 1668 }, { "epoch": 0.2356678904264332, "grad_norm": 3.8910482723160786, "learning_rate": 1.987674027844569e-05, "loss": 0.9617, "step": 1669 }, { "epoch": 0.2358090934764191, "grad_norm": 4.731797056617106, "learning_rate": 1.9876501548978527e-05, "loss": 1.0026, "step": 1670 }, { "epoch": 0.23595029652640498, "grad_norm": 4.595267519435098, "learning_rate": 1.9876262589985737e-05, "loss": 1.0876, "step": 1671 }, { "epoch": 0.23609149957639086, "grad_norm": 4.873601545974836, "learning_rate": 1.9876023401472865e-05, "loss": 1.037, "step": 1672 }, { "epoch": 0.23623270262637672, "grad_norm": 4.307670544084782, "learning_rate": 1.9875783983445473e-05, "loss": 1.0749, "step": 1673 }, { "epoch": 0.2363739056763626, "grad_norm": 6.235705195087301, "learning_rate": 1.987554433590913e-05, "loss": 1.2804, "step": 1674 }, { "epoch": 0.23651510872634848, "grad_norm": 4.71858407063096, "learning_rate": 1.98753044588694e-05, "loss": 1.0622, "step": 1675 }, { "epoch": 0.23665631177633437, "grad_norm": 3.793384657262079, "learning_rate": 1.987506435233186e-05, "loss": 0.9055, "step": 1676 }, { "epoch": 0.23679751482632025, "grad_norm": 4.338239967516377, "learning_rate": 1.9874824016302088e-05, "loss": 1.186, "step": 1677 }, { "epoch": 0.23693871787630613, "grad_norm": 4.006051005718899, "learning_rate": 1.987458345078567e-05, "loss": 1.0295, "step": 1678 }, { "epoch": 0.23707992092629201, "grad_norm": 4.066927362349002, "learning_rate": 1.98743426557882e-05, "loss": 0.9697, "step": 1679 }, { "epoch": 0.2372211239762779, "grad_norm": 5.456678326874907, "learning_rate": 1.9874101631315268e-05, "loss": 1.2831, "step": 1680 }, { "epoch": 0.23736232702626378, "grad_norm": 4.1762344746772495, "learning_rate": 1.987386037737248e-05, "loss": 1.0886, "step": 1681 }, { "epoch": 0.23750353007624964, "grad_norm": 4.117573342576986, "learning_rate": 1.9873618893965442e-05, "loss": 0.9458, "step": 1682 }, { "epoch": 0.23764473312623552, "grad_norm": 5.3565714724989, "learning_rate": 1.9873377181099763e-05, "loss": 1.3172, "step": 1683 }, { "epoch": 0.2377859361762214, "grad_norm": 3.849748866888073, "learning_rate": 1.987313523878106e-05, "loss": 0.8927, "step": 1684 }, { "epoch": 0.23792713922620728, "grad_norm": 4.451472552340125, "learning_rate": 1.987289306701496e-05, "loss": 1.2681, "step": 1685 }, { "epoch": 0.23806834227619317, "grad_norm": 4.203497142817664, "learning_rate": 1.987265066580709e-05, "loss": 0.9937, "step": 1686 }, { "epoch": 0.23820954532617905, "grad_norm": 4.9282752744568725, "learning_rate": 1.9872408035163084e-05, "loss": 1.1195, "step": 1687 }, { "epoch": 0.23835074837616493, "grad_norm": 3.992380731046059, "learning_rate": 1.9872165175088578e-05, "loss": 1.0189, "step": 1688 }, { "epoch": 0.23849195142615082, "grad_norm": 4.098839998121916, "learning_rate": 1.9871922085589215e-05, "loss": 1.0931, "step": 1689 }, { "epoch": 0.23863315447613667, "grad_norm": 5.208312723738809, "learning_rate": 1.9871678766670647e-05, "loss": 1.4505, "step": 1690 }, { "epoch": 0.23877435752612255, "grad_norm": 4.745912837766724, "learning_rate": 1.9871435218338527e-05, "loss": 1.2483, "step": 1691 }, { "epoch": 0.23891556057610844, "grad_norm": 4.033888158834617, "learning_rate": 1.9871191440598515e-05, "loss": 1.1336, "step": 1692 }, { "epoch": 0.23905676362609432, "grad_norm": 3.891479233555438, "learning_rate": 1.9870947433456278e-05, "loss": 1.0295, "step": 1693 }, { "epoch": 0.2391979666760802, "grad_norm": 5.141893524541058, "learning_rate": 1.9870703196917485e-05, "loss": 1.2484, "step": 1694 }, { "epoch": 0.2393391697260661, "grad_norm": 4.203809566505391, "learning_rate": 1.9870458730987815e-05, "loss": 1.0601, "step": 1695 }, { "epoch": 0.23948037277605197, "grad_norm": 5.2456508956128, "learning_rate": 1.9870214035672945e-05, "loss": 1.1457, "step": 1696 }, { "epoch": 0.23962157582603785, "grad_norm": 4.552540297619828, "learning_rate": 1.986996911097856e-05, "loss": 0.9454, "step": 1697 }, { "epoch": 0.23976277887602374, "grad_norm": 4.224538218510822, "learning_rate": 1.986972395691036e-05, "loss": 1.1234, "step": 1698 }, { "epoch": 0.2399039819260096, "grad_norm": 3.833203831361616, "learning_rate": 1.9869478573474038e-05, "loss": 0.877, "step": 1699 }, { "epoch": 0.24004518497599547, "grad_norm": 5.052699288939831, "learning_rate": 1.9869232960675292e-05, "loss": 1.4346, "step": 1700 }, { "epoch": 0.24018638802598136, "grad_norm": 4.035177596131794, "learning_rate": 1.986898711851984e-05, "loss": 1.0472, "step": 1701 }, { "epoch": 0.24032759107596724, "grad_norm": 4.7608775188113475, "learning_rate": 1.9868741047013382e-05, "loss": 1.2139, "step": 1702 }, { "epoch": 0.24046879412595312, "grad_norm": 3.7414722798428137, "learning_rate": 1.9868494746161652e-05, "loss": 0.885, "step": 1703 }, { "epoch": 0.240609997175939, "grad_norm": 3.4924063060126103, "learning_rate": 1.986824821597036e-05, "loss": 0.8345, "step": 1704 }, { "epoch": 0.2407512002259249, "grad_norm": 4.422179777304984, "learning_rate": 1.986800145644524e-05, "loss": 1.0534, "step": 1705 }, { "epoch": 0.24089240327591077, "grad_norm": 4.466169029559722, "learning_rate": 1.9867754467592037e-05, "loss": 1.0033, "step": 1706 }, { "epoch": 0.24103360632589663, "grad_norm": 5.039255682778423, "learning_rate": 1.9867507249416476e-05, "loss": 1.4238, "step": 1707 }, { "epoch": 0.2411748093758825, "grad_norm": 4.281473088255995, "learning_rate": 1.9867259801924306e-05, "loss": 1.2614, "step": 1708 }, { "epoch": 0.2413160124258684, "grad_norm": 4.5342909187472875, "learning_rate": 1.9867012125121282e-05, "loss": 1.0153, "step": 1709 }, { "epoch": 0.24145721547585428, "grad_norm": 5.086330216663602, "learning_rate": 1.9866764219013154e-05, "loss": 1.2414, "step": 1710 }, { "epoch": 0.24159841852584016, "grad_norm": 4.678333873855341, "learning_rate": 1.9866516083605693e-05, "loss": 1.0939, "step": 1711 }, { "epoch": 0.24173962157582604, "grad_norm": 5.202446602126027, "learning_rate": 1.9866267718904655e-05, "loss": 1.2421, "step": 1712 }, { "epoch": 0.24188082462581192, "grad_norm": 4.714916813295156, "learning_rate": 1.986601912491582e-05, "loss": 1.2287, "step": 1713 }, { "epoch": 0.2420220276757978, "grad_norm": 4.144168509639218, "learning_rate": 1.9865770301644956e-05, "loss": 1.1826, "step": 1714 }, { "epoch": 0.2421632307257837, "grad_norm": 4.5366926271985735, "learning_rate": 1.9865521249097854e-05, "loss": 1.0736, "step": 1715 }, { "epoch": 0.24230443377576955, "grad_norm": 5.142197600056926, "learning_rate": 1.9865271967280297e-05, "loss": 1.2572, "step": 1716 }, { "epoch": 0.24244563682575543, "grad_norm": 4.238112032002369, "learning_rate": 1.986502245619808e-05, "loss": 0.9562, "step": 1717 }, { "epoch": 0.2425868398757413, "grad_norm": 4.600405060786195, "learning_rate": 1.9864772715857e-05, "loss": 1.2361, "step": 1718 }, { "epoch": 0.2427280429257272, "grad_norm": 4.5580871962843315, "learning_rate": 1.9864522746262867e-05, "loss": 0.9631, "step": 1719 }, { "epoch": 0.24286924597571308, "grad_norm": 4.192100401700932, "learning_rate": 1.9864272547421482e-05, "loss": 1.1597, "step": 1720 }, { "epoch": 0.24301044902569896, "grad_norm": 4.794180102186992, "learning_rate": 1.9864022119338667e-05, "loss": 1.1438, "step": 1721 }, { "epoch": 0.24315165207568484, "grad_norm": 4.558330996321931, "learning_rate": 1.9863771462020235e-05, "loss": 1.0424, "step": 1722 }, { "epoch": 0.24329285512567073, "grad_norm": 5.314968533501325, "learning_rate": 1.9863520575472014e-05, "loss": 1.5042, "step": 1723 }, { "epoch": 0.24343405817565658, "grad_norm": 4.028263697608648, "learning_rate": 1.9863269459699836e-05, "loss": 1.1039, "step": 1724 }, { "epoch": 0.24357526122564246, "grad_norm": 4.240398429772271, "learning_rate": 1.9863018114709534e-05, "loss": 1.3008, "step": 1725 }, { "epoch": 0.24371646427562835, "grad_norm": 6.089845837229606, "learning_rate": 1.986276654050695e-05, "loss": 1.222, "step": 1726 }, { "epoch": 0.24385766732561423, "grad_norm": 4.273309056232436, "learning_rate": 1.986251473709793e-05, "loss": 1.0969, "step": 1727 }, { "epoch": 0.2439988703756001, "grad_norm": 4.252715119148133, "learning_rate": 1.986226270448833e-05, "loss": 1.1508, "step": 1728 }, { "epoch": 0.244140073425586, "grad_norm": 4.1978920833157725, "learning_rate": 1.9862010442684004e-05, "loss": 0.9508, "step": 1729 }, { "epoch": 0.24428127647557188, "grad_norm": 4.870562332202742, "learning_rate": 1.9861757951690813e-05, "loss": 1.099, "step": 1730 }, { "epoch": 0.24442247952555776, "grad_norm": 3.929110816382615, "learning_rate": 1.9861505231514626e-05, "loss": 0.9672, "step": 1731 }, { "epoch": 0.24456368257554364, "grad_norm": 4.5533792958763915, "learning_rate": 1.9861252282161313e-05, "loss": 1.1598, "step": 1732 }, { "epoch": 0.2447048856255295, "grad_norm": 5.061786705193654, "learning_rate": 1.986099910363676e-05, "loss": 1.21, "step": 1733 }, { "epoch": 0.24484608867551538, "grad_norm": 4.199344169831758, "learning_rate": 1.9860745695946848e-05, "loss": 1.0084, "step": 1734 }, { "epoch": 0.24498729172550127, "grad_norm": 5.0954181604947735, "learning_rate": 1.986049205909746e-05, "loss": 1.3114, "step": 1735 }, { "epoch": 0.24512849477548715, "grad_norm": 4.367454921385469, "learning_rate": 1.9860238193094497e-05, "loss": 1.1882, "step": 1736 }, { "epoch": 0.24526969782547303, "grad_norm": 4.005041540695415, "learning_rate": 1.9859984097943855e-05, "loss": 1.0128, "step": 1737 }, { "epoch": 0.24541090087545891, "grad_norm": 5.14873948961256, "learning_rate": 1.9859729773651446e-05, "loss": 1.2109, "step": 1738 }, { "epoch": 0.2455521039254448, "grad_norm": 4.367870147356684, "learning_rate": 1.985947522022317e-05, "loss": 1.0385, "step": 1739 }, { "epoch": 0.24569330697543068, "grad_norm": 4.309987246038187, "learning_rate": 1.985922043766495e-05, "loss": 1.16, "step": 1740 }, { "epoch": 0.24583451002541654, "grad_norm": 4.943393901506551, "learning_rate": 1.9858965425982703e-05, "loss": 1.1889, "step": 1741 }, { "epoch": 0.24597571307540242, "grad_norm": 4.232150717719606, "learning_rate": 1.985871018518236e-05, "loss": 1.2098, "step": 1742 }, { "epoch": 0.2461169161253883, "grad_norm": 4.994145106063239, "learning_rate": 1.985845471526985e-05, "loss": 1.3068, "step": 1743 }, { "epoch": 0.24625811917537418, "grad_norm": 5.020111169772371, "learning_rate": 1.9858199016251106e-05, "loss": 1.1494, "step": 1744 }, { "epoch": 0.24639932222536007, "grad_norm": 4.528137671511692, "learning_rate": 1.985794308813208e-05, "loss": 1.0523, "step": 1745 }, { "epoch": 0.24654052527534595, "grad_norm": 4.141333717613249, "learning_rate": 1.985768693091871e-05, "loss": 0.9969, "step": 1746 }, { "epoch": 0.24668172832533183, "grad_norm": 3.5676800497331267, "learning_rate": 1.9857430544616953e-05, "loss": 0.9079, "step": 1747 }, { "epoch": 0.24682293137531772, "grad_norm": 4.316703458443119, "learning_rate": 1.9857173929232768e-05, "loss": 1.0516, "step": 1748 }, { "epoch": 0.2469641344253036, "grad_norm": 4.258493156549214, "learning_rate": 1.9856917084772117e-05, "loss": 1.1117, "step": 1749 }, { "epoch": 0.24710533747528945, "grad_norm": 4.140218259882781, "learning_rate": 1.985666001124097e-05, "loss": 1.1266, "step": 1750 }, { "epoch": 0.24724654052527534, "grad_norm": 4.21720054668738, "learning_rate": 1.9856402708645305e-05, "loss": 0.9311, "step": 1751 }, { "epoch": 0.24738774357526122, "grad_norm": 5.593212202313464, "learning_rate": 1.9856145176991093e-05, "loss": 1.4965, "step": 1752 }, { "epoch": 0.2475289466252471, "grad_norm": 4.057438177436997, "learning_rate": 1.9855887416284325e-05, "loss": 1.1052, "step": 1753 }, { "epoch": 0.247670149675233, "grad_norm": 4.356891711290078, "learning_rate": 1.9855629426530992e-05, "loss": 1.0993, "step": 1754 }, { "epoch": 0.24781135272521887, "grad_norm": 4.342036746806214, "learning_rate": 1.9855371207737084e-05, "loss": 1.0341, "step": 1755 }, { "epoch": 0.24795255577520475, "grad_norm": 4.641989901468239, "learning_rate": 1.9855112759908607e-05, "loss": 1.0026, "step": 1756 }, { "epoch": 0.24809375882519064, "grad_norm": 4.5300387958261386, "learning_rate": 1.9854854083051563e-05, "loss": 1.1155, "step": 1757 }, { "epoch": 0.2482349618751765, "grad_norm": 3.277970357469273, "learning_rate": 1.9854595177171968e-05, "loss": 0.923, "step": 1758 }, { "epoch": 0.24837616492516237, "grad_norm": 3.7674849049875077, "learning_rate": 1.985433604227584e-05, "loss": 0.9485, "step": 1759 }, { "epoch": 0.24851736797514826, "grad_norm": 5.019852036579426, "learning_rate": 1.9854076678369197e-05, "loss": 1.0545, "step": 1760 }, { "epoch": 0.24865857102513414, "grad_norm": 4.313766028592151, "learning_rate": 1.9853817085458065e-05, "loss": 1.0266, "step": 1761 }, { "epoch": 0.24879977407512002, "grad_norm": 4.254151271200883, "learning_rate": 1.985355726354848e-05, "loss": 1.1362, "step": 1762 }, { "epoch": 0.2489409771251059, "grad_norm": 4.542462728662068, "learning_rate": 1.985329721264648e-05, "loss": 1.0829, "step": 1763 }, { "epoch": 0.2490821801750918, "grad_norm": 4.43666385220854, "learning_rate": 1.985303693275811e-05, "loss": 1.1686, "step": 1764 }, { "epoch": 0.24922338322507767, "grad_norm": 4.442954744062333, "learning_rate": 1.9852776423889414e-05, "loss": 1.2025, "step": 1765 }, { "epoch": 0.24936458627506355, "grad_norm": 4.306206916782786, "learning_rate": 1.9852515686046453e-05, "loss": 1.0653, "step": 1766 }, { "epoch": 0.2495057893250494, "grad_norm": 4.4121707619056405, "learning_rate": 1.9852254719235276e-05, "loss": 1.0743, "step": 1767 }, { "epoch": 0.2496469923750353, "grad_norm": 8.826121169885152, "learning_rate": 1.985199352346196e-05, "loss": 0.8463, "step": 1768 }, { "epoch": 0.24978819542502118, "grad_norm": 5.423001436827784, "learning_rate": 1.9851732098732565e-05, "loss": 1.1477, "step": 1769 }, { "epoch": 0.24992939847500706, "grad_norm": 4.349952410355591, "learning_rate": 1.9851470445053173e-05, "loss": 1.0024, "step": 1770 }, { "epoch": 0.25007060152499294, "grad_norm": 3.952842332133127, "learning_rate": 1.9851208562429863e-05, "loss": 1.1046, "step": 1771 }, { "epoch": 0.2502118045749788, "grad_norm": 4.483581229400321, "learning_rate": 1.985094645086872e-05, "loss": 1.0459, "step": 1772 }, { "epoch": 0.2503530076249647, "grad_norm": 4.336622748908567, "learning_rate": 1.9850684110375836e-05, "loss": 0.9991, "step": 1773 }, { "epoch": 0.2504942106749506, "grad_norm": 4.805756904086518, "learning_rate": 1.9850421540957307e-05, "loss": 1.0262, "step": 1774 }, { "epoch": 0.2506354137249365, "grad_norm": 4.158873471280884, "learning_rate": 1.9850158742619233e-05, "loss": 1.1686, "step": 1775 }, { "epoch": 0.25077661677492236, "grad_norm": 3.8921747621044975, "learning_rate": 1.9849895715367728e-05, "loss": 1.007, "step": 1776 }, { "epoch": 0.25091781982490824, "grad_norm": 5.009127888899049, "learning_rate": 1.9849632459208895e-05, "loss": 1.2482, "step": 1777 }, { "epoch": 0.2510590228748941, "grad_norm": 4.480161958489549, "learning_rate": 1.9849368974148865e-05, "loss": 1.0608, "step": 1778 }, { "epoch": 0.25120022592488, "grad_norm": 5.315526488222281, "learning_rate": 1.984910526019375e-05, "loss": 1.1975, "step": 1779 }, { "epoch": 0.25134142897486583, "grad_norm": 3.9086107591683708, "learning_rate": 1.984884131734968e-05, "loss": 0.8438, "step": 1780 }, { "epoch": 0.2514826320248517, "grad_norm": 6.768880213102031, "learning_rate": 1.984857714562279e-05, "loss": 1.4772, "step": 1781 }, { "epoch": 0.2516238350748376, "grad_norm": 4.911678150941068, "learning_rate": 1.9848312745019224e-05, "loss": 1.1367, "step": 1782 }, { "epoch": 0.2517650381248235, "grad_norm": 3.7779031035231063, "learning_rate": 1.9848048115545125e-05, "loss": 0.9677, "step": 1783 }, { "epoch": 0.25190624117480936, "grad_norm": 4.058710684059094, "learning_rate": 1.984778325720664e-05, "loss": 1.1123, "step": 1784 }, { "epoch": 0.25204744422479525, "grad_norm": 4.453232888731743, "learning_rate": 1.984751817000992e-05, "loss": 1.1083, "step": 1785 }, { "epoch": 0.25218864727478113, "grad_norm": 4.370839913831289, "learning_rate": 1.9847252853961136e-05, "loss": 1.0981, "step": 1786 }, { "epoch": 0.252329850324767, "grad_norm": 3.9839838840807453, "learning_rate": 1.9846987309066445e-05, "loss": 0.9425, "step": 1787 }, { "epoch": 0.2524710533747529, "grad_norm": 4.054469771188677, "learning_rate": 1.984672153533202e-05, "loss": 0.9768, "step": 1788 }, { "epoch": 0.2526122564247388, "grad_norm": 4.272242433499775, "learning_rate": 1.9846455532764043e-05, "loss": 1.0561, "step": 1789 }, { "epoch": 0.25275345947472466, "grad_norm": 3.875389121875868, "learning_rate": 1.984618930136869e-05, "loss": 0.9315, "step": 1790 }, { "epoch": 0.25289466252471055, "grad_norm": 4.951368455212235, "learning_rate": 1.9845922841152153e-05, "loss": 1.0357, "step": 1791 }, { "epoch": 0.25303586557469643, "grad_norm": 3.995807978284194, "learning_rate": 1.9845656152120617e-05, "loss": 1.07, "step": 1792 }, { "epoch": 0.2531770686246823, "grad_norm": 5.65951238732216, "learning_rate": 1.9845389234280285e-05, "loss": 1.0214, "step": 1793 }, { "epoch": 0.2533182716746682, "grad_norm": 4.777351695852719, "learning_rate": 1.984512208763736e-05, "loss": 1.2566, "step": 1794 }, { "epoch": 0.2534594747246541, "grad_norm": 4.764090788148082, "learning_rate": 1.984485471219805e-05, "loss": 1.3729, "step": 1795 }, { "epoch": 0.25360067777463996, "grad_norm": 4.737575841062879, "learning_rate": 1.9844587107968567e-05, "loss": 1.0511, "step": 1796 }, { "epoch": 0.2537418808246258, "grad_norm": 3.626165689130723, "learning_rate": 1.9844319274955132e-05, "loss": 0.8752, "step": 1797 }, { "epoch": 0.25388308387461167, "grad_norm": 4.306529757723179, "learning_rate": 1.9844051213163967e-05, "loss": 1.096, "step": 1798 }, { "epoch": 0.25402428692459755, "grad_norm": 4.327195513342646, "learning_rate": 1.9843782922601305e-05, "loss": 1.2814, "step": 1799 }, { "epoch": 0.25416548997458344, "grad_norm": 4.125935957404839, "learning_rate": 1.9843514403273378e-05, "loss": 1.1041, "step": 1800 }, { "epoch": 0.2543066930245693, "grad_norm": 4.984178006536586, "learning_rate": 1.984324565518643e-05, "loss": 0.9635, "step": 1801 }, { "epoch": 0.2544478960745552, "grad_norm": 4.652130141229484, "learning_rate": 1.98429766783467e-05, "loss": 1.0501, "step": 1802 }, { "epoch": 0.2545890991245411, "grad_norm": 4.021118114615804, "learning_rate": 1.9842707472760443e-05, "loss": 0.9566, "step": 1803 }, { "epoch": 0.25473030217452697, "grad_norm": 4.0336123105712725, "learning_rate": 1.984243803843392e-05, "loss": 1.1905, "step": 1804 }, { "epoch": 0.25487150522451285, "grad_norm": 4.947408705066826, "learning_rate": 1.984216837537338e-05, "loss": 1.1713, "step": 1805 }, { "epoch": 0.25501270827449873, "grad_norm": 4.153105601150481, "learning_rate": 1.98418984835851e-05, "loss": 1.1338, "step": 1806 }, { "epoch": 0.2551539113244846, "grad_norm": 3.889235495695041, "learning_rate": 1.9841628363075353e-05, "loss": 0.9695, "step": 1807 }, { "epoch": 0.2552951143744705, "grad_norm": 4.446497531794838, "learning_rate": 1.9841358013850413e-05, "loss": 1.1453, "step": 1808 }, { "epoch": 0.2554363174244564, "grad_norm": 4.399695425177541, "learning_rate": 1.9841087435916558e-05, "loss": 0.9461, "step": 1809 }, { "epoch": 0.25557752047444227, "grad_norm": 4.538241626368104, "learning_rate": 1.9840816629280087e-05, "loss": 1.3242, "step": 1810 }, { "epoch": 0.25571872352442815, "grad_norm": 5.591825633558243, "learning_rate": 1.9840545593947286e-05, "loss": 1.2403, "step": 1811 }, { "epoch": 0.25585992657441403, "grad_norm": 4.486984518292431, "learning_rate": 1.9840274329924452e-05, "loss": 1.0376, "step": 1812 }, { "epoch": 0.2560011296243999, "grad_norm": 4.329310339441637, "learning_rate": 1.9840002837217894e-05, "loss": 1.0225, "step": 1813 }, { "epoch": 0.25614233267438574, "grad_norm": 4.431670697571927, "learning_rate": 1.983973111583392e-05, "loss": 0.9871, "step": 1814 }, { "epoch": 0.2562835357243716, "grad_norm": 4.562471030669177, "learning_rate": 1.9839459165778842e-05, "loss": 1.2063, "step": 1815 }, { "epoch": 0.2564247387743575, "grad_norm": 4.63735361125598, "learning_rate": 1.9839186987058986e-05, "loss": 1.1223, "step": 1816 }, { "epoch": 0.2565659418243434, "grad_norm": 5.525235587274402, "learning_rate": 1.983891457968067e-05, "loss": 1.2139, "step": 1817 }, { "epoch": 0.2567071448743293, "grad_norm": 4.2274964265540165, "learning_rate": 1.9838641943650234e-05, "loss": 0.9316, "step": 1818 }, { "epoch": 0.25684834792431516, "grad_norm": 4.201389861322536, "learning_rate": 1.9838369078974003e-05, "loss": 0.9655, "step": 1819 }, { "epoch": 0.25698955097430104, "grad_norm": 3.7078024571924892, "learning_rate": 1.9838095985658324e-05, "loss": 0.9423, "step": 1820 }, { "epoch": 0.2571307540242869, "grad_norm": 3.8335519991973657, "learning_rate": 1.9837822663709544e-05, "loss": 0.9154, "step": 1821 }, { "epoch": 0.2572719570742728, "grad_norm": 4.203192669592402, "learning_rate": 1.9837549113134015e-05, "loss": 0.9266, "step": 1822 }, { "epoch": 0.2574131601242587, "grad_norm": 4.936588423601883, "learning_rate": 1.9837275333938093e-05, "loss": 1.1949, "step": 1823 }, { "epoch": 0.25755436317424457, "grad_norm": 4.021932857666165, "learning_rate": 1.983700132612814e-05, "loss": 1.0484, "step": 1824 }, { "epoch": 0.25769556622423045, "grad_norm": 4.924022429296793, "learning_rate": 1.983672708971052e-05, "loss": 1.3763, "step": 1825 }, { "epoch": 0.25783676927421634, "grad_norm": 5.015444273726999, "learning_rate": 1.9836452624691617e-05, "loss": 1.1707, "step": 1826 }, { "epoch": 0.2579779723242022, "grad_norm": 4.532623168637367, "learning_rate": 1.98361779310778e-05, "loss": 1.0912, "step": 1827 }, { "epoch": 0.2581191753741881, "grad_norm": 5.193585695370866, "learning_rate": 1.9835903008875458e-05, "loss": 1.2462, "step": 1828 }, { "epoch": 0.258260378424174, "grad_norm": 3.9134066716890397, "learning_rate": 1.9835627858090977e-05, "loss": 1.0365, "step": 1829 }, { "epoch": 0.25840158147415987, "grad_norm": 4.120363927549136, "learning_rate": 1.983535247873075e-05, "loss": 1.2367, "step": 1830 }, { "epoch": 0.2585427845241457, "grad_norm": 5.263593883539839, "learning_rate": 1.9835076870801183e-05, "loss": 1.2767, "step": 1831 }, { "epoch": 0.2586839875741316, "grad_norm": 5.3404783896382915, "learning_rate": 1.9834801034308674e-05, "loss": 1.0237, "step": 1832 }, { "epoch": 0.25882519062411746, "grad_norm": 4.470164343138368, "learning_rate": 1.9834524969259636e-05, "loss": 1.0849, "step": 1833 }, { "epoch": 0.25896639367410335, "grad_norm": 4.139023629204524, "learning_rate": 1.9834248675660484e-05, "loss": 1.2606, "step": 1834 }, { "epoch": 0.25910759672408923, "grad_norm": 4.0174191362774945, "learning_rate": 1.983397215351764e-05, "loss": 0.9803, "step": 1835 }, { "epoch": 0.2592487997740751, "grad_norm": 4.3630589931847314, "learning_rate": 1.9833695402837536e-05, "loss": 1.0285, "step": 1836 }, { "epoch": 0.259390002824061, "grad_norm": 3.8069062518913546, "learning_rate": 1.9833418423626593e-05, "loss": 1.0439, "step": 1837 }, { "epoch": 0.2595312058740469, "grad_norm": 4.464269887314775, "learning_rate": 1.9833141215891253e-05, "loss": 1.1284, "step": 1838 }, { "epoch": 0.25967240892403276, "grad_norm": 4.614950166656232, "learning_rate": 1.9832863779637958e-05, "loss": 1.2166, "step": 1839 }, { "epoch": 0.25981361197401864, "grad_norm": 4.548326789240738, "learning_rate": 1.9832586114873154e-05, "loss": 0.955, "step": 1840 }, { "epoch": 0.2599548150240045, "grad_norm": 3.997095965911288, "learning_rate": 1.9832308221603296e-05, "loss": 1.1058, "step": 1841 }, { "epoch": 0.2600960180739904, "grad_norm": 4.660185467602098, "learning_rate": 1.983203009983484e-05, "loss": 1.1507, "step": 1842 }, { "epoch": 0.2602372211239763, "grad_norm": 4.079378078114112, "learning_rate": 1.9831751749574256e-05, "loss": 0.9784, "step": 1843 }, { "epoch": 0.2603784241739622, "grad_norm": 4.564282973872989, "learning_rate": 1.9831473170828003e-05, "loss": 1.1426, "step": 1844 }, { "epoch": 0.26051962722394806, "grad_norm": 4.942052127544571, "learning_rate": 1.9831194363602558e-05, "loss": 1.114, "step": 1845 }, { "epoch": 0.26066083027393394, "grad_norm": 4.055289426871556, "learning_rate": 1.9830915327904402e-05, "loss": 1.0268, "step": 1846 }, { "epoch": 0.2608020333239198, "grad_norm": 4.429536312295024, "learning_rate": 1.9830636063740023e-05, "loss": 1.207, "step": 1847 }, { "epoch": 0.26094323637390565, "grad_norm": 4.200766234184153, "learning_rate": 1.9830356571115904e-05, "loss": 1.2327, "step": 1848 }, { "epoch": 0.26108443942389153, "grad_norm": 3.9245031102356123, "learning_rate": 1.9830076850038545e-05, "loss": 0.9127, "step": 1849 }, { "epoch": 0.2612256424738774, "grad_norm": 3.867561791187962, "learning_rate": 1.9829796900514445e-05, "loss": 1.2064, "step": 1850 }, { "epoch": 0.2613668455238633, "grad_norm": 3.668051280232123, "learning_rate": 1.9829516722550113e-05, "loss": 0.8755, "step": 1851 }, { "epoch": 0.2615080485738492, "grad_norm": 5.0322788153662295, "learning_rate": 1.982923631615205e-05, "loss": 1.2376, "step": 1852 }, { "epoch": 0.26164925162383507, "grad_norm": 3.8740407222660154, "learning_rate": 1.982895568132679e-05, "loss": 0.9204, "step": 1853 }, { "epoch": 0.26179045467382095, "grad_norm": 4.558662679720923, "learning_rate": 1.9828674818080837e-05, "loss": 1.1142, "step": 1854 }, { "epoch": 0.26193165772380683, "grad_norm": 4.532216931048915, "learning_rate": 1.982839372642073e-05, "loss": 1.0541, "step": 1855 }, { "epoch": 0.2620728607737927, "grad_norm": 3.4928341904820592, "learning_rate": 1.9828112406352994e-05, "loss": 0.9201, "step": 1856 }, { "epoch": 0.2622140638237786, "grad_norm": 4.182121407039527, "learning_rate": 1.9827830857884173e-05, "loss": 1.2461, "step": 1857 }, { "epoch": 0.2623552668737645, "grad_norm": 3.830371471883557, "learning_rate": 1.9827549081020806e-05, "loss": 0.9488, "step": 1858 }, { "epoch": 0.26249646992375036, "grad_norm": 3.6586193188431904, "learning_rate": 1.9827267075769444e-05, "loss": 0.8693, "step": 1859 }, { "epoch": 0.26263767297373625, "grad_norm": 4.098906291559029, "learning_rate": 1.9826984842136637e-05, "loss": 1.0552, "step": 1860 }, { "epoch": 0.26277887602372213, "grad_norm": 4.1658586058030656, "learning_rate": 1.9826702380128946e-05, "loss": 1.0166, "step": 1861 }, { "epoch": 0.262920079073708, "grad_norm": 4.4573469226027935, "learning_rate": 1.9826419689752935e-05, "loss": 0.9906, "step": 1862 }, { "epoch": 0.2630612821236939, "grad_norm": 4.0536958678435004, "learning_rate": 1.9826136771015177e-05, "loss": 0.9548, "step": 1863 }, { "epoch": 0.2632024851736798, "grad_norm": 4.328741615896123, "learning_rate": 1.9825853623922244e-05, "loss": 1.3807, "step": 1864 }, { "epoch": 0.2633436882236656, "grad_norm": 4.965706208648264, "learning_rate": 1.9825570248480713e-05, "loss": 1.2048, "step": 1865 }, { "epoch": 0.2634848912736515, "grad_norm": 4.404130566579816, "learning_rate": 1.9825286644697176e-05, "loss": 0.958, "step": 1866 }, { "epoch": 0.26362609432363737, "grad_norm": 3.9337423491594024, "learning_rate": 1.982500281257822e-05, "loss": 0.903, "step": 1867 }, { "epoch": 0.26376729737362326, "grad_norm": 5.5165073415084, "learning_rate": 1.982471875213044e-05, "loss": 1.1091, "step": 1868 }, { "epoch": 0.26390850042360914, "grad_norm": 5.458028776760574, "learning_rate": 1.9824434463360442e-05, "loss": 1.2246, "step": 1869 }, { "epoch": 0.264049703473595, "grad_norm": 3.644002562072783, "learning_rate": 1.9824149946274827e-05, "loss": 0.9188, "step": 1870 }, { "epoch": 0.2641909065235809, "grad_norm": 5.070646160407135, "learning_rate": 1.9823865200880212e-05, "loss": 1.2341, "step": 1871 }, { "epoch": 0.2643321095735668, "grad_norm": 4.998665095140983, "learning_rate": 1.982358022718321e-05, "loss": 1.2827, "step": 1872 }, { "epoch": 0.26447331262355267, "grad_norm": 5.41471968244836, "learning_rate": 1.9823295025190448e-05, "loss": 1.1906, "step": 1873 }, { "epoch": 0.26461451567353855, "grad_norm": 4.767947283912256, "learning_rate": 1.9823009594908553e-05, "loss": 1.0733, "step": 1874 }, { "epoch": 0.26475571872352444, "grad_norm": 4.690024521267456, "learning_rate": 1.9822723936344154e-05, "loss": 1.0014, "step": 1875 }, { "epoch": 0.2648969217735103, "grad_norm": 4.456701659906732, "learning_rate": 1.9822438049503894e-05, "loss": 1.2019, "step": 1876 }, { "epoch": 0.2650381248234962, "grad_norm": 4.495809605495094, "learning_rate": 1.9822151934394415e-05, "loss": 1.1006, "step": 1877 }, { "epoch": 0.2651793278734821, "grad_norm": 4.340686436182212, "learning_rate": 1.982186559102237e-05, "loss": 1.0079, "step": 1878 }, { "epoch": 0.26532053092346797, "grad_norm": 4.027422854136589, "learning_rate": 1.982157901939441e-05, "loss": 0.8951, "step": 1879 }, { "epoch": 0.26546173397345385, "grad_norm": 4.113046938285744, "learning_rate": 1.982129221951719e-05, "loss": 1.1811, "step": 1880 }, { "epoch": 0.26560293702343973, "grad_norm": 4.074988999428498, "learning_rate": 1.9821005191397387e-05, "loss": 1.007, "step": 1881 }, { "epoch": 0.26574414007342556, "grad_norm": 4.806825603790409, "learning_rate": 1.982071793504166e-05, "loss": 1.2616, "step": 1882 }, { "epoch": 0.26588534312341144, "grad_norm": 4.828424122877705, "learning_rate": 1.982043045045669e-05, "loss": 1.1724, "step": 1883 }, { "epoch": 0.2660265461733973, "grad_norm": 4.317464637359753, "learning_rate": 1.982014273764916e-05, "loss": 0.9919, "step": 1884 }, { "epoch": 0.2661677492233832, "grad_norm": 3.8615390443583806, "learning_rate": 1.9819854796625756e-05, "loss": 1.0182, "step": 1885 }, { "epoch": 0.2663089522733691, "grad_norm": 4.7700047863813175, "learning_rate": 1.981956662739316e-05, "loss": 1.2221, "step": 1886 }, { "epoch": 0.266450155323355, "grad_norm": 4.166544717120669, "learning_rate": 1.981927822995808e-05, "loss": 1.4011, "step": 1887 }, { "epoch": 0.26659135837334086, "grad_norm": 4.532693743281657, "learning_rate": 1.9818989604327218e-05, "loss": 1.1463, "step": 1888 }, { "epoch": 0.26673256142332674, "grad_norm": 5.886451921880403, "learning_rate": 1.9818700750507275e-05, "loss": 1.2252, "step": 1889 }, { "epoch": 0.2668737644733126, "grad_norm": 4.411944526600107, "learning_rate": 1.9818411668504965e-05, "loss": 1.1067, "step": 1890 }, { "epoch": 0.2670149675232985, "grad_norm": 3.5556559459789727, "learning_rate": 1.981812235832701e-05, "loss": 0.841, "step": 1891 }, { "epoch": 0.2671561705732844, "grad_norm": 3.98631227097006, "learning_rate": 1.981783281998013e-05, "loss": 1.0951, "step": 1892 }, { "epoch": 0.2672973736232703, "grad_norm": 4.297593422012009, "learning_rate": 1.9817543053471058e-05, "loss": 1.2269, "step": 1893 }, { "epoch": 0.26743857667325616, "grad_norm": 4.330913247914617, "learning_rate": 1.9817253058806525e-05, "loss": 0.9833, "step": 1894 }, { "epoch": 0.26757977972324204, "grad_norm": 4.353124030317538, "learning_rate": 1.9816962835993268e-05, "loss": 1.0562, "step": 1895 }, { "epoch": 0.2677209827732279, "grad_norm": 5.140174186532284, "learning_rate": 1.9816672385038033e-05, "loss": 1.1521, "step": 1896 }, { "epoch": 0.2678621858232138, "grad_norm": 4.84021068206007, "learning_rate": 1.9816381705947575e-05, "loss": 1.066, "step": 1897 }, { "epoch": 0.2680033888731997, "grad_norm": 4.655159459646432, "learning_rate": 1.9816090798728648e-05, "loss": 1.0596, "step": 1898 }, { "epoch": 0.2681445919231855, "grad_norm": 3.855920025729369, "learning_rate": 1.9815799663388003e-05, "loss": 0.961, "step": 1899 }, { "epoch": 0.2682857949731714, "grad_norm": 4.596026299347673, "learning_rate": 1.9815508299932417e-05, "loss": 1.038, "step": 1900 }, { "epoch": 0.2684269980231573, "grad_norm": 4.310839975237934, "learning_rate": 1.981521670836866e-05, "loss": 1.0237, "step": 1901 }, { "epoch": 0.26856820107314316, "grad_norm": 5.173806071000135, "learning_rate": 1.98149248887035e-05, "loss": 1.1202, "step": 1902 }, { "epoch": 0.26870940412312905, "grad_norm": 4.97568068247098, "learning_rate": 1.9814632840943728e-05, "loss": 1.328, "step": 1903 }, { "epoch": 0.26885060717311493, "grad_norm": 5.68666850582161, "learning_rate": 1.9814340565096124e-05, "loss": 1.2292, "step": 1904 }, { "epoch": 0.2689918102231008, "grad_norm": 4.3267050761984756, "learning_rate": 1.9814048061167486e-05, "loss": 0.9448, "step": 1905 }, { "epoch": 0.2691330132730867, "grad_norm": 4.969852177416566, "learning_rate": 1.981375532916461e-05, "loss": 1.183, "step": 1906 }, { "epoch": 0.2692742163230726, "grad_norm": 5.102118002696035, "learning_rate": 1.9813462369094297e-05, "loss": 1.3245, "step": 1907 }, { "epoch": 0.26941541937305846, "grad_norm": 4.3326818741031214, "learning_rate": 1.981316918096336e-05, "loss": 1.125, "step": 1908 }, { "epoch": 0.26955662242304435, "grad_norm": 4.1085574487523875, "learning_rate": 1.9812875764778604e-05, "loss": 1.0606, "step": 1909 }, { "epoch": 0.26969782547303023, "grad_norm": 4.072834574455018, "learning_rate": 1.9812582120546854e-05, "loss": 0.9583, "step": 1910 }, { "epoch": 0.2698390285230161, "grad_norm": 4.279078397477056, "learning_rate": 1.981228824827494e-05, "loss": 0.8896, "step": 1911 }, { "epoch": 0.269980231573002, "grad_norm": 3.5694853898929417, "learning_rate": 1.9811994147969676e-05, "loss": 1.0055, "step": 1912 }, { "epoch": 0.2701214346229879, "grad_norm": 4.110524682268194, "learning_rate": 1.981169981963791e-05, "loss": 1.0007, "step": 1913 }, { "epoch": 0.27026263767297376, "grad_norm": 4.0376177016082435, "learning_rate": 1.9811405263286475e-05, "loss": 1.0296, "step": 1914 }, { "epoch": 0.27040384072295964, "grad_norm": 4.411827189006288, "learning_rate": 1.981111047892222e-05, "loss": 1.2874, "step": 1915 }, { "epoch": 0.27054504377294547, "grad_norm": 4.058164664932261, "learning_rate": 1.9810815466551996e-05, "loss": 1.0238, "step": 1916 }, { "epoch": 0.27068624682293135, "grad_norm": 3.59293615374635, "learning_rate": 1.9810520226182657e-05, "loss": 0.8512, "step": 1917 }, { "epoch": 0.27082744987291724, "grad_norm": 4.279823982298696, "learning_rate": 1.9810224757821063e-05, "loss": 1.119, "step": 1918 }, { "epoch": 0.2709686529229031, "grad_norm": 4.691157088386239, "learning_rate": 1.9809929061474084e-05, "loss": 1.2303, "step": 1919 }, { "epoch": 0.271109855972889, "grad_norm": 4.905502708349846, "learning_rate": 1.980963313714859e-05, "loss": 1.1388, "step": 1920 }, { "epoch": 0.2712510590228749, "grad_norm": 4.264705545238572, "learning_rate": 1.980933698485146e-05, "loss": 0.949, "step": 1921 }, { "epoch": 0.27139226207286077, "grad_norm": 3.955521900346505, "learning_rate": 1.9809040604589572e-05, "loss": 0.9812, "step": 1922 }, { "epoch": 0.27153346512284665, "grad_norm": 4.301451343495329, "learning_rate": 1.9808743996369816e-05, "loss": 1.0911, "step": 1923 }, { "epoch": 0.27167466817283253, "grad_norm": 4.333781540715173, "learning_rate": 1.9808447160199087e-05, "loss": 1.0575, "step": 1924 }, { "epoch": 0.2718158712228184, "grad_norm": 4.434276229226678, "learning_rate": 1.980815009608428e-05, "loss": 1.1284, "step": 1925 }, { "epoch": 0.2719570742728043, "grad_norm": 4.266299412214705, "learning_rate": 1.9807852804032306e-05, "loss": 0.9993, "step": 1926 }, { "epoch": 0.2720982773227902, "grad_norm": 4.864722831595278, "learning_rate": 1.9807555284050063e-05, "loss": 1.2809, "step": 1927 }, { "epoch": 0.27223948037277607, "grad_norm": 5.0037588339866605, "learning_rate": 1.9807257536144474e-05, "loss": 1.0746, "step": 1928 }, { "epoch": 0.27238068342276195, "grad_norm": 4.687973094368147, "learning_rate": 1.9806959560322455e-05, "loss": 1.2387, "step": 1929 }, { "epoch": 0.27252188647274783, "grad_norm": 4.050124522510865, "learning_rate": 1.980666135659093e-05, "loss": 1.1225, "step": 1930 }, { "epoch": 0.2726630895227337, "grad_norm": 5.044334944078193, "learning_rate": 1.980636292495683e-05, "loss": 1.2669, "step": 1931 }, { "epoch": 0.2728042925727196, "grad_norm": 3.820292265866119, "learning_rate": 1.9806064265427093e-05, "loss": 1.1048, "step": 1932 }, { "epoch": 0.2729454956227054, "grad_norm": 4.45970707568549, "learning_rate": 1.9805765378008653e-05, "loss": 1.2018, "step": 1933 }, { "epoch": 0.2730866986726913, "grad_norm": 4.393577316139637, "learning_rate": 1.9805466262708464e-05, "loss": 1.0597, "step": 1934 }, { "epoch": 0.2732279017226772, "grad_norm": 4.575523466843277, "learning_rate": 1.9805166919533474e-05, "loss": 1.0458, "step": 1935 }, { "epoch": 0.2733691047726631, "grad_norm": 4.618107131171763, "learning_rate": 1.980486734849064e-05, "loss": 1.2403, "step": 1936 }, { "epoch": 0.27351030782264896, "grad_norm": 4.6686069169116, "learning_rate": 1.980456754958692e-05, "loss": 1.3606, "step": 1937 }, { "epoch": 0.27365151087263484, "grad_norm": 4.13624548917745, "learning_rate": 1.9804267522829287e-05, "loss": 1.0938, "step": 1938 }, { "epoch": 0.2737927139226207, "grad_norm": 4.194531072087034, "learning_rate": 1.9803967268224708e-05, "loss": 0.9957, "step": 1939 }, { "epoch": 0.2739339169726066, "grad_norm": 4.122994480672871, "learning_rate": 1.9803666785780165e-05, "loss": 0.9382, "step": 1940 }, { "epoch": 0.2740751200225925, "grad_norm": 5.075566278725887, "learning_rate": 1.9803366075502638e-05, "loss": 0.9363, "step": 1941 }, { "epoch": 0.27421632307257837, "grad_norm": 3.7067382502660537, "learning_rate": 1.9803065137399123e-05, "loss": 0.9876, "step": 1942 }, { "epoch": 0.27435752612256425, "grad_norm": 4.09596158947342, "learning_rate": 1.98027639714766e-05, "loss": 1.2029, "step": 1943 }, { "epoch": 0.27449872917255014, "grad_norm": 4.766167113287923, "learning_rate": 1.980246257774208e-05, "loss": 1.0014, "step": 1944 }, { "epoch": 0.274639932222536, "grad_norm": 4.920310314345624, "learning_rate": 1.9802160956202564e-05, "loss": 1.0501, "step": 1945 }, { "epoch": 0.2747811352725219, "grad_norm": 4.173753343779286, "learning_rate": 1.980185910686506e-05, "loss": 1.1264, "step": 1946 }, { "epoch": 0.2749223383225078, "grad_norm": 4.145207220069322, "learning_rate": 1.9801557029736585e-05, "loss": 1.1966, "step": 1947 }, { "epoch": 0.27506354137249367, "grad_norm": 4.646764195670235, "learning_rate": 1.9801254724824152e-05, "loss": 1.0586, "step": 1948 }, { "epoch": 0.27520474442247955, "grad_norm": 3.89261256015617, "learning_rate": 1.98009521921348e-05, "loss": 0.9301, "step": 1949 }, { "epoch": 0.2753459474724654, "grad_norm": 3.914085594450724, "learning_rate": 1.9800649431675544e-05, "loss": 1.0692, "step": 1950 }, { "epoch": 0.27548715052245126, "grad_norm": 6.217100541119199, "learning_rate": 1.9800346443453434e-05, "loss": 1.2925, "step": 1951 }, { "epoch": 0.27562835357243715, "grad_norm": 4.180937250009694, "learning_rate": 1.98000432274755e-05, "loss": 1.0339, "step": 1952 }, { "epoch": 0.27576955662242303, "grad_norm": 4.833108318940379, "learning_rate": 1.9799739783748798e-05, "loss": 1.286, "step": 1953 }, { "epoch": 0.2759107596724089, "grad_norm": 4.19034590252324, "learning_rate": 1.9799436112280374e-05, "loss": 0.9185, "step": 1954 }, { "epoch": 0.2760519627223948, "grad_norm": 3.9467514311165757, "learning_rate": 1.9799132213077283e-05, "loss": 1.175, "step": 1955 }, { "epoch": 0.2761931657723807, "grad_norm": 4.342706629159306, "learning_rate": 1.9798828086146596e-05, "loss": 1.2331, "step": 1956 }, { "epoch": 0.27633436882236656, "grad_norm": 3.9251272153361216, "learning_rate": 1.9798523731495373e-05, "loss": 0.9976, "step": 1957 }, { "epoch": 0.27647557187235244, "grad_norm": 4.9915444721580045, "learning_rate": 1.9798219149130692e-05, "loss": 1.3574, "step": 1958 }, { "epoch": 0.2766167749223383, "grad_norm": 4.668249178903173, "learning_rate": 1.979791433905963e-05, "loss": 1.2374, "step": 1959 }, { "epoch": 0.2767579779723242, "grad_norm": 4.081275883545955, "learning_rate": 1.979760930128927e-05, "loss": 1.0287, "step": 1960 }, { "epoch": 0.2768991810223101, "grad_norm": 4.38518891166007, "learning_rate": 1.97973040358267e-05, "loss": 1.1765, "step": 1961 }, { "epoch": 0.277040384072296, "grad_norm": 4.024707125769459, "learning_rate": 1.9796998542679015e-05, "loss": 1.0753, "step": 1962 }, { "epoch": 0.27718158712228186, "grad_norm": 4.525752594534333, "learning_rate": 1.9796692821853315e-05, "loss": 1.0752, "step": 1963 }, { "epoch": 0.27732279017226774, "grad_norm": 4.426957831487955, "learning_rate": 1.979638687335671e-05, "loss": 1.0836, "step": 1964 }, { "epoch": 0.2774639932222536, "grad_norm": 4.161187631948007, "learning_rate": 1.97960806971963e-05, "loss": 0.9516, "step": 1965 }, { "epoch": 0.2776051962722395, "grad_norm": 4.837229134664591, "learning_rate": 1.9795774293379206e-05, "loss": 1.1914, "step": 1966 }, { "epoch": 0.27774639932222533, "grad_norm": 4.590584776593588, "learning_rate": 1.9795467661912545e-05, "loss": 1.1588, "step": 1967 }, { "epoch": 0.2778876023722112, "grad_norm": 4.755577760977489, "learning_rate": 1.979516080280345e-05, "loss": 1.0861, "step": 1968 }, { "epoch": 0.2780288054221971, "grad_norm": 3.8656949427527536, "learning_rate": 1.9794853716059045e-05, "loss": 0.9719, "step": 1969 }, { "epoch": 0.278170008472183, "grad_norm": 4.575697976512219, "learning_rate": 1.979454640168647e-05, "loss": 1.1418, "step": 1970 }, { "epoch": 0.27831121152216887, "grad_norm": 4.900714937784135, "learning_rate": 1.9794238859692866e-05, "loss": 1.1897, "step": 1971 }, { "epoch": 0.27845241457215475, "grad_norm": 4.640687438285169, "learning_rate": 1.9793931090085385e-05, "loss": 0.9471, "step": 1972 }, { "epoch": 0.27859361762214063, "grad_norm": 4.5373388357865165, "learning_rate": 1.9793623092871172e-05, "loss": 1.2946, "step": 1973 }, { "epoch": 0.2787348206721265, "grad_norm": 4.642345560210742, "learning_rate": 1.9793314868057387e-05, "loss": 0.9214, "step": 1974 }, { "epoch": 0.2788760237221124, "grad_norm": 4.390810116610071, "learning_rate": 1.9793006415651198e-05, "loss": 1.1299, "step": 1975 }, { "epoch": 0.2790172267720983, "grad_norm": 4.438961443781038, "learning_rate": 1.9792697735659766e-05, "loss": 1.2401, "step": 1976 }, { "epoch": 0.27915842982208416, "grad_norm": 4.110013407261194, "learning_rate": 1.979238882809027e-05, "loss": 0.9505, "step": 1977 }, { "epoch": 0.27929963287207005, "grad_norm": 4.240267762400407, "learning_rate": 1.979207969294988e-05, "loss": 1.005, "step": 1978 }, { "epoch": 0.27944083592205593, "grad_norm": 4.0388072578216905, "learning_rate": 1.9791770330245793e-05, "loss": 0.8163, "step": 1979 }, { "epoch": 0.2795820389720418, "grad_norm": 4.050622215379461, "learning_rate": 1.979146073998519e-05, "loss": 0.9849, "step": 1980 }, { "epoch": 0.2797232420220277, "grad_norm": 4.696401766465645, "learning_rate": 1.979115092217527e-05, "loss": 1.328, "step": 1981 }, { "epoch": 0.2798644450720136, "grad_norm": 4.005898408070976, "learning_rate": 1.979084087682323e-05, "loss": 1.0332, "step": 1982 }, { "epoch": 0.28000564812199946, "grad_norm": 4.176778626936255, "learning_rate": 1.9790530603936275e-05, "loss": 0.9744, "step": 1983 }, { "epoch": 0.2801468511719853, "grad_norm": 3.9965116131100897, "learning_rate": 1.979022010352162e-05, "loss": 1.0151, "step": 1984 }, { "epoch": 0.2802880542219712, "grad_norm": 4.508909087531889, "learning_rate": 1.9789909375586477e-05, "loss": 1.3784, "step": 1985 }, { "epoch": 0.28042925727195706, "grad_norm": 3.999101969427345, "learning_rate": 1.9789598420138065e-05, "loss": 0.9298, "step": 1986 }, { "epoch": 0.28057046032194294, "grad_norm": 4.861343308960142, "learning_rate": 1.9789287237183616e-05, "loss": 1.1876, "step": 1987 }, { "epoch": 0.2807116633719288, "grad_norm": 5.267061242652416, "learning_rate": 1.9788975826730362e-05, "loss": 1.3012, "step": 1988 }, { "epoch": 0.2808528664219147, "grad_norm": 5.259085379914763, "learning_rate": 1.9788664188785535e-05, "loss": 1.271, "step": 1989 }, { "epoch": 0.2809940694719006, "grad_norm": 4.418607763924726, "learning_rate": 1.9788352323356376e-05, "loss": 1.1713, "step": 1990 }, { "epoch": 0.28113527252188647, "grad_norm": 4.352207990373677, "learning_rate": 1.978804023045014e-05, "loss": 1.137, "step": 1991 }, { "epoch": 0.28127647557187235, "grad_norm": 4.061215820668716, "learning_rate": 1.9787727910074077e-05, "loss": 0.9199, "step": 1992 }, { "epoch": 0.28141767862185824, "grad_norm": 5.02335803297421, "learning_rate": 1.978741536223544e-05, "loss": 1.3946, "step": 1993 }, { "epoch": 0.2815588816718441, "grad_norm": 4.521580657225586, "learning_rate": 1.97871025869415e-05, "loss": 1.1222, "step": 1994 }, { "epoch": 0.28170008472183, "grad_norm": 3.8191148360109244, "learning_rate": 1.9786789584199523e-05, "loss": 1.0476, "step": 1995 }, { "epoch": 0.2818412877718159, "grad_norm": 4.174635577704492, "learning_rate": 1.9786476354016782e-05, "loss": 0.997, "step": 1996 }, { "epoch": 0.28198249082180177, "grad_norm": 3.4299898513764697, "learning_rate": 1.9786162896400558e-05, "loss": 0.8523, "step": 1997 }, { "epoch": 0.28212369387178765, "grad_norm": 3.795614369695515, "learning_rate": 1.9785849211358133e-05, "loss": 1.0443, "step": 1998 }, { "epoch": 0.28226489692177353, "grad_norm": 4.534375150432486, "learning_rate": 1.9785535298896797e-05, "loss": 0.9503, "step": 1999 }, { "epoch": 0.2824060999717594, "grad_norm": 4.337289273239031, "learning_rate": 1.9785221159023852e-05, "loss": 1.1207, "step": 2000 }, { "epoch": 0.28254730302174524, "grad_norm": 4.026007673664495, "learning_rate": 1.978490679174659e-05, "loss": 0.9742, "step": 2001 }, { "epoch": 0.2826885060717311, "grad_norm": 4.402877866545439, "learning_rate": 1.9784592197072317e-05, "loss": 1.0959, "step": 2002 }, { "epoch": 0.282829709121717, "grad_norm": 4.145075755409406, "learning_rate": 1.978427737500835e-05, "loss": 0.8702, "step": 2003 }, { "epoch": 0.2829709121717029, "grad_norm": 4.473533883781703, "learning_rate": 1.9783962325562004e-05, "loss": 1.0151, "step": 2004 }, { "epoch": 0.2831121152216888, "grad_norm": 4.248658560673944, "learning_rate": 1.9783647048740597e-05, "loss": 1.1081, "step": 2005 }, { "epoch": 0.28325331827167466, "grad_norm": 4.760186713189362, "learning_rate": 1.9783331544551457e-05, "loss": 1.3244, "step": 2006 }, { "epoch": 0.28339452132166054, "grad_norm": 3.89054324974062, "learning_rate": 1.978301581300192e-05, "loss": 1.0176, "step": 2007 }, { "epoch": 0.2835357243716464, "grad_norm": 4.416503973128699, "learning_rate": 1.9782699854099316e-05, "loss": 0.9002, "step": 2008 }, { "epoch": 0.2836769274216323, "grad_norm": 4.539685188810968, "learning_rate": 1.9782383667850994e-05, "loss": 0.9914, "step": 2009 }, { "epoch": 0.2838181304716182, "grad_norm": 4.557279010744634, "learning_rate": 1.97820672542643e-05, "loss": 1.1256, "step": 2010 }, { "epoch": 0.2839593335216041, "grad_norm": 4.934417060893699, "learning_rate": 1.978175061334659e-05, "loss": 1.3845, "step": 2011 }, { "epoch": 0.28410053657158996, "grad_norm": 4.17693016203107, "learning_rate": 1.9781433745105218e-05, "loss": 1.0407, "step": 2012 }, { "epoch": 0.28424173962157584, "grad_norm": 3.744653586266438, "learning_rate": 1.978111664954755e-05, "loss": 0.9955, "step": 2013 }, { "epoch": 0.2843829426715617, "grad_norm": 4.561772943998098, "learning_rate": 1.9780799326680956e-05, "loss": 1.1598, "step": 2014 }, { "epoch": 0.2845241457215476, "grad_norm": 5.080010390830661, "learning_rate": 1.978048177651281e-05, "loss": 1.361, "step": 2015 }, { "epoch": 0.2846653487715335, "grad_norm": 3.82776282777789, "learning_rate": 1.978016399905049e-05, "loss": 0.9883, "step": 2016 }, { "epoch": 0.28480655182151937, "grad_norm": 4.32869082156598, "learning_rate": 1.977984599430138e-05, "loss": 1.289, "step": 2017 }, { "epoch": 0.2849477548715052, "grad_norm": 4.3485794939525055, "learning_rate": 1.9779527762272877e-05, "loss": 1.1465, "step": 2018 }, { "epoch": 0.2850889579214911, "grad_norm": 4.040964286303334, "learning_rate": 1.9779209302972372e-05, "loss": 1.0906, "step": 2019 }, { "epoch": 0.28523016097147696, "grad_norm": 4.7333264742203705, "learning_rate": 1.9778890616407266e-05, "loss": 1.3597, "step": 2020 }, { "epoch": 0.28537136402146285, "grad_norm": 4.120846192004582, "learning_rate": 1.9778571702584964e-05, "loss": 1.1739, "step": 2021 }, { "epoch": 0.28551256707144873, "grad_norm": 4.516244367261513, "learning_rate": 1.977825256151288e-05, "loss": 1.3577, "step": 2022 }, { "epoch": 0.2856537701214346, "grad_norm": 4.0108925589159306, "learning_rate": 1.977793319319843e-05, "loss": 0.8988, "step": 2023 }, { "epoch": 0.2857949731714205, "grad_norm": 4.3321504713073935, "learning_rate": 1.9777613597649033e-05, "loss": 1.0573, "step": 2024 }, { "epoch": 0.2859361762214064, "grad_norm": 4.285484247734015, "learning_rate": 1.977729377487212e-05, "loss": 1.1399, "step": 2025 }, { "epoch": 0.28607737927139226, "grad_norm": 3.681731421199415, "learning_rate": 1.9776973724875123e-05, "loss": 1.0067, "step": 2026 }, { "epoch": 0.28621858232137815, "grad_norm": 4.50057295846331, "learning_rate": 1.9776653447665476e-05, "loss": 1.091, "step": 2027 }, { "epoch": 0.28635978537136403, "grad_norm": 4.377690403930399, "learning_rate": 1.9776332943250628e-05, "loss": 0.871, "step": 2028 }, { "epoch": 0.2865009884213499, "grad_norm": 3.917457818102609, "learning_rate": 1.977601221163802e-05, "loss": 1.1456, "step": 2029 }, { "epoch": 0.2866421914713358, "grad_norm": 4.334577106906182, "learning_rate": 1.9775691252835113e-05, "loss": 1.1127, "step": 2030 }, { "epoch": 0.2867833945213217, "grad_norm": 4.310776613987475, "learning_rate": 1.9775370066849363e-05, "loss": 1.2173, "step": 2031 }, { "epoch": 0.28692459757130756, "grad_norm": 3.6107278214409466, "learning_rate": 1.9775048653688235e-05, "loss": 1.1559, "step": 2032 }, { "epoch": 0.28706580062129344, "grad_norm": 4.50530080782248, "learning_rate": 1.97747270133592e-05, "loss": 1.1955, "step": 2033 }, { "epoch": 0.2872070036712793, "grad_norm": 4.288788220244859, "learning_rate": 1.9774405145869728e-05, "loss": 1.2162, "step": 2034 }, { "epoch": 0.28734820672126515, "grad_norm": 3.808717115242739, "learning_rate": 1.97740830512273e-05, "loss": 0.9448, "step": 2035 }, { "epoch": 0.28748940977125104, "grad_norm": 4.009799020437934, "learning_rate": 1.9773760729439405e-05, "loss": 0.9945, "step": 2036 }, { "epoch": 0.2876306128212369, "grad_norm": 4.840755939516986, "learning_rate": 1.9773438180513533e-05, "loss": 1.1422, "step": 2037 }, { "epoch": 0.2877718158712228, "grad_norm": 3.441429572592453, "learning_rate": 1.9773115404457175e-05, "loss": 0.8611, "step": 2038 }, { "epoch": 0.2879130189212087, "grad_norm": 4.3531540814975855, "learning_rate": 1.9772792401277837e-05, "loss": 1.1632, "step": 2039 }, { "epoch": 0.28805422197119457, "grad_norm": 4.345000000297585, "learning_rate": 1.9772469170983026e-05, "loss": 0.9961, "step": 2040 }, { "epoch": 0.28819542502118045, "grad_norm": 4.194915802734927, "learning_rate": 1.977214571358025e-05, "loss": 1.094, "step": 2041 }, { "epoch": 0.28833662807116633, "grad_norm": 4.028493876038733, "learning_rate": 1.9771822029077028e-05, "loss": 1.0783, "step": 2042 }, { "epoch": 0.2884778311211522, "grad_norm": 3.923437266480302, "learning_rate": 1.9771498117480885e-05, "loss": 0.9019, "step": 2043 }, { "epoch": 0.2886190341711381, "grad_norm": 4.730233937110911, "learning_rate": 1.977117397879934e-05, "loss": 1.1551, "step": 2044 }, { "epoch": 0.288760237221124, "grad_norm": 4.082521639866751, "learning_rate": 1.9770849613039934e-05, "loss": 0.8055, "step": 2045 }, { "epoch": 0.28890144027110987, "grad_norm": 4.326972895394156, "learning_rate": 1.9770525020210204e-05, "loss": 1.0815, "step": 2046 }, { "epoch": 0.28904264332109575, "grad_norm": 4.404864862808989, "learning_rate": 1.9770200200317694e-05, "loss": 1.2375, "step": 2047 }, { "epoch": 0.28918384637108163, "grad_norm": 4.484117726255576, "learning_rate": 1.976987515336995e-05, "loss": 1.2743, "step": 2048 }, { "epoch": 0.2893250494210675, "grad_norm": 4.063025502518518, "learning_rate": 1.9769549879374524e-05, "loss": 1.1252, "step": 2049 }, { "epoch": 0.2894662524710534, "grad_norm": 3.983980411580159, "learning_rate": 1.9769224378338978e-05, "loss": 1.0919, "step": 2050 }, { "epoch": 0.2896074555210393, "grad_norm": 4.759913262493748, "learning_rate": 1.9768898650270877e-05, "loss": 1.2276, "step": 2051 }, { "epoch": 0.2897486585710251, "grad_norm": 4.544744615479969, "learning_rate": 1.976857269517779e-05, "loss": 1.1281, "step": 2052 }, { "epoch": 0.289889861621011, "grad_norm": 3.7370236760771416, "learning_rate": 1.9768246513067295e-05, "loss": 0.9291, "step": 2053 }, { "epoch": 0.2900310646709969, "grad_norm": 4.492300710270915, "learning_rate": 1.976792010394697e-05, "loss": 1.318, "step": 2054 }, { "epoch": 0.29017226772098276, "grad_norm": 4.498157558033843, "learning_rate": 1.97675934678244e-05, "loss": 1.0936, "step": 2055 }, { "epoch": 0.29031347077096864, "grad_norm": 3.6316301131730526, "learning_rate": 1.9767266604707172e-05, "loss": 0.8455, "step": 2056 }, { "epoch": 0.2904546738209545, "grad_norm": 3.9246863697555536, "learning_rate": 1.9766939514602885e-05, "loss": 1.0643, "step": 2057 }, { "epoch": 0.2905958768709404, "grad_norm": 4.314612904258364, "learning_rate": 1.9766612197519146e-05, "loss": 1.0944, "step": 2058 }, { "epoch": 0.2907370799209263, "grad_norm": 4.323314277102569, "learning_rate": 1.9766284653463558e-05, "loss": 1.0158, "step": 2059 }, { "epoch": 0.29087828297091217, "grad_norm": 4.822479394109334, "learning_rate": 1.976595688244373e-05, "loss": 1.0973, "step": 2060 }, { "epoch": 0.29101948602089805, "grad_norm": 3.922924559815942, "learning_rate": 1.9765628884467283e-05, "loss": 1.0773, "step": 2061 }, { "epoch": 0.29116068907088394, "grad_norm": 4.341775403330064, "learning_rate": 1.9765300659541837e-05, "loss": 1.1477, "step": 2062 }, { "epoch": 0.2913018921208698, "grad_norm": 4.788375099739378, "learning_rate": 1.976497220767502e-05, "loss": 1.319, "step": 2063 }, { "epoch": 0.2914430951708557, "grad_norm": 3.9212088446350326, "learning_rate": 1.976464352887447e-05, "loss": 0.9932, "step": 2064 }, { "epoch": 0.2915842982208416, "grad_norm": 4.585040004039015, "learning_rate": 1.976431462314782e-05, "loss": 1.1709, "step": 2065 }, { "epoch": 0.29172550127082747, "grad_norm": 4.291513162972788, "learning_rate": 1.9763985490502714e-05, "loss": 1.3046, "step": 2066 }, { "epoch": 0.29186670432081335, "grad_norm": 5.093619370039751, "learning_rate": 1.97636561309468e-05, "loss": 1.2358, "step": 2067 }, { "epoch": 0.29200790737079924, "grad_norm": 3.754190466944284, "learning_rate": 1.976332654448774e-05, "loss": 1.002, "step": 2068 }, { "epoch": 0.29214911042078506, "grad_norm": 4.141108481796237, "learning_rate": 1.976299673113318e-05, "loss": 1.1374, "step": 2069 }, { "epoch": 0.29229031347077095, "grad_norm": 4.886492875931723, "learning_rate": 1.9762666690890796e-05, "loss": 1.1219, "step": 2070 }, { "epoch": 0.29243151652075683, "grad_norm": 3.452987479332645, "learning_rate": 1.9762336423768257e-05, "loss": 0.9034, "step": 2071 }, { "epoch": 0.2925727195707427, "grad_norm": 4.266678687110639, "learning_rate": 1.976200592977323e-05, "loss": 1.2724, "step": 2072 }, { "epoch": 0.2927139226207286, "grad_norm": 4.0257082259290735, "learning_rate": 1.9761675208913408e-05, "loss": 0.9951, "step": 2073 }, { "epoch": 0.2928551256707145, "grad_norm": 3.96832581908812, "learning_rate": 1.9761344261196464e-05, "loss": 1.0852, "step": 2074 }, { "epoch": 0.29299632872070036, "grad_norm": 4.617685000382881, "learning_rate": 1.9761013086630096e-05, "loss": 1.1784, "step": 2075 }, { "epoch": 0.29313753177068624, "grad_norm": 3.6319940014803267, "learning_rate": 1.9760681685222e-05, "loss": 0.9583, "step": 2076 }, { "epoch": 0.2932787348206721, "grad_norm": 4.2324410829378944, "learning_rate": 1.9760350056979877e-05, "loss": 1.0565, "step": 2077 }, { "epoch": 0.293419937870658, "grad_norm": 3.7029137727175483, "learning_rate": 1.976001820191143e-05, "loss": 1.0016, "step": 2078 }, { "epoch": 0.2935611409206439, "grad_norm": 3.650246675487592, "learning_rate": 1.975968612002438e-05, "loss": 0.9138, "step": 2079 }, { "epoch": 0.2937023439706298, "grad_norm": 3.9568489080451936, "learning_rate": 1.975935381132644e-05, "loss": 1.0436, "step": 2080 }, { "epoch": 0.29384354702061566, "grad_norm": 4.9449641178102635, "learning_rate": 1.975902127582533e-05, "loss": 1.2078, "step": 2081 }, { "epoch": 0.29398475007060154, "grad_norm": 4.121101354372049, "learning_rate": 1.9758688513528783e-05, "loss": 1.0161, "step": 2082 }, { "epoch": 0.2941259531205874, "grad_norm": 4.94270977761333, "learning_rate": 1.9758355524444526e-05, "loss": 1.1251, "step": 2083 }, { "epoch": 0.2942671561705733, "grad_norm": 4.345004231911718, "learning_rate": 1.9758022308580306e-05, "loss": 1.019, "step": 2084 }, { "epoch": 0.2944083592205592, "grad_norm": 4.668617086823921, "learning_rate": 1.9757688865943855e-05, "loss": 1.0925, "step": 2085 }, { "epoch": 0.294549562270545, "grad_norm": 3.8205038776183136, "learning_rate": 1.975735519654293e-05, "loss": 0.9605, "step": 2086 }, { "epoch": 0.2946907653205309, "grad_norm": 3.6174909433375433, "learning_rate": 1.9757021300385288e-05, "loss": 0.9056, "step": 2087 }, { "epoch": 0.2948319683705168, "grad_norm": 5.204993341242176, "learning_rate": 1.9756687177478683e-05, "loss": 1.3593, "step": 2088 }, { "epoch": 0.29497317142050267, "grad_norm": 3.6509927194113008, "learning_rate": 1.9756352827830878e-05, "loss": 0.8878, "step": 2089 }, { "epoch": 0.29511437447048855, "grad_norm": 4.715387960943878, "learning_rate": 1.975601825144965e-05, "loss": 1.3048, "step": 2090 }, { "epoch": 0.29525557752047443, "grad_norm": 4.391515569665896, "learning_rate": 1.9755683448342774e-05, "loss": 1.0547, "step": 2091 }, { "epoch": 0.2953967805704603, "grad_norm": 4.718727020149414, "learning_rate": 1.9755348418518022e-05, "loss": 1.2065, "step": 2092 }, { "epoch": 0.2955379836204462, "grad_norm": 4.6274706615496, "learning_rate": 1.9755013161983188e-05, "loss": 1.1108, "step": 2093 }, { "epoch": 0.2956791866704321, "grad_norm": 4.262219880039032, "learning_rate": 1.9754677678746064e-05, "loss": 1.1746, "step": 2094 }, { "epoch": 0.29582038972041796, "grad_norm": 4.973375591970985, "learning_rate": 1.975434196881444e-05, "loss": 1.0213, "step": 2095 }, { "epoch": 0.29596159277040385, "grad_norm": 4.247965421169264, "learning_rate": 1.9754006032196123e-05, "loss": 1.201, "step": 2096 }, { "epoch": 0.29610279582038973, "grad_norm": 3.6558771372991523, "learning_rate": 1.9753669868898915e-05, "loss": 0.9072, "step": 2097 }, { "epoch": 0.2962439988703756, "grad_norm": 3.790561171440481, "learning_rate": 1.9753333478930632e-05, "loss": 1.1078, "step": 2098 }, { "epoch": 0.2963852019203615, "grad_norm": 3.815647120930167, "learning_rate": 1.9752996862299098e-05, "loss": 1.0469, "step": 2099 }, { "epoch": 0.2965264049703474, "grad_norm": 4.219261450992644, "learning_rate": 1.975266001901212e-05, "loss": 0.8996, "step": 2100 }, { "epoch": 0.29666760802033326, "grad_norm": 4.214037712732588, "learning_rate": 1.975232294907754e-05, "loss": 1.0994, "step": 2101 }, { "epoch": 0.29680881107031915, "grad_norm": 4.157645276184329, "learning_rate": 1.9751985652503187e-05, "loss": 1.0049, "step": 2102 }, { "epoch": 0.296950014120305, "grad_norm": 4.285734499050421, "learning_rate": 1.9751648129296893e-05, "loss": 1.2352, "step": 2103 }, { "epoch": 0.29709121717029086, "grad_norm": 4.3516889160874745, "learning_rate": 1.9751310379466514e-05, "loss": 1.011, "step": 2104 }, { "epoch": 0.29723242022027674, "grad_norm": 5.264245001109766, "learning_rate": 1.9750972403019894e-05, "loss": 1.3632, "step": 2105 }, { "epoch": 0.2973736232702626, "grad_norm": 3.842938326979059, "learning_rate": 1.9750634199964882e-05, "loss": 0.9896, "step": 2106 }, { "epoch": 0.2975148263202485, "grad_norm": 4.099463182172017, "learning_rate": 1.9750295770309346e-05, "loss": 1.1322, "step": 2107 }, { "epoch": 0.2976560293702344, "grad_norm": 5.260603302006069, "learning_rate": 1.9749957114061143e-05, "loss": 1.2615, "step": 2108 }, { "epoch": 0.29779723242022027, "grad_norm": 4.844109036544185, "learning_rate": 1.974961823122815e-05, "loss": 1.1653, "step": 2109 }, { "epoch": 0.29793843547020615, "grad_norm": 4.729861458467353, "learning_rate": 1.9749279121818235e-05, "loss": 1.1966, "step": 2110 }, { "epoch": 0.29807963852019204, "grad_norm": 4.44295162211147, "learning_rate": 1.974893978583929e-05, "loss": 1.0891, "step": 2111 }, { "epoch": 0.2982208415701779, "grad_norm": 4.483507072957521, "learning_rate": 1.9748600223299192e-05, "loss": 1.1678, "step": 2112 }, { "epoch": 0.2983620446201638, "grad_norm": 3.940404450878208, "learning_rate": 1.9748260434205835e-05, "loss": 0.9854, "step": 2113 }, { "epoch": 0.2985032476701497, "grad_norm": 3.9262605679028932, "learning_rate": 1.9747920418567117e-05, "loss": 1.0451, "step": 2114 }, { "epoch": 0.29864445072013557, "grad_norm": 4.096441516935058, "learning_rate": 1.9747580176390938e-05, "loss": 1.1508, "step": 2115 }, { "epoch": 0.29878565377012145, "grad_norm": 3.6950068845578503, "learning_rate": 1.9747239707685205e-05, "loss": 1.0449, "step": 2116 }, { "epoch": 0.29892685682010733, "grad_norm": 4.814733536298415, "learning_rate": 1.9746899012457828e-05, "loss": 1.3223, "step": 2117 }, { "epoch": 0.2990680598700932, "grad_norm": 4.710908762560615, "learning_rate": 1.974655809071673e-05, "loss": 1.0588, "step": 2118 }, { "epoch": 0.2992092629200791, "grad_norm": 5.874353984735284, "learning_rate": 1.974621694246983e-05, "loss": 1.4003, "step": 2119 }, { "epoch": 0.2993504659700649, "grad_norm": 4.733486325167167, "learning_rate": 1.9745875567725058e-05, "loss": 1.0456, "step": 2120 }, { "epoch": 0.2994916690200508, "grad_norm": 4.388512374531519, "learning_rate": 1.9745533966490346e-05, "loss": 1.1836, "step": 2121 }, { "epoch": 0.2996328720700367, "grad_norm": 5.472906473352105, "learning_rate": 1.9745192138773633e-05, "loss": 1.3995, "step": 2122 }, { "epoch": 0.2997740751200226, "grad_norm": 4.214611236572622, "learning_rate": 1.9744850084582868e-05, "loss": 1.0988, "step": 2123 }, { "epoch": 0.29991527817000846, "grad_norm": 4.6535323988970605, "learning_rate": 1.974450780392599e-05, "loss": 1.2336, "step": 2124 }, { "epoch": 0.30005648121999434, "grad_norm": 4.216973525657969, "learning_rate": 1.974416529681096e-05, "loss": 1.0138, "step": 2125 }, { "epoch": 0.3001976842699802, "grad_norm": 4.54751853001935, "learning_rate": 1.9743822563245738e-05, "loss": 1.0632, "step": 2126 }, { "epoch": 0.3003388873199661, "grad_norm": 4.410058274448994, "learning_rate": 1.9743479603238285e-05, "loss": 1.0406, "step": 2127 }, { "epoch": 0.300480090369952, "grad_norm": 4.505072932558089, "learning_rate": 1.9743136416796578e-05, "loss": 1.1924, "step": 2128 }, { "epoch": 0.3006212934199379, "grad_norm": 4.259655297474813, "learning_rate": 1.9742793003928587e-05, "loss": 1.0439, "step": 2129 }, { "epoch": 0.30076249646992376, "grad_norm": 4.0170902072458246, "learning_rate": 1.9742449364642293e-05, "loss": 1.1054, "step": 2130 }, { "epoch": 0.30090369951990964, "grad_norm": 3.85904065084625, "learning_rate": 1.9742105498945685e-05, "loss": 0.9522, "step": 2131 }, { "epoch": 0.3010449025698955, "grad_norm": 4.149821997292952, "learning_rate": 1.974176140684675e-05, "loss": 1.0282, "step": 2132 }, { "epoch": 0.3011861056198814, "grad_norm": 3.895860611066988, "learning_rate": 1.974141708835349e-05, "loss": 1.0095, "step": 2133 }, { "epoch": 0.3013273086698673, "grad_norm": 3.490149077007251, "learning_rate": 1.97410725434739e-05, "loss": 0.7841, "step": 2134 }, { "epoch": 0.30146851171985317, "grad_norm": 3.601206672543684, "learning_rate": 1.9740727772215994e-05, "loss": 0.9907, "step": 2135 }, { "epoch": 0.30160971476983905, "grad_norm": 4.086020251916973, "learning_rate": 1.974038277458778e-05, "loss": 1.0585, "step": 2136 }, { "epoch": 0.3017509178198249, "grad_norm": 4.43966582064688, "learning_rate": 1.974003755059728e-05, "loss": 1.1171, "step": 2137 }, { "epoch": 0.30189212086981076, "grad_norm": 4.299078171831713, "learning_rate": 1.973969210025251e-05, "loss": 0.8867, "step": 2138 }, { "epoch": 0.30203332391979665, "grad_norm": 4.267444521800434, "learning_rate": 1.97393464235615e-05, "loss": 0.9956, "step": 2139 }, { "epoch": 0.30217452696978253, "grad_norm": 4.824881506561139, "learning_rate": 1.973900052053229e-05, "loss": 1.2953, "step": 2140 }, { "epoch": 0.3023157300197684, "grad_norm": 4.841472336560749, "learning_rate": 1.9738654391172912e-05, "loss": 1.0828, "step": 2141 }, { "epoch": 0.3024569330697543, "grad_norm": 3.963816648644586, "learning_rate": 1.973830803549141e-05, "loss": 0.7916, "step": 2142 }, { "epoch": 0.3025981361197402, "grad_norm": 3.7120311200752756, "learning_rate": 1.973796145349584e-05, "loss": 1.0114, "step": 2143 }, { "epoch": 0.30273933916972606, "grad_norm": 4.566427698894491, "learning_rate": 1.9737614645194246e-05, "loss": 1.1034, "step": 2144 }, { "epoch": 0.30288054221971195, "grad_norm": 3.658668801593328, "learning_rate": 1.9737267610594696e-05, "loss": 0.9294, "step": 2145 }, { "epoch": 0.30302174526969783, "grad_norm": 3.7909626617417804, "learning_rate": 1.9736920349705252e-05, "loss": 1.0643, "step": 2146 }, { "epoch": 0.3031629483196837, "grad_norm": 4.723961687939991, "learning_rate": 1.9736572862533982e-05, "loss": 1.2165, "step": 2147 }, { "epoch": 0.3033041513696696, "grad_norm": 3.454731927272283, "learning_rate": 1.9736225149088964e-05, "loss": 1.163, "step": 2148 }, { "epoch": 0.3034453544196555, "grad_norm": 4.54516822071072, "learning_rate": 1.973587720937828e-05, "loss": 1.3348, "step": 2149 }, { "epoch": 0.30358655746964136, "grad_norm": 4.129944101989155, "learning_rate": 1.9735529043410012e-05, "loss": 1.0159, "step": 2150 }, { "epoch": 0.30372776051962724, "grad_norm": 4.421217810744664, "learning_rate": 1.9735180651192258e-05, "loss": 1.0217, "step": 2151 }, { "epoch": 0.3038689635696131, "grad_norm": 5.510581582628757, "learning_rate": 1.97348320327331e-05, "loss": 0.8774, "step": 2152 }, { "epoch": 0.304010166619599, "grad_norm": 4.563096046896744, "learning_rate": 1.973448318804066e-05, "loss": 1.4281, "step": 2153 }, { "epoch": 0.30415136966958484, "grad_norm": 4.57215781177901, "learning_rate": 1.9734134117123028e-05, "loss": 1.121, "step": 2154 }, { "epoch": 0.3042925727195707, "grad_norm": 3.6378546353345285, "learning_rate": 1.9733784819988327e-05, "loss": 0.9235, "step": 2155 }, { "epoch": 0.3044337757695566, "grad_norm": 4.355323387033869, "learning_rate": 1.973343529664467e-05, "loss": 1.3777, "step": 2156 }, { "epoch": 0.3045749788195425, "grad_norm": 4.1589182416077675, "learning_rate": 1.9733085547100178e-05, "loss": 0.9132, "step": 2157 }, { "epoch": 0.30471618186952837, "grad_norm": 4.73090358416012, "learning_rate": 1.9732735571362985e-05, "loss": 1.0293, "step": 2158 }, { "epoch": 0.30485738491951425, "grad_norm": 3.58171323511884, "learning_rate": 1.973238536944122e-05, "loss": 1.0627, "step": 2159 }, { "epoch": 0.30499858796950013, "grad_norm": 3.2489930587960694, "learning_rate": 1.973203494134302e-05, "loss": 0.8192, "step": 2160 }, { "epoch": 0.305139791019486, "grad_norm": 4.257174402786828, "learning_rate": 1.973168428707653e-05, "loss": 1.0641, "step": 2161 }, { "epoch": 0.3052809940694719, "grad_norm": 5.007663980347736, "learning_rate": 1.9731333406649905e-05, "loss": 1.2569, "step": 2162 }, { "epoch": 0.3054221971194578, "grad_norm": 4.062986544743405, "learning_rate": 1.973098230007129e-05, "loss": 1.0777, "step": 2163 }, { "epoch": 0.30556340016944367, "grad_norm": 4.746232951452049, "learning_rate": 1.973063096734885e-05, "loss": 1.0871, "step": 2164 }, { "epoch": 0.30570460321942955, "grad_norm": 3.8301889599639938, "learning_rate": 1.9730279408490745e-05, "loss": 0.8697, "step": 2165 }, { "epoch": 0.30584580626941543, "grad_norm": 4.728308124948648, "learning_rate": 1.9729927623505153e-05, "loss": 1.1847, "step": 2166 }, { "epoch": 0.3059870093194013, "grad_norm": 4.703547765788962, "learning_rate": 1.9729575612400243e-05, "loss": 1.0545, "step": 2167 }, { "epoch": 0.3061282123693872, "grad_norm": 4.543642508071172, "learning_rate": 1.97292233751842e-05, "loss": 0.9916, "step": 2168 }, { "epoch": 0.3062694154193731, "grad_norm": 4.737480553838843, "learning_rate": 1.9728870911865206e-05, "loss": 1.1312, "step": 2169 }, { "epoch": 0.30641061846935896, "grad_norm": 5.511363765219922, "learning_rate": 1.9728518222451454e-05, "loss": 1.278, "step": 2170 }, { "epoch": 0.3065518215193448, "grad_norm": 4.803924273845113, "learning_rate": 1.9728165306951143e-05, "loss": 1.2992, "step": 2171 }, { "epoch": 0.3066930245693307, "grad_norm": 4.383592224379284, "learning_rate": 1.9727812165372463e-05, "loss": 0.9418, "step": 2172 }, { "epoch": 0.30683422761931656, "grad_norm": 3.9911199863584956, "learning_rate": 1.9727458797723638e-05, "loss": 1.1835, "step": 2173 }, { "epoch": 0.30697543066930244, "grad_norm": 4.495905276183238, "learning_rate": 1.972710520401287e-05, "loss": 1.1202, "step": 2174 }, { "epoch": 0.3071166337192883, "grad_norm": 4.134165598526877, "learning_rate": 1.9726751384248373e-05, "loss": 0.9348, "step": 2175 }, { "epoch": 0.3072578367692742, "grad_norm": 4.249087624018567, "learning_rate": 1.972639733843838e-05, "loss": 1.1545, "step": 2176 }, { "epoch": 0.3073990398192601, "grad_norm": 3.8063788381456836, "learning_rate": 1.9726043066591106e-05, "loss": 1.0045, "step": 2177 }, { "epoch": 0.30754024286924597, "grad_norm": 4.2023709573269095, "learning_rate": 1.97256885687148e-05, "loss": 1.0069, "step": 2178 }, { "epoch": 0.30768144591923186, "grad_norm": 4.434431438532583, "learning_rate": 1.9725333844817688e-05, "loss": 0.938, "step": 2179 }, { "epoch": 0.30782264896921774, "grad_norm": 4.514349061897778, "learning_rate": 1.9724978894908017e-05, "loss": 1.148, "step": 2180 }, { "epoch": 0.3079638520192036, "grad_norm": 3.94349057640504, "learning_rate": 1.9724623718994038e-05, "loss": 0.944, "step": 2181 }, { "epoch": 0.3081050550691895, "grad_norm": 4.360945344245002, "learning_rate": 1.9724268317084e-05, "loss": 1.1838, "step": 2182 }, { "epoch": 0.3082462581191754, "grad_norm": 3.9156721591909878, "learning_rate": 1.972391268918617e-05, "loss": 1.0602, "step": 2183 }, { "epoch": 0.30838746116916127, "grad_norm": 3.901685389641189, "learning_rate": 1.9723556835308807e-05, "loss": 0.9022, "step": 2184 }, { "epoch": 0.30852866421914715, "grad_norm": 5.265854896634157, "learning_rate": 1.9723200755460183e-05, "loss": 1.1246, "step": 2185 }, { "epoch": 0.30866986726913304, "grad_norm": 3.922950898345806, "learning_rate": 1.9722844449648572e-05, "loss": 1.1506, "step": 2186 }, { "epoch": 0.3088110703191189, "grad_norm": 4.982707597455844, "learning_rate": 1.9722487917882257e-05, "loss": 1.3951, "step": 2187 }, { "epoch": 0.30895227336910475, "grad_norm": 4.2837207749021395, "learning_rate": 1.972213116016952e-05, "loss": 1.123, "step": 2188 }, { "epoch": 0.30909347641909063, "grad_norm": 6.972628218422345, "learning_rate": 1.972177417651865e-05, "loss": 1.0585, "step": 2189 }, { "epoch": 0.3092346794690765, "grad_norm": 7.33086537059121, "learning_rate": 1.972141696693795e-05, "loss": 1.1828, "step": 2190 }, { "epoch": 0.3093758825190624, "grad_norm": 4.723754246228618, "learning_rate": 1.972105953143572e-05, "loss": 1.3529, "step": 2191 }, { "epoch": 0.3095170855690483, "grad_norm": 5.372719029088434, "learning_rate": 1.972070187002026e-05, "loss": 1.3714, "step": 2192 }, { "epoch": 0.30965828861903416, "grad_norm": 4.490557409952471, "learning_rate": 1.972034398269989e-05, "loss": 1.2876, "step": 2193 }, { "epoch": 0.30979949166902004, "grad_norm": 5.53244491342042, "learning_rate": 1.9719985869482925e-05, "loss": 1.2566, "step": 2194 }, { "epoch": 0.3099406947190059, "grad_norm": 3.817446392756854, "learning_rate": 1.9719627530377683e-05, "loss": 1.0397, "step": 2195 }, { "epoch": 0.3100818977689918, "grad_norm": 4.359072692702619, "learning_rate": 1.9719268965392495e-05, "loss": 1.151, "step": 2196 }, { "epoch": 0.3102231008189777, "grad_norm": 4.105114312705257, "learning_rate": 1.9718910174535695e-05, "loss": 1.1014, "step": 2197 }, { "epoch": 0.3103643038689636, "grad_norm": 4.039580388619772, "learning_rate": 1.971855115781562e-05, "loss": 1.0804, "step": 2198 }, { "epoch": 0.31050550691894946, "grad_norm": 4.273555151424891, "learning_rate": 1.9718191915240613e-05, "loss": 1.2258, "step": 2199 }, { "epoch": 0.31064670996893534, "grad_norm": 4.290573309578367, "learning_rate": 1.971783244681902e-05, "loss": 1.0384, "step": 2200 }, { "epoch": 0.3107879130189212, "grad_norm": 5.588844665281358, "learning_rate": 1.97174727525592e-05, "loss": 1.1098, "step": 2201 }, { "epoch": 0.3109291160689071, "grad_norm": 4.147057914305952, "learning_rate": 1.971711283246951e-05, "loss": 1.0316, "step": 2202 }, { "epoch": 0.311070319118893, "grad_norm": 4.7445716258617345, "learning_rate": 1.9716752686558316e-05, "loss": 1.2063, "step": 2203 }, { "epoch": 0.3112115221688789, "grad_norm": 4.50985472440183, "learning_rate": 1.9716392314833982e-05, "loss": 0.9951, "step": 2204 }, { "epoch": 0.3113527252188647, "grad_norm": 4.599093905108446, "learning_rate": 1.971603171730489e-05, "loss": 1.1999, "step": 2205 }, { "epoch": 0.3114939282688506, "grad_norm": 4.470245353988294, "learning_rate": 1.9715670893979416e-05, "loss": 0.9959, "step": 2206 }, { "epoch": 0.31163513131883647, "grad_norm": 3.647674698123316, "learning_rate": 1.9715309844865948e-05, "loss": 0.9983, "step": 2207 }, { "epoch": 0.31177633436882235, "grad_norm": 3.8325591088875135, "learning_rate": 1.9714948569972873e-05, "loss": 0.8896, "step": 2208 }, { "epoch": 0.31191753741880823, "grad_norm": 4.322446454861351, "learning_rate": 1.9714587069308586e-05, "loss": 1.1614, "step": 2209 }, { "epoch": 0.3120587404687941, "grad_norm": 4.096968760546903, "learning_rate": 1.9714225342881495e-05, "loss": 0.9872, "step": 2210 }, { "epoch": 0.31219994351878, "grad_norm": 4.43071482148293, "learning_rate": 1.97138633907e-05, "loss": 1.0658, "step": 2211 }, { "epoch": 0.3123411465687659, "grad_norm": 4.460815617057209, "learning_rate": 1.9713501212772518e-05, "loss": 1.0141, "step": 2212 }, { "epoch": 0.31248234961875176, "grad_norm": 4.699188484042622, "learning_rate": 1.971313880910746e-05, "loss": 1.136, "step": 2213 }, { "epoch": 0.31262355266873765, "grad_norm": 3.7511074647728297, "learning_rate": 1.9712776179713252e-05, "loss": 1.1509, "step": 2214 }, { "epoch": 0.31276475571872353, "grad_norm": 3.369012966882251, "learning_rate": 1.971241332459832e-05, "loss": 0.757, "step": 2215 }, { "epoch": 0.3129059587687094, "grad_norm": 4.6869804728779165, "learning_rate": 1.9712050243771095e-05, "loss": 0.9483, "step": 2216 }, { "epoch": 0.3130471618186953, "grad_norm": 4.6090962133332845, "learning_rate": 1.971168693724002e-05, "loss": 1.0309, "step": 2217 }, { "epoch": 0.3131883648686812, "grad_norm": 4.275677536908616, "learning_rate": 1.9711323405013535e-05, "loss": 1.0831, "step": 2218 }, { "epoch": 0.31332956791866706, "grad_norm": 4.8223103568116255, "learning_rate": 1.9710959647100086e-05, "loss": 1.2202, "step": 2219 }, { "epoch": 0.31347077096865295, "grad_norm": 4.12401500763805, "learning_rate": 1.9710595663508125e-05, "loss": 1.0846, "step": 2220 }, { "epoch": 0.31361197401863883, "grad_norm": 3.570240360000423, "learning_rate": 1.971023145424612e-05, "loss": 0.9074, "step": 2221 }, { "epoch": 0.31375317706862466, "grad_norm": 5.3735486917820126, "learning_rate": 1.9709867019322528e-05, "loss": 1.1143, "step": 2222 }, { "epoch": 0.31389438011861054, "grad_norm": 6.981542359363994, "learning_rate": 1.970950235874582e-05, "loss": 1.226, "step": 2223 }, { "epoch": 0.3140355831685964, "grad_norm": 4.733503256508291, "learning_rate": 1.970913747252447e-05, "loss": 1.2104, "step": 2224 }, { "epoch": 0.3141767862185823, "grad_norm": 4.502802855125862, "learning_rate": 1.9708772360666958e-05, "loss": 0.9966, "step": 2225 }, { "epoch": 0.3143179892685682, "grad_norm": 4.824824802941916, "learning_rate": 1.970840702318177e-05, "loss": 1.1623, "step": 2226 }, { "epoch": 0.31445919231855407, "grad_norm": 4.60102460178849, "learning_rate": 1.970804146007739e-05, "loss": 1.0669, "step": 2227 }, { "epoch": 0.31460039536853995, "grad_norm": 4.102688051779516, "learning_rate": 1.9707675671362328e-05, "loss": 1.0422, "step": 2228 }, { "epoch": 0.31474159841852584, "grad_norm": 4.9530865705857305, "learning_rate": 1.970730965704507e-05, "loss": 1.2153, "step": 2229 }, { "epoch": 0.3148828014685117, "grad_norm": 5.647072408080908, "learning_rate": 1.970694341713413e-05, "loss": 1.3566, "step": 2230 }, { "epoch": 0.3150240045184976, "grad_norm": 4.410612812401028, "learning_rate": 1.970657695163802e-05, "loss": 1.1777, "step": 2231 }, { "epoch": 0.3151652075684835, "grad_norm": 4.179832746892072, "learning_rate": 1.9706210260565246e-05, "loss": 0.9849, "step": 2232 }, { "epoch": 0.31530641061846937, "grad_norm": 5.6119791472771166, "learning_rate": 1.9705843343924343e-05, "loss": 1.4604, "step": 2233 }, { "epoch": 0.31544761366845525, "grad_norm": 4.531309335191492, "learning_rate": 1.970547620172383e-05, "loss": 0.9494, "step": 2234 }, { "epoch": 0.31558881671844113, "grad_norm": 4.138794717710657, "learning_rate": 1.9705108833972245e-05, "loss": 1.1322, "step": 2235 }, { "epoch": 0.315730019768427, "grad_norm": 3.723050722715303, "learning_rate": 1.9704741240678117e-05, "loss": 0.8949, "step": 2236 }, { "epoch": 0.3158712228184129, "grad_norm": 4.308460466255025, "learning_rate": 1.9704373421849997e-05, "loss": 1.2013, "step": 2237 }, { "epoch": 0.3160124258683988, "grad_norm": 4.02847827917829, "learning_rate": 1.9704005377496428e-05, "loss": 0.8946, "step": 2238 }, { "epoch": 0.3161536289183846, "grad_norm": 4.146422648055185, "learning_rate": 1.9703637107625968e-05, "loss": 0.9191, "step": 2239 }, { "epoch": 0.3162948319683705, "grad_norm": 5.242960843481294, "learning_rate": 1.9703268612247172e-05, "loss": 1.1627, "step": 2240 }, { "epoch": 0.3164360350183564, "grad_norm": 3.6915202857414284, "learning_rate": 1.9702899891368598e-05, "loss": 0.9252, "step": 2241 }, { "epoch": 0.31657723806834226, "grad_norm": 3.900462169800725, "learning_rate": 1.9702530944998825e-05, "loss": 1.1165, "step": 2242 }, { "epoch": 0.31671844111832814, "grad_norm": 4.23113707821588, "learning_rate": 1.9702161773146425e-05, "loss": 1.1995, "step": 2243 }, { "epoch": 0.316859644168314, "grad_norm": 5.817870853456311, "learning_rate": 1.9701792375819974e-05, "loss": 1.3674, "step": 2244 }, { "epoch": 0.3170008472182999, "grad_norm": 5.23726859943459, "learning_rate": 1.9701422753028056e-05, "loss": 1.2433, "step": 2245 }, { "epoch": 0.3171420502682858, "grad_norm": 3.8637380779963397, "learning_rate": 1.9701052904779265e-05, "loss": 1.0092, "step": 2246 }, { "epoch": 0.3172832533182717, "grad_norm": 3.8394782560806635, "learning_rate": 1.9700682831082192e-05, "loss": 0.8823, "step": 2247 }, { "epoch": 0.31742445636825756, "grad_norm": 3.531391740158467, "learning_rate": 1.9700312531945444e-05, "loss": 0.6829, "step": 2248 }, { "epoch": 0.31756565941824344, "grad_norm": 4.321739130353061, "learning_rate": 1.9699942007377615e-05, "loss": 0.9913, "step": 2249 }, { "epoch": 0.3177068624682293, "grad_norm": 4.517541202923386, "learning_rate": 1.969957125738733e-05, "loss": 1.0155, "step": 2250 }, { "epoch": 0.3178480655182152, "grad_norm": 4.064080497860532, "learning_rate": 1.9699200281983193e-05, "loss": 1.1729, "step": 2251 }, { "epoch": 0.3179892685682011, "grad_norm": 4.076528011985761, "learning_rate": 1.969882908117383e-05, "loss": 0.9392, "step": 2252 }, { "epoch": 0.31813047161818697, "grad_norm": 6.294534121552598, "learning_rate": 1.969845765496787e-05, "loss": 1.0236, "step": 2253 }, { "epoch": 0.31827167466817285, "grad_norm": 4.67106701428831, "learning_rate": 1.969808600337394e-05, "loss": 1.1509, "step": 2254 }, { "epoch": 0.31841287771815874, "grad_norm": 5.387895298890177, "learning_rate": 1.9697714126400685e-05, "loss": 1.1714, "step": 2255 }, { "epoch": 0.31855408076814457, "grad_norm": 4.062775613949614, "learning_rate": 1.969734202405674e-05, "loss": 1.0541, "step": 2256 }, { "epoch": 0.31869528381813045, "grad_norm": 3.8406219647303033, "learning_rate": 1.969696969635075e-05, "loss": 1.0246, "step": 2257 }, { "epoch": 0.31883648686811633, "grad_norm": 3.7054376967390894, "learning_rate": 1.9696597143291377e-05, "loss": 1.1746, "step": 2258 }, { "epoch": 0.3189776899181022, "grad_norm": 5.534654597134407, "learning_rate": 1.969622436488727e-05, "loss": 1.1577, "step": 2259 }, { "epoch": 0.3191188929680881, "grad_norm": 5.208506771007611, "learning_rate": 1.9695851361147098e-05, "loss": 1.2104, "step": 2260 }, { "epoch": 0.319260096018074, "grad_norm": 4.781560300561307, "learning_rate": 1.969547813207953e-05, "loss": 1.0875, "step": 2261 }, { "epoch": 0.31940129906805986, "grad_norm": 3.120448096867317, "learning_rate": 1.9695104677693234e-05, "loss": 0.8372, "step": 2262 }, { "epoch": 0.31954250211804575, "grad_norm": 4.460284041440932, "learning_rate": 1.9694730997996894e-05, "loss": 1.2279, "step": 2263 }, { "epoch": 0.31968370516803163, "grad_norm": 4.762989993413503, "learning_rate": 1.969435709299919e-05, "loss": 1.2951, "step": 2264 }, { "epoch": 0.3198249082180175, "grad_norm": 3.7743808194138606, "learning_rate": 1.9693982962708817e-05, "loss": 1.0129, "step": 2265 }, { "epoch": 0.3199661112680034, "grad_norm": 3.853819806092262, "learning_rate": 1.9693608607134466e-05, "loss": 1.1274, "step": 2266 }, { "epoch": 0.3201073143179893, "grad_norm": 4.552559038536874, "learning_rate": 1.9693234026284838e-05, "loss": 1.1363, "step": 2267 }, { "epoch": 0.32024851736797516, "grad_norm": 4.083531282015146, "learning_rate": 1.9692859220168634e-05, "loss": 1.1818, "step": 2268 }, { "epoch": 0.32038972041796104, "grad_norm": 3.801218347133587, "learning_rate": 1.969248418879457e-05, "loss": 0.9667, "step": 2269 }, { "epoch": 0.3205309234679469, "grad_norm": 4.3754034926446375, "learning_rate": 1.969210893217136e-05, "loss": 1.0672, "step": 2270 }, { "epoch": 0.3206721265179328, "grad_norm": 3.3930154921345475, "learning_rate": 1.9691733450307723e-05, "loss": 0.9784, "step": 2271 }, { "epoch": 0.3208133295679187, "grad_norm": 3.617325007809873, "learning_rate": 1.9691357743212385e-05, "loss": 0.9056, "step": 2272 }, { "epoch": 0.3209545326179045, "grad_norm": 3.7901050149973483, "learning_rate": 1.969098181089408e-05, "loss": 0.9735, "step": 2273 }, { "epoch": 0.3210957356678904, "grad_norm": 3.4871191393006806, "learning_rate": 1.969060565336154e-05, "loss": 0.8325, "step": 2274 }, { "epoch": 0.3212369387178763, "grad_norm": 4.037254679862918, "learning_rate": 1.9690229270623512e-05, "loss": 1.066, "step": 2275 }, { "epoch": 0.32137814176786217, "grad_norm": 5.024526437284346, "learning_rate": 1.9689852662688743e-05, "loss": 1.0779, "step": 2276 }, { "epoch": 0.32151934481784805, "grad_norm": 4.014150004299187, "learning_rate": 1.9689475829565983e-05, "loss": 0.9446, "step": 2277 }, { "epoch": 0.32166054786783393, "grad_norm": 5.310095403068603, "learning_rate": 1.9689098771263982e-05, "loss": 1.3517, "step": 2278 }, { "epoch": 0.3218017509178198, "grad_norm": 3.932002534868036, "learning_rate": 1.9688721487791516e-05, "loss": 1.0366, "step": 2279 }, { "epoch": 0.3219429539678057, "grad_norm": 4.314535282748354, "learning_rate": 1.9688343979157348e-05, "loss": 1.0116, "step": 2280 }, { "epoch": 0.3220841570177916, "grad_norm": 4.922067903596106, "learning_rate": 1.9687966245370248e-05, "loss": 1.0343, "step": 2281 }, { "epoch": 0.32222536006777747, "grad_norm": 5.30805813701868, "learning_rate": 1.9687588286438995e-05, "loss": 1.2836, "step": 2282 }, { "epoch": 0.32236656311776335, "grad_norm": 4.3862063056249605, "learning_rate": 1.9687210102372376e-05, "loss": 1.1088, "step": 2283 }, { "epoch": 0.32250776616774923, "grad_norm": 4.7312264962376505, "learning_rate": 1.9686831693179178e-05, "loss": 1.2889, "step": 2284 }, { "epoch": 0.3226489692177351, "grad_norm": 4.444645986117651, "learning_rate": 1.9686453058868194e-05, "loss": 1.0056, "step": 2285 }, { "epoch": 0.322790172267721, "grad_norm": 3.5145813062527655, "learning_rate": 1.9686074199448222e-05, "loss": 0.8307, "step": 2286 }, { "epoch": 0.3229313753177069, "grad_norm": 4.531713080558804, "learning_rate": 1.9685695114928073e-05, "loss": 1.1106, "step": 2287 }, { "epoch": 0.32307257836769276, "grad_norm": 4.376349285533183, "learning_rate": 1.9685315805316548e-05, "loss": 1.1299, "step": 2288 }, { "epoch": 0.32321378141767865, "grad_norm": 4.613639883725166, "learning_rate": 1.968493627062247e-05, "loss": 1.133, "step": 2289 }, { "epoch": 0.3233549844676645, "grad_norm": 4.565096596178036, "learning_rate": 1.9684556510854655e-05, "loss": 1.2705, "step": 2290 }, { "epoch": 0.32349618751765036, "grad_norm": 4.97049887256306, "learning_rate": 1.968417652602193e-05, "loss": 1.0895, "step": 2291 }, { "epoch": 0.32363739056763624, "grad_norm": 3.8781297688908, "learning_rate": 1.968379631613312e-05, "loss": 0.9398, "step": 2292 }, { "epoch": 0.3237785936176221, "grad_norm": 3.6134142846663124, "learning_rate": 1.968341588119707e-05, "loss": 0.9916, "step": 2293 }, { "epoch": 0.323919796667608, "grad_norm": 4.816024744581271, "learning_rate": 1.9683035221222617e-05, "loss": 1.3077, "step": 2294 }, { "epoch": 0.3240609997175939, "grad_norm": 4.112581548716897, "learning_rate": 1.9682654336218606e-05, "loss": 1.0834, "step": 2295 }, { "epoch": 0.3242022027675798, "grad_norm": 4.179499416866548, "learning_rate": 1.968227322619389e-05, "loss": 0.9383, "step": 2296 }, { "epoch": 0.32434340581756566, "grad_norm": 3.7838338554642688, "learning_rate": 1.9681891891157324e-05, "loss": 0.7676, "step": 2297 }, { "epoch": 0.32448460886755154, "grad_norm": 4.479553073138848, "learning_rate": 1.968151033111777e-05, "loss": 1.4906, "step": 2298 }, { "epoch": 0.3246258119175374, "grad_norm": 3.7491469144109204, "learning_rate": 1.9681128546084106e-05, "loss": 1.0551, "step": 2299 }, { "epoch": 0.3247670149675233, "grad_norm": 4.298377019188501, "learning_rate": 1.968074653606519e-05, "loss": 1.159, "step": 2300 }, { "epoch": 0.3249082180175092, "grad_norm": 4.928129958302385, "learning_rate": 1.9680364301069903e-05, "loss": 1.4335, "step": 2301 }, { "epoch": 0.32504942106749507, "grad_norm": 4.412452828258116, "learning_rate": 1.967998184110713e-05, "loss": 1.1969, "step": 2302 }, { "epoch": 0.32519062411748095, "grad_norm": 3.9405662994701096, "learning_rate": 1.9679599156185765e-05, "loss": 1.0777, "step": 2303 }, { "epoch": 0.32533182716746684, "grad_norm": 4.2657046789376, "learning_rate": 1.9679216246314694e-05, "loss": 1.0045, "step": 2304 }, { "epoch": 0.3254730302174527, "grad_norm": 3.597822329291592, "learning_rate": 1.9678833111502816e-05, "loss": 0.9852, "step": 2305 }, { "epoch": 0.3256142332674386, "grad_norm": 3.6559826198117493, "learning_rate": 1.9678449751759036e-05, "loss": 0.9243, "step": 2306 }, { "epoch": 0.32575543631742443, "grad_norm": 4.519659469856864, "learning_rate": 1.9678066167092266e-05, "loss": 1.323, "step": 2307 }, { "epoch": 0.3258966393674103, "grad_norm": 3.736198557751935, "learning_rate": 1.9677682357511415e-05, "loss": 0.9683, "step": 2308 }, { "epoch": 0.3260378424173962, "grad_norm": 4.819416542090107, "learning_rate": 1.9677298323025406e-05, "loss": 1.1789, "step": 2309 }, { "epoch": 0.3261790454673821, "grad_norm": 3.9161058305992733, "learning_rate": 1.9676914063643165e-05, "loss": 1.1343, "step": 2310 }, { "epoch": 0.32632024851736796, "grad_norm": 4.0034884591293425, "learning_rate": 1.967652957937362e-05, "loss": 1.0775, "step": 2311 }, { "epoch": 0.32646145156735384, "grad_norm": 4.582151915522272, "learning_rate": 1.9676144870225705e-05, "loss": 1.194, "step": 2312 }, { "epoch": 0.3266026546173397, "grad_norm": 3.7953146737823333, "learning_rate": 1.9675759936208366e-05, "loss": 1.0392, "step": 2313 }, { "epoch": 0.3267438576673256, "grad_norm": 4.07181436837981, "learning_rate": 1.967537477733054e-05, "loss": 1.1028, "step": 2314 }, { "epoch": 0.3268850607173115, "grad_norm": 3.7774592887637493, "learning_rate": 1.967498939360118e-05, "loss": 1.1445, "step": 2315 }, { "epoch": 0.3270262637672974, "grad_norm": 4.80717853239273, "learning_rate": 1.9674603785029252e-05, "loss": 1.3526, "step": 2316 }, { "epoch": 0.32716746681728326, "grad_norm": 4.115197795540143, "learning_rate": 1.967421795162371e-05, "loss": 0.9775, "step": 2317 }, { "epoch": 0.32730866986726914, "grad_norm": 4.2540951339164135, "learning_rate": 1.967383189339352e-05, "loss": 1.271, "step": 2318 }, { "epoch": 0.327449872917255, "grad_norm": 4.4396000163122515, "learning_rate": 1.967344561034765e-05, "loss": 1.1593, "step": 2319 }, { "epoch": 0.3275910759672409, "grad_norm": 3.8358837850269305, "learning_rate": 1.9673059102495084e-05, "loss": 1.0068, "step": 2320 }, { "epoch": 0.3277322790172268, "grad_norm": 4.513169062795983, "learning_rate": 1.9672672369844802e-05, "loss": 1.154, "step": 2321 }, { "epoch": 0.3278734820672127, "grad_norm": 3.6129045320024384, "learning_rate": 1.967228541240579e-05, "loss": 1.0916, "step": 2322 }, { "epoch": 0.32801468511719856, "grad_norm": 4.075394387412882, "learning_rate": 1.9671898230187046e-05, "loss": 1.0195, "step": 2323 }, { "epoch": 0.3281558881671844, "grad_norm": 4.350846032119964, "learning_rate": 1.9671510823197562e-05, "loss": 0.9136, "step": 2324 }, { "epoch": 0.32829709121717027, "grad_norm": 4.356668821258284, "learning_rate": 1.967112319144634e-05, "loss": 1.0915, "step": 2325 }, { "epoch": 0.32843829426715615, "grad_norm": 5.094593064698161, "learning_rate": 1.9670735334942398e-05, "loss": 1.3087, "step": 2326 }, { "epoch": 0.32857949731714203, "grad_norm": 3.8518625505789803, "learning_rate": 1.967034725369474e-05, "loss": 0.9872, "step": 2327 }, { "epoch": 0.3287207003671279, "grad_norm": 4.2296180243734645, "learning_rate": 1.966995894771239e-05, "loss": 1.1364, "step": 2328 }, { "epoch": 0.3288619034171138, "grad_norm": 3.77506336243467, "learning_rate": 1.966957041700437e-05, "loss": 0.9489, "step": 2329 }, { "epoch": 0.3290031064670997, "grad_norm": 3.868957073053426, "learning_rate": 1.9669181661579708e-05, "loss": 1.0364, "step": 2330 }, { "epoch": 0.32914430951708556, "grad_norm": 4.646830027257799, "learning_rate": 1.9668792681447437e-05, "loss": 1.3238, "step": 2331 }, { "epoch": 0.32928551256707145, "grad_norm": 4.193223549682492, "learning_rate": 1.9668403476616604e-05, "loss": 1.3836, "step": 2332 }, { "epoch": 0.32942671561705733, "grad_norm": 4.737718832370786, "learning_rate": 1.966801404709625e-05, "loss": 1.1446, "step": 2333 }, { "epoch": 0.3295679186670432, "grad_norm": 4.054518041853819, "learning_rate": 1.9667624392895423e-05, "loss": 1.0182, "step": 2334 }, { "epoch": 0.3297091217170291, "grad_norm": 4.682605524282007, "learning_rate": 1.966723451402318e-05, "loss": 1.2124, "step": 2335 }, { "epoch": 0.329850324767015, "grad_norm": 4.144604458287708, "learning_rate": 1.966684441048858e-05, "loss": 1.0375, "step": 2336 }, { "epoch": 0.32999152781700086, "grad_norm": 4.1442605119625355, "learning_rate": 1.9666454082300692e-05, "loss": 1.0873, "step": 2337 }, { "epoch": 0.33013273086698675, "grad_norm": 4.474715100783228, "learning_rate": 1.966606352946859e-05, "loss": 1.0921, "step": 2338 }, { "epoch": 0.33027393391697263, "grad_norm": 4.19773923963997, "learning_rate": 1.966567275200134e-05, "loss": 1.1468, "step": 2339 }, { "epoch": 0.3304151369669585, "grad_norm": 4.0253274923548155, "learning_rate": 1.9665281749908034e-05, "loss": 1.0117, "step": 2340 }, { "epoch": 0.33055634001694434, "grad_norm": 4.1035166483079815, "learning_rate": 1.9664890523197752e-05, "loss": 1.0293, "step": 2341 }, { "epoch": 0.3306975430669302, "grad_norm": 3.6697945159275984, "learning_rate": 1.966449907187959e-05, "loss": 1.0499, "step": 2342 }, { "epoch": 0.3308387461169161, "grad_norm": 4.600255890706265, "learning_rate": 1.966410739596264e-05, "loss": 1.2319, "step": 2343 }, { "epoch": 0.330979949166902, "grad_norm": 4.716300359279863, "learning_rate": 1.9663715495456012e-05, "loss": 1.1395, "step": 2344 }, { "epoch": 0.33112115221688787, "grad_norm": 3.3502764908621665, "learning_rate": 1.9663323370368807e-05, "loss": 0.9503, "step": 2345 }, { "epoch": 0.33126235526687375, "grad_norm": 3.5601084487564165, "learning_rate": 1.9662931020710138e-05, "loss": 0.903, "step": 2346 }, { "epoch": 0.33140355831685964, "grad_norm": 3.961753823794761, "learning_rate": 1.966253844648913e-05, "loss": 0.9938, "step": 2347 }, { "epoch": 0.3315447613668455, "grad_norm": 5.874227348884095, "learning_rate": 1.9662145647714896e-05, "loss": 1.5006, "step": 2348 }, { "epoch": 0.3316859644168314, "grad_norm": 3.9565349648325263, "learning_rate": 1.9661752624396575e-05, "loss": 1.2199, "step": 2349 }, { "epoch": 0.3318271674668173, "grad_norm": 3.958515681867395, "learning_rate": 1.9661359376543295e-05, "loss": 0.9815, "step": 2350 }, { "epoch": 0.33196837051680317, "grad_norm": 4.32504214111788, "learning_rate": 1.9660965904164193e-05, "loss": 0.9117, "step": 2351 }, { "epoch": 0.33210957356678905, "grad_norm": 3.9879223245020965, "learning_rate": 1.9660572207268416e-05, "loss": 1.0867, "step": 2352 }, { "epoch": 0.33225077661677493, "grad_norm": 3.9698258374326665, "learning_rate": 1.9660178285865114e-05, "loss": 1.0729, "step": 2353 }, { "epoch": 0.3323919796667608, "grad_norm": 4.055386495571354, "learning_rate": 1.9659784139963437e-05, "loss": 1.0742, "step": 2354 }, { "epoch": 0.3325331827167467, "grad_norm": 4.908828157744329, "learning_rate": 1.9659389769572553e-05, "loss": 1.2383, "step": 2355 }, { "epoch": 0.3326743857667326, "grad_norm": 4.640570287878632, "learning_rate": 1.9658995174701622e-05, "loss": 1.2139, "step": 2356 }, { "epoch": 0.33281558881671847, "grad_norm": 4.332213214017479, "learning_rate": 1.965860035535981e-05, "loss": 1.1257, "step": 2357 }, { "epoch": 0.3329567918667043, "grad_norm": 4.461699314113292, "learning_rate": 1.9658205311556304e-05, "loss": 1.0254, "step": 2358 }, { "epoch": 0.3330979949166902, "grad_norm": 4.327645135556568, "learning_rate": 1.9657810043300274e-05, "loss": 1.2754, "step": 2359 }, { "epoch": 0.33323919796667606, "grad_norm": 5.204008545388776, "learning_rate": 1.9657414550600907e-05, "loss": 1.2085, "step": 2360 }, { "epoch": 0.33338040101666194, "grad_norm": 4.167632287820464, "learning_rate": 1.96570188334674e-05, "loss": 1.071, "step": 2361 }, { "epoch": 0.3335216040666478, "grad_norm": 4.5991995940731645, "learning_rate": 1.9656622891908944e-05, "loss": 1.4672, "step": 2362 }, { "epoch": 0.3336628071166337, "grad_norm": 4.451950497792509, "learning_rate": 1.9656226725934745e-05, "loss": 1.2229, "step": 2363 }, { "epoch": 0.3338040101666196, "grad_norm": 4.359007743925706, "learning_rate": 1.9655830335554004e-05, "loss": 1.3637, "step": 2364 }, { "epoch": 0.3339452132166055, "grad_norm": 4.312915061603823, "learning_rate": 1.965543372077594e-05, "loss": 1.2552, "step": 2365 }, { "epoch": 0.33408641626659136, "grad_norm": 4.0791258079300015, "learning_rate": 1.9655036881609763e-05, "loss": 1.107, "step": 2366 }, { "epoch": 0.33422761931657724, "grad_norm": 3.8918306662932447, "learning_rate": 1.9654639818064698e-05, "loss": 1.0658, "step": 2367 }, { "epoch": 0.3343688223665631, "grad_norm": 4.07886903482661, "learning_rate": 1.9654242530149974e-05, "loss": 0.9243, "step": 2368 }, { "epoch": 0.334510025416549, "grad_norm": 4.824168743994136, "learning_rate": 1.9653845017874822e-05, "loss": 0.9971, "step": 2369 }, { "epoch": 0.3346512284665349, "grad_norm": 4.268365663035962, "learning_rate": 1.9653447281248484e-05, "loss": 1.2813, "step": 2370 }, { "epoch": 0.33479243151652077, "grad_norm": 3.800499338421693, "learning_rate": 1.9653049320280197e-05, "loss": 1.0285, "step": 2371 }, { "epoch": 0.33493363456650665, "grad_norm": 4.6797039905650255, "learning_rate": 1.9652651134979215e-05, "loss": 1.3236, "step": 2372 }, { "epoch": 0.33507483761649254, "grad_norm": 3.5132544721615155, "learning_rate": 1.9652252725354784e-05, "loss": 0.9121, "step": 2373 }, { "epoch": 0.3352160406664784, "grad_norm": 3.8595258647211748, "learning_rate": 1.9651854091416175e-05, "loss": 0.8959, "step": 2374 }, { "epoch": 0.33535724371646425, "grad_norm": 4.430499987359151, "learning_rate": 1.9651455233172643e-05, "loss": 1.0045, "step": 2375 }, { "epoch": 0.33549844676645013, "grad_norm": 4.401753502355797, "learning_rate": 1.965105615063346e-05, "loss": 1.1951, "step": 2376 }, { "epoch": 0.335639649816436, "grad_norm": 4.038971070082858, "learning_rate": 1.9650656843807897e-05, "loss": 1.0643, "step": 2377 }, { "epoch": 0.3357808528664219, "grad_norm": 3.4656831023291184, "learning_rate": 1.965025731270524e-05, "loss": 0.9263, "step": 2378 }, { "epoch": 0.3359220559164078, "grad_norm": 4.0485532273108795, "learning_rate": 1.964985755733477e-05, "loss": 1.186, "step": 2379 }, { "epoch": 0.33606325896639366, "grad_norm": 4.698114735175097, "learning_rate": 1.964945757770578e-05, "loss": 1.1756, "step": 2380 }, { "epoch": 0.33620446201637955, "grad_norm": 4.678917493787567, "learning_rate": 1.964905737382756e-05, "loss": 1.2601, "step": 2381 }, { "epoch": 0.33634566506636543, "grad_norm": 4.976761772979513, "learning_rate": 1.9648656945709413e-05, "loss": 1.2188, "step": 2382 }, { "epoch": 0.3364868681163513, "grad_norm": 4.433791513937911, "learning_rate": 1.9648256293360645e-05, "loss": 1.2602, "step": 2383 }, { "epoch": 0.3366280711663372, "grad_norm": 3.987883162727097, "learning_rate": 1.964785541679057e-05, "loss": 1.1293, "step": 2384 }, { "epoch": 0.3367692742163231, "grad_norm": 4.400476555565281, "learning_rate": 1.96474543160085e-05, "loss": 1.109, "step": 2385 }, { "epoch": 0.33691047726630896, "grad_norm": 4.3928186133433655, "learning_rate": 1.964705299102376e-05, "loss": 1.1372, "step": 2386 }, { "epoch": 0.33705168031629484, "grad_norm": 4.67497194054581, "learning_rate": 1.9646651441845676e-05, "loss": 1.1721, "step": 2387 }, { "epoch": 0.3371928833662807, "grad_norm": 3.5429347936901756, "learning_rate": 1.9646249668483575e-05, "loss": 0.8902, "step": 2388 }, { "epoch": 0.3373340864162666, "grad_norm": 3.6901184293780425, "learning_rate": 1.9645847670946798e-05, "loss": 0.9684, "step": 2389 }, { "epoch": 0.3374752894662525, "grad_norm": 4.719066839363746, "learning_rate": 1.964544544924469e-05, "loss": 0.9956, "step": 2390 }, { "epoch": 0.3376164925162384, "grad_norm": 4.40288359375305, "learning_rate": 1.9645043003386594e-05, "loss": 1.1719, "step": 2391 }, { "epoch": 0.3377576955662242, "grad_norm": 4.203912816884141, "learning_rate": 1.9644640333381862e-05, "loss": 1.3073, "step": 2392 }, { "epoch": 0.3378988986162101, "grad_norm": 4.6181023594890585, "learning_rate": 1.9644237439239853e-05, "loss": 1.2131, "step": 2393 }, { "epoch": 0.33804010166619597, "grad_norm": 4.160814448700728, "learning_rate": 1.964383432096993e-05, "loss": 1.1531, "step": 2394 }, { "epoch": 0.33818130471618185, "grad_norm": 3.6153190407914564, "learning_rate": 1.964343097858147e-05, "loss": 0.9677, "step": 2395 }, { "epoch": 0.33832250776616773, "grad_norm": 3.542416860970481, "learning_rate": 1.964302741208383e-05, "loss": 1.161, "step": 2396 }, { "epoch": 0.3384637108161536, "grad_norm": 4.076836005248402, "learning_rate": 1.9642623621486403e-05, "loss": 1.1216, "step": 2397 }, { "epoch": 0.3386049138661395, "grad_norm": 4.5017686573088165, "learning_rate": 1.9642219606798566e-05, "loss": 1.0349, "step": 2398 }, { "epoch": 0.3387461169161254, "grad_norm": 4.171949373603423, "learning_rate": 1.964181536802971e-05, "loss": 1.1737, "step": 2399 }, { "epoch": 0.33888731996611127, "grad_norm": 4.42170242437467, "learning_rate": 1.964141090518923e-05, "loss": 1.1504, "step": 2400 }, { "epoch": 0.33902852301609715, "grad_norm": 4.208929719352292, "learning_rate": 1.964100621828652e-05, "loss": 1.0181, "step": 2401 }, { "epoch": 0.33916972606608303, "grad_norm": 3.711429047834989, "learning_rate": 1.964060130733099e-05, "loss": 0.8933, "step": 2402 }, { "epoch": 0.3393109291160689, "grad_norm": 4.239570852500366, "learning_rate": 1.9640196172332053e-05, "loss": 1.1691, "step": 2403 }, { "epoch": 0.3394521321660548, "grad_norm": 4.016978412051192, "learning_rate": 1.963979081329912e-05, "loss": 1.0316, "step": 2404 }, { "epoch": 0.3395933352160407, "grad_norm": 4.267331213384971, "learning_rate": 1.9639385230241606e-05, "loss": 1.2225, "step": 2405 }, { "epoch": 0.33973453826602656, "grad_norm": 5.143212916056214, "learning_rate": 1.9638979423168948e-05, "loss": 1.2342, "step": 2406 }, { "epoch": 0.33987574131601245, "grad_norm": 4.558666127313283, "learning_rate": 1.9638573392090567e-05, "loss": 1.1004, "step": 2407 }, { "epoch": 0.34001694436599833, "grad_norm": 5.223102049528061, "learning_rate": 1.9638167137015905e-05, "loss": 1.0491, "step": 2408 }, { "epoch": 0.34015814741598416, "grad_norm": 4.379798037424821, "learning_rate": 1.96377606579544e-05, "loss": 0.9218, "step": 2409 }, { "epoch": 0.34029935046597004, "grad_norm": 3.48225427242103, "learning_rate": 1.9637353954915503e-05, "loss": 1.0881, "step": 2410 }, { "epoch": 0.3404405535159559, "grad_norm": 4.392059248251775, "learning_rate": 1.963694702790866e-05, "loss": 1.1077, "step": 2411 }, { "epoch": 0.3405817565659418, "grad_norm": 3.894676270930297, "learning_rate": 1.9636539876943325e-05, "loss": 1.119, "step": 2412 }, { "epoch": 0.3407229596159277, "grad_norm": 3.5318741043340327, "learning_rate": 1.963613250202897e-05, "loss": 0.8803, "step": 2413 }, { "epoch": 0.3408641626659136, "grad_norm": 3.998772412989041, "learning_rate": 1.9635724903175055e-05, "loss": 1.0134, "step": 2414 }, { "epoch": 0.34100536571589946, "grad_norm": 3.6201962319529755, "learning_rate": 1.9635317080391058e-05, "loss": 0.9789, "step": 2415 }, { "epoch": 0.34114656876588534, "grad_norm": 4.873649122840227, "learning_rate": 1.963490903368645e-05, "loss": 1.2571, "step": 2416 }, { "epoch": 0.3412877718158712, "grad_norm": 4.151730010185322, "learning_rate": 1.9634500763070718e-05, "loss": 1.1259, "step": 2417 }, { "epoch": 0.3414289748658571, "grad_norm": 4.785997092304954, "learning_rate": 1.963409226855335e-05, "loss": 1.119, "step": 2418 }, { "epoch": 0.341570177915843, "grad_norm": 4.770550207819993, "learning_rate": 1.963368355014384e-05, "loss": 0.9843, "step": 2419 }, { "epoch": 0.34171138096582887, "grad_norm": 3.8054131553863093, "learning_rate": 1.963327460785168e-05, "loss": 0.9707, "step": 2420 }, { "epoch": 0.34185258401581475, "grad_norm": 3.5526989167853733, "learning_rate": 1.963286544168638e-05, "loss": 0.8548, "step": 2421 }, { "epoch": 0.34199378706580064, "grad_norm": 3.7404251447877734, "learning_rate": 1.9632456051657448e-05, "loss": 1.0831, "step": 2422 }, { "epoch": 0.3421349901157865, "grad_norm": 4.07951445670617, "learning_rate": 1.96320464377744e-05, "loss": 0.9465, "step": 2423 }, { "epoch": 0.3422761931657724, "grad_norm": 4.883382515834337, "learning_rate": 1.9631636600046748e-05, "loss": 1.1575, "step": 2424 }, { "epoch": 0.3424173962157583, "grad_norm": 4.153052335524542, "learning_rate": 1.9631226538484026e-05, "loss": 1.1216, "step": 2425 }, { "epoch": 0.3425585992657441, "grad_norm": 4.717587171064485, "learning_rate": 1.9630816253095754e-05, "loss": 1.449, "step": 2426 }, { "epoch": 0.34269980231573, "grad_norm": 4.270216797248425, "learning_rate": 1.9630405743891475e-05, "loss": 1.0937, "step": 2427 }, { "epoch": 0.3428410053657159, "grad_norm": 4.39908829878803, "learning_rate": 1.9629995010880724e-05, "loss": 1.105, "step": 2428 }, { "epoch": 0.34298220841570176, "grad_norm": 3.832943972552059, "learning_rate": 1.9629584054073044e-05, "loss": 0.9623, "step": 2429 }, { "epoch": 0.34312341146568764, "grad_norm": 4.437313959133174, "learning_rate": 1.9629172873477995e-05, "loss": 1.2047, "step": 2430 }, { "epoch": 0.3432646145156735, "grad_norm": 4.225571839938346, "learning_rate": 1.9628761469105123e-05, "loss": 1.0078, "step": 2431 }, { "epoch": 0.3434058175656594, "grad_norm": 3.6150598916411023, "learning_rate": 1.9628349840963997e-05, "loss": 1.0849, "step": 2432 }, { "epoch": 0.3435470206156453, "grad_norm": 4.1367108536683865, "learning_rate": 1.9627937989064177e-05, "loss": 1.0606, "step": 2433 }, { "epoch": 0.3436882236656312, "grad_norm": 3.338367183512359, "learning_rate": 1.9627525913415234e-05, "loss": 0.8998, "step": 2434 }, { "epoch": 0.34382942671561706, "grad_norm": 4.1311151035671045, "learning_rate": 1.9627113614026746e-05, "loss": 1.0938, "step": 2435 }, { "epoch": 0.34397062976560294, "grad_norm": 3.541097384560549, "learning_rate": 1.96267010909083e-05, "loss": 0.9975, "step": 2436 }, { "epoch": 0.3441118328155888, "grad_norm": 3.732686121976509, "learning_rate": 1.9626288344069475e-05, "loss": 0.9095, "step": 2437 }, { "epoch": 0.3442530358655747, "grad_norm": 3.9669232928078713, "learning_rate": 1.9625875373519866e-05, "loss": 0.8914, "step": 2438 }, { "epoch": 0.3443942389155606, "grad_norm": 3.9068507063648377, "learning_rate": 1.962546217926907e-05, "loss": 1.298, "step": 2439 }, { "epoch": 0.3445354419655465, "grad_norm": 3.7603490566803184, "learning_rate": 1.962504876132669e-05, "loss": 1.1631, "step": 2440 }, { "epoch": 0.34467664501553236, "grad_norm": 4.974272913453476, "learning_rate": 1.9624635119702334e-05, "loss": 1.2221, "step": 2441 }, { "epoch": 0.34481784806551824, "grad_norm": 4.720861925473938, "learning_rate": 1.962422125440562e-05, "loss": 1.3398, "step": 2442 }, { "epoch": 0.34495905111550407, "grad_norm": 3.3495920264351304, "learning_rate": 1.9623807165446154e-05, "loss": 0.8139, "step": 2443 }, { "epoch": 0.34510025416548995, "grad_norm": 4.103388296732692, "learning_rate": 1.962339285283357e-05, "loss": 1.0345, "step": 2444 }, { "epoch": 0.34524145721547583, "grad_norm": 3.9204409495502492, "learning_rate": 1.9622978316577485e-05, "loss": 0.9395, "step": 2445 }, { "epoch": 0.3453826602654617, "grad_norm": 3.9245196864122502, "learning_rate": 1.9622563556687545e-05, "loss": 0.9193, "step": 2446 }, { "epoch": 0.3455238633154476, "grad_norm": 3.682812805172933, "learning_rate": 1.962214857317338e-05, "loss": 1.018, "step": 2447 }, { "epoch": 0.3456650663654335, "grad_norm": 3.772926890504689, "learning_rate": 1.9621733366044644e-05, "loss": 0.9365, "step": 2448 }, { "epoch": 0.34580626941541937, "grad_norm": 4.70117475693373, "learning_rate": 1.9621317935310973e-05, "loss": 1.288, "step": 2449 }, { "epoch": 0.34594747246540525, "grad_norm": 3.495948212276124, "learning_rate": 1.962090228098203e-05, "loss": 0.845, "step": 2450 }, { "epoch": 0.34608867551539113, "grad_norm": 3.9384807747329504, "learning_rate": 1.9620486403067477e-05, "loss": 0.9899, "step": 2451 }, { "epoch": 0.346229878565377, "grad_norm": 4.452608929216782, "learning_rate": 1.9620070301576974e-05, "loss": 1.3221, "step": 2452 }, { "epoch": 0.3463710816153629, "grad_norm": 4.721961785875029, "learning_rate": 1.9619653976520188e-05, "loss": 1.1992, "step": 2453 }, { "epoch": 0.3465122846653488, "grad_norm": 4.225339899243639, "learning_rate": 1.96192374279068e-05, "loss": 1.174, "step": 2454 }, { "epoch": 0.34665348771533466, "grad_norm": 4.681510562768214, "learning_rate": 1.9618820655746488e-05, "loss": 1.0914, "step": 2455 }, { "epoch": 0.34679469076532055, "grad_norm": 3.857565966113763, "learning_rate": 1.9618403660048937e-05, "loss": 1.0712, "step": 2456 }, { "epoch": 0.34693589381530643, "grad_norm": 4.147096171538478, "learning_rate": 1.961798644082384e-05, "loss": 1.0199, "step": 2457 }, { "epoch": 0.3470770968652923, "grad_norm": 4.034810878139281, "learning_rate": 1.9617568998080893e-05, "loss": 1.1382, "step": 2458 }, { "epoch": 0.3472182999152782, "grad_norm": 3.2545154726521117, "learning_rate": 1.9617151331829794e-05, "loss": 1.0045, "step": 2459 }, { "epoch": 0.347359502965264, "grad_norm": 3.5374421400406337, "learning_rate": 1.9616733442080253e-05, "loss": 0.9161, "step": 2460 }, { "epoch": 0.3475007060152499, "grad_norm": 3.763914069834324, "learning_rate": 1.9616315328841976e-05, "loss": 1.1935, "step": 2461 }, { "epoch": 0.3476419090652358, "grad_norm": 4.087782005852018, "learning_rate": 1.961589699212469e-05, "loss": 1.0329, "step": 2462 }, { "epoch": 0.34778311211522167, "grad_norm": 5.735757635104397, "learning_rate": 1.9615478431938104e-05, "loss": 1.4835, "step": 2463 }, { "epoch": 0.34792431516520755, "grad_norm": 3.716893499583309, "learning_rate": 1.9615059648291956e-05, "loss": 1.0032, "step": 2464 }, { "epoch": 0.34806551821519344, "grad_norm": 4.59197907100095, "learning_rate": 1.9614640641195975e-05, "loss": 1.0288, "step": 2465 }, { "epoch": 0.3482067212651793, "grad_norm": 3.8502591325942714, "learning_rate": 1.9614221410659896e-05, "loss": 1.0686, "step": 2466 }, { "epoch": 0.3483479243151652, "grad_norm": 3.3020466238325143, "learning_rate": 1.9613801956693463e-05, "loss": 0.819, "step": 2467 }, { "epoch": 0.3484891273651511, "grad_norm": 3.3968251428585967, "learning_rate": 1.9613382279306426e-05, "loss": 0.8916, "step": 2468 }, { "epoch": 0.34863033041513697, "grad_norm": 4.022462610854897, "learning_rate": 1.9612962378508534e-05, "loss": 1.1449, "step": 2469 }, { "epoch": 0.34877153346512285, "grad_norm": 4.732278945418171, "learning_rate": 1.961254225430955e-05, "loss": 1.2111, "step": 2470 }, { "epoch": 0.34891273651510873, "grad_norm": 3.334154516381029, "learning_rate": 1.9612121906719235e-05, "loss": 0.8986, "step": 2471 }, { "epoch": 0.3490539395650946, "grad_norm": 4.9019649014218505, "learning_rate": 1.961170133574736e-05, "loss": 1.3104, "step": 2472 }, { "epoch": 0.3491951426150805, "grad_norm": 4.0697191906026395, "learning_rate": 1.9611280541403695e-05, "loss": 1.1868, "step": 2473 }, { "epoch": 0.3493363456650664, "grad_norm": 4.382259789271648, "learning_rate": 1.961085952369802e-05, "loss": 1.2189, "step": 2474 }, { "epoch": 0.34947754871505227, "grad_norm": 5.153839945708991, "learning_rate": 1.961043828264012e-05, "loss": 1.2231, "step": 2475 }, { "epoch": 0.34961875176503815, "grad_norm": 4.110011128468003, "learning_rate": 1.9610016818239788e-05, "loss": 1.1495, "step": 2476 }, { "epoch": 0.349759954815024, "grad_norm": 4.014970773737679, "learning_rate": 1.960959513050681e-05, "loss": 1.2126, "step": 2477 }, { "epoch": 0.34990115786500986, "grad_norm": 3.619337629750043, "learning_rate": 1.9609173219450998e-05, "loss": 1.0152, "step": 2478 }, { "epoch": 0.35004236091499574, "grad_norm": 3.6679701293293405, "learning_rate": 1.9608751085082146e-05, "loss": 0.9868, "step": 2479 }, { "epoch": 0.3501835639649816, "grad_norm": 4.396825149745631, "learning_rate": 1.960832872741007e-05, "loss": 1.2014, "step": 2480 }, { "epoch": 0.3503247670149675, "grad_norm": 4.583431504551717, "learning_rate": 1.9607906146444582e-05, "loss": 1.2305, "step": 2481 }, { "epoch": 0.3504659700649534, "grad_norm": 4.378229825511286, "learning_rate": 1.9607483342195505e-05, "loss": 1.0974, "step": 2482 }, { "epoch": 0.3506071731149393, "grad_norm": 4.421448249337399, "learning_rate": 1.9607060314672667e-05, "loss": 1.1826, "step": 2483 }, { "epoch": 0.35074837616492516, "grad_norm": 4.339944923598533, "learning_rate": 1.9606637063885892e-05, "loss": 0.9886, "step": 2484 }, { "epoch": 0.35088957921491104, "grad_norm": 4.20171443855241, "learning_rate": 1.9606213589845024e-05, "loss": 1.2929, "step": 2485 }, { "epoch": 0.3510307822648969, "grad_norm": 4.095742315667121, "learning_rate": 1.9605789892559902e-05, "loss": 1.0634, "step": 2486 }, { "epoch": 0.3511719853148828, "grad_norm": 4.378674881740143, "learning_rate": 1.9605365972040368e-05, "loss": 1.0321, "step": 2487 }, { "epoch": 0.3513131883648687, "grad_norm": 3.82355548137674, "learning_rate": 1.9604941828296275e-05, "loss": 1.1205, "step": 2488 }, { "epoch": 0.35145439141485457, "grad_norm": 4.582665505244621, "learning_rate": 1.9604517461337486e-05, "loss": 1.1336, "step": 2489 }, { "epoch": 0.35159559446484046, "grad_norm": 3.3613136712063185, "learning_rate": 1.960409287117386e-05, "loss": 0.835, "step": 2490 }, { "epoch": 0.35173679751482634, "grad_norm": 4.080135591133288, "learning_rate": 1.9603668057815256e-05, "loss": 1.0141, "step": 2491 }, { "epoch": 0.3518780005648122, "grad_norm": 4.30818591145011, "learning_rate": 1.9603243021271562e-05, "loss": 1.2129, "step": 2492 }, { "epoch": 0.3520192036147981, "grad_norm": 4.939170149805019, "learning_rate": 1.9602817761552643e-05, "loss": 1.2198, "step": 2493 }, { "epoch": 0.352160406664784, "grad_norm": 3.7132704205976386, "learning_rate": 1.960239227866839e-05, "loss": 0.9295, "step": 2494 }, { "epoch": 0.3523016097147698, "grad_norm": 3.8435256272365126, "learning_rate": 1.9601966572628686e-05, "loss": 0.9807, "step": 2495 }, { "epoch": 0.3524428127647557, "grad_norm": 3.6867309441746183, "learning_rate": 1.9601540643443423e-05, "loss": 1.0148, "step": 2496 }, { "epoch": 0.3525840158147416, "grad_norm": 4.393645506878234, "learning_rate": 1.9601114491122506e-05, "loss": 1.3695, "step": 2497 }, { "epoch": 0.35272521886472746, "grad_norm": 4.22756129761658, "learning_rate": 1.960068811567583e-05, "loss": 1.0401, "step": 2498 }, { "epoch": 0.35286642191471335, "grad_norm": 4.120954476302948, "learning_rate": 1.9600261517113312e-05, "loss": 1.1519, "step": 2499 }, { "epoch": 0.35300762496469923, "grad_norm": 4.009628987003973, "learning_rate": 1.9599834695444863e-05, "loss": 1.224, "step": 2500 }, { "epoch": 0.3531488280146851, "grad_norm": 3.631119025903588, "learning_rate": 1.9599407650680397e-05, "loss": 1.0084, "step": 2501 }, { "epoch": 0.353290031064671, "grad_norm": 3.3999451652376758, "learning_rate": 1.9598980382829848e-05, "loss": 1.1239, "step": 2502 }, { "epoch": 0.3534312341146569, "grad_norm": 4.270937545689775, "learning_rate": 1.9598552891903136e-05, "loss": 1.1968, "step": 2503 }, { "epoch": 0.35357243716464276, "grad_norm": 3.692335770695556, "learning_rate": 1.9598125177910204e-05, "loss": 0.9371, "step": 2504 }, { "epoch": 0.35371364021462864, "grad_norm": 3.8537915759975303, "learning_rate": 1.9597697240860983e-05, "loss": 0.9721, "step": 2505 }, { "epoch": 0.3538548432646145, "grad_norm": 4.318282105536294, "learning_rate": 1.959726908076543e-05, "loss": 0.9592, "step": 2506 }, { "epoch": 0.3539960463146004, "grad_norm": 4.4525781298293134, "learning_rate": 1.9596840697633484e-05, "loss": 1.1773, "step": 2507 }, { "epoch": 0.3541372493645863, "grad_norm": 3.9137208499398444, "learning_rate": 1.9596412091475107e-05, "loss": 0.9661, "step": 2508 }, { "epoch": 0.3542784524145722, "grad_norm": 4.261246179836905, "learning_rate": 1.9595983262300253e-05, "loss": 1.1474, "step": 2509 }, { "epoch": 0.35441965546455806, "grad_norm": 4.117969827162626, "learning_rate": 1.9595554210118896e-05, "loss": 0.8618, "step": 2510 }, { "epoch": 0.35456085851454394, "grad_norm": 3.986497611485309, "learning_rate": 1.9595124934941002e-05, "loss": 0.9507, "step": 2511 }, { "epoch": 0.35470206156452977, "grad_norm": 4.896057161074228, "learning_rate": 1.9594695436776548e-05, "loss": 1.2692, "step": 2512 }, { "epoch": 0.35484326461451565, "grad_norm": 4.9254643946988805, "learning_rate": 1.9594265715635515e-05, "loss": 1.2371, "step": 2513 }, { "epoch": 0.35498446766450154, "grad_norm": 4.601486289516987, "learning_rate": 1.9593835771527893e-05, "loss": 1.245, "step": 2514 }, { "epoch": 0.3551256707144874, "grad_norm": 4.811085086261459, "learning_rate": 1.9593405604463668e-05, "loss": 1.3297, "step": 2515 }, { "epoch": 0.3552668737644733, "grad_norm": 4.224424165414707, "learning_rate": 1.9592975214452844e-05, "loss": 1.119, "step": 2516 }, { "epoch": 0.3554080768144592, "grad_norm": 4.3187961764729454, "learning_rate": 1.9592544601505414e-05, "loss": 1.2035, "step": 2517 }, { "epoch": 0.35554927986444507, "grad_norm": 4.173887095267237, "learning_rate": 1.959211376563139e-05, "loss": 1.2194, "step": 2518 }, { "epoch": 0.35569048291443095, "grad_norm": 4.000003415608054, "learning_rate": 1.959168270684079e-05, "loss": 1.0176, "step": 2519 }, { "epoch": 0.35583168596441683, "grad_norm": 3.5849309205398687, "learning_rate": 1.959125142514362e-05, "loss": 0.971, "step": 2520 }, { "epoch": 0.3559728890144027, "grad_norm": 3.8655917985040253, "learning_rate": 1.9590819920549912e-05, "loss": 1.0038, "step": 2521 }, { "epoch": 0.3561140920643886, "grad_norm": 3.788802087877977, "learning_rate": 1.959038819306969e-05, "loss": 1.0044, "step": 2522 }, { "epoch": 0.3562552951143745, "grad_norm": 3.9164012052205406, "learning_rate": 1.958995624271299e-05, "loss": 1.2183, "step": 2523 }, { "epoch": 0.35639649816436036, "grad_norm": 4.927990507645713, "learning_rate": 1.958952406948985e-05, "loss": 1.1799, "step": 2524 }, { "epoch": 0.35653770121434625, "grad_norm": 4.013795916940915, "learning_rate": 1.9589091673410306e-05, "loss": 1.1286, "step": 2525 }, { "epoch": 0.35667890426433213, "grad_norm": 4.119246315026301, "learning_rate": 1.9588659054484417e-05, "loss": 1.205, "step": 2526 }, { "epoch": 0.356820107314318, "grad_norm": 3.4665389519728422, "learning_rate": 1.9588226212722233e-05, "loss": 1.0516, "step": 2527 }, { "epoch": 0.3569613103643039, "grad_norm": 3.6728180128216215, "learning_rate": 1.958779314813381e-05, "loss": 1.0172, "step": 2528 }, { "epoch": 0.3571025134142897, "grad_norm": 4.353478614843127, "learning_rate": 1.958735986072922e-05, "loss": 1.1668, "step": 2529 }, { "epoch": 0.3572437164642756, "grad_norm": 4.296046552366659, "learning_rate": 1.9586926350518522e-05, "loss": 1.1394, "step": 2530 }, { "epoch": 0.3573849195142615, "grad_norm": 4.100489280404129, "learning_rate": 1.9586492617511797e-05, "loss": 1.1776, "step": 2531 }, { "epoch": 0.3575261225642474, "grad_norm": 3.898626603361294, "learning_rate": 1.9586058661719124e-05, "loss": 1.2663, "step": 2532 }, { "epoch": 0.35766732561423326, "grad_norm": 3.642687197006916, "learning_rate": 1.9585624483150588e-05, "loss": 0.8974, "step": 2533 }, { "epoch": 0.35780852866421914, "grad_norm": 6.302146407584119, "learning_rate": 1.9585190081816277e-05, "loss": 1.0959, "step": 2534 }, { "epoch": 0.357949731714205, "grad_norm": 3.4604169743002955, "learning_rate": 1.958475545772629e-05, "loss": 0.796, "step": 2535 }, { "epoch": 0.3580909347641909, "grad_norm": 3.742509145968615, "learning_rate": 1.958432061089073e-05, "loss": 0.867, "step": 2536 }, { "epoch": 0.3582321378141768, "grad_norm": 5.142661899346792, "learning_rate": 1.958388554131969e-05, "loss": 1.367, "step": 2537 }, { "epoch": 0.35837334086416267, "grad_norm": 5.111679361334132, "learning_rate": 1.9583450249023292e-05, "loss": 1.4026, "step": 2538 }, { "epoch": 0.35851454391414855, "grad_norm": 4.567735102027591, "learning_rate": 1.958301473401165e-05, "loss": 1.1452, "step": 2539 }, { "epoch": 0.35865574696413444, "grad_norm": 3.5906392011566757, "learning_rate": 1.9582578996294882e-05, "loss": 0.8824, "step": 2540 }, { "epoch": 0.3587969500141203, "grad_norm": 4.614562551359501, "learning_rate": 1.958214303588312e-05, "loss": 1.1862, "step": 2541 }, { "epoch": 0.3589381530641062, "grad_norm": 4.6707418988975675, "learning_rate": 1.9581706852786492e-05, "loss": 1.1986, "step": 2542 }, { "epoch": 0.3590793561140921, "grad_norm": 3.6527111195083934, "learning_rate": 1.958127044701513e-05, "loss": 0.9403, "step": 2543 }, { "epoch": 0.35922055916407797, "grad_norm": 3.930119969334276, "learning_rate": 1.9580833818579184e-05, "loss": 1.067, "step": 2544 }, { "epoch": 0.35936176221406385, "grad_norm": 4.699400713506971, "learning_rate": 1.9580396967488797e-05, "loss": 1.1573, "step": 2545 }, { "epoch": 0.3595029652640497, "grad_norm": 4.143347786517, "learning_rate": 1.9579959893754123e-05, "loss": 1.1486, "step": 2546 }, { "epoch": 0.35964416831403556, "grad_norm": 4.346619719095544, "learning_rate": 1.9579522597385315e-05, "loss": 1.1658, "step": 2547 }, { "epoch": 0.35978537136402144, "grad_norm": 3.9050640348692705, "learning_rate": 1.9579085078392543e-05, "loss": 1.1804, "step": 2548 }, { "epoch": 0.3599265744140073, "grad_norm": 4.029578756898514, "learning_rate": 1.957864733678597e-05, "loss": 1.1095, "step": 2549 }, { "epoch": 0.3600677774639932, "grad_norm": 4.289717306795405, "learning_rate": 1.9578209372575766e-05, "loss": 1.2339, "step": 2550 }, { "epoch": 0.3602089805139791, "grad_norm": 4.273513100374371, "learning_rate": 1.9577771185772118e-05, "loss": 1.1185, "step": 2551 }, { "epoch": 0.360350183563965, "grad_norm": 4.848127197146267, "learning_rate": 1.95773327763852e-05, "loss": 1.0992, "step": 2552 }, { "epoch": 0.36049138661395086, "grad_norm": 5.602086404859899, "learning_rate": 1.957689414442521e-05, "loss": 1.2741, "step": 2553 }, { "epoch": 0.36063258966393674, "grad_norm": 4.350079135573795, "learning_rate": 1.9576455289902327e-05, "loss": 1.0776, "step": 2554 }, { "epoch": 0.3607737927139226, "grad_norm": 3.527970556189663, "learning_rate": 1.9576016212826766e-05, "loss": 0.9498, "step": 2555 }, { "epoch": 0.3609149957639085, "grad_norm": 4.241281288615647, "learning_rate": 1.9575576913208718e-05, "loss": 1.049, "step": 2556 }, { "epoch": 0.3610561988138944, "grad_norm": 4.402847496567008, "learning_rate": 1.9575137391058404e-05, "loss": 1.0306, "step": 2557 }, { "epoch": 0.3611974018638803, "grad_norm": 4.239163576663033, "learning_rate": 1.9574697646386027e-05, "loss": 1.1279, "step": 2558 }, { "epoch": 0.36133860491386616, "grad_norm": 3.6269966704917787, "learning_rate": 1.9574257679201814e-05, "loss": 0.7925, "step": 2559 }, { "epoch": 0.36147980796385204, "grad_norm": 4.064863474621262, "learning_rate": 1.957381748951599e-05, "loss": 1.0842, "step": 2560 }, { "epoch": 0.3616210110138379, "grad_norm": 3.963808849874696, "learning_rate": 1.957337707733878e-05, "loss": 1.0652, "step": 2561 }, { "epoch": 0.3617622140638238, "grad_norm": 4.756840585174615, "learning_rate": 1.9572936442680417e-05, "loss": 1.1014, "step": 2562 }, { "epoch": 0.36190341711380963, "grad_norm": 5.005031496742946, "learning_rate": 1.957249558555115e-05, "loss": 1.3319, "step": 2563 }, { "epoch": 0.3620446201637955, "grad_norm": 4.341396189894744, "learning_rate": 1.957205450596122e-05, "loss": 1.0208, "step": 2564 }, { "epoch": 0.3621858232137814, "grad_norm": 4.656639418326444, "learning_rate": 1.9571613203920874e-05, "loss": 1.1131, "step": 2565 }, { "epoch": 0.3623270262637673, "grad_norm": 6.44905363207047, "learning_rate": 1.9571171679440374e-05, "loss": 1.2414, "step": 2566 }, { "epoch": 0.36246822931375317, "grad_norm": 4.048724455435908, "learning_rate": 1.9570729932529974e-05, "loss": 0.9768, "step": 2567 }, { "epoch": 0.36260943236373905, "grad_norm": 4.011691898365749, "learning_rate": 1.9570287963199947e-05, "loss": 1.0937, "step": 2568 }, { "epoch": 0.36275063541372493, "grad_norm": 3.9083180934002293, "learning_rate": 1.956984577146056e-05, "loss": 0.9612, "step": 2569 }, { "epoch": 0.3628918384637108, "grad_norm": 4.689141875202764, "learning_rate": 1.956940335732209e-05, "loss": 1.1409, "step": 2570 }, { "epoch": 0.3630330415136967, "grad_norm": 4.992250499224746, "learning_rate": 1.956896072079482e-05, "loss": 1.3679, "step": 2571 }, { "epoch": 0.3631742445636826, "grad_norm": 3.666532753766148, "learning_rate": 1.9568517861889035e-05, "loss": 1.0293, "step": 2572 }, { "epoch": 0.36331544761366846, "grad_norm": 4.994677846394718, "learning_rate": 1.9568074780615026e-05, "loss": 0.9653, "step": 2573 }, { "epoch": 0.36345665066365435, "grad_norm": 4.320697678110346, "learning_rate": 1.9567631476983088e-05, "loss": 1.1125, "step": 2574 }, { "epoch": 0.36359785371364023, "grad_norm": 4.166960567512201, "learning_rate": 1.9567187951003533e-05, "loss": 1.0624, "step": 2575 }, { "epoch": 0.3637390567636261, "grad_norm": 4.226767354323393, "learning_rate": 1.9566744202686657e-05, "loss": 1.0811, "step": 2576 }, { "epoch": 0.363880259813612, "grad_norm": 4.176141665471873, "learning_rate": 1.9566300232042778e-05, "loss": 1.0234, "step": 2577 }, { "epoch": 0.3640214628635979, "grad_norm": 4.677846483539329, "learning_rate": 1.9565856039082213e-05, "loss": 1.194, "step": 2578 }, { "epoch": 0.36416266591358376, "grad_norm": 3.7442466210235854, "learning_rate": 1.9565411623815287e-05, "loss": 0.9986, "step": 2579 }, { "epoch": 0.3643038689635696, "grad_norm": 4.213189036260175, "learning_rate": 1.9564966986252326e-05, "loss": 1.075, "step": 2580 }, { "epoch": 0.36444507201355547, "grad_norm": 4.168082144709891, "learning_rate": 1.956452212640366e-05, "loss": 1.087, "step": 2581 }, { "epoch": 0.36458627506354135, "grad_norm": 3.6924693608572676, "learning_rate": 1.956407704427963e-05, "loss": 0.9708, "step": 2582 }, { "epoch": 0.36472747811352724, "grad_norm": 5.398897823996847, "learning_rate": 1.9563631739890586e-05, "loss": 1.2565, "step": 2583 }, { "epoch": 0.3648686811635131, "grad_norm": 3.9430816585498403, "learning_rate": 1.9563186213246864e-05, "loss": 1.138, "step": 2584 }, { "epoch": 0.365009884213499, "grad_norm": 4.640374438413612, "learning_rate": 1.9562740464358828e-05, "loss": 1.2093, "step": 2585 }, { "epoch": 0.3651510872634849, "grad_norm": 3.6057963344897535, "learning_rate": 1.9562294493236834e-05, "loss": 1.0812, "step": 2586 }, { "epoch": 0.36529229031347077, "grad_norm": 4.841710827043427, "learning_rate": 1.9561848299891243e-05, "loss": 1.105, "step": 2587 }, { "epoch": 0.36543349336345665, "grad_norm": 4.159329379608884, "learning_rate": 1.9561401884332424e-05, "loss": 1.0502, "step": 2588 }, { "epoch": 0.36557469641344253, "grad_norm": 3.4732234623733, "learning_rate": 1.956095524657076e-05, "loss": 0.7829, "step": 2589 }, { "epoch": 0.3657158994634284, "grad_norm": 3.989978056712313, "learning_rate": 1.9560508386616624e-05, "loss": 1.3175, "step": 2590 }, { "epoch": 0.3658571025134143, "grad_norm": 4.562216225358608, "learning_rate": 1.95600613044804e-05, "loss": 1.0369, "step": 2591 }, { "epoch": 0.3659983055634002, "grad_norm": 3.758237079390016, "learning_rate": 1.9559614000172483e-05, "loss": 0.918, "step": 2592 }, { "epoch": 0.36613950861338607, "grad_norm": 3.9128640295803243, "learning_rate": 1.9559166473703265e-05, "loss": 0.952, "step": 2593 }, { "epoch": 0.36628071166337195, "grad_norm": 4.297107498774017, "learning_rate": 1.9558718725083143e-05, "loss": 1.1633, "step": 2594 }, { "epoch": 0.36642191471335783, "grad_norm": 5.802336557122011, "learning_rate": 1.9558270754322528e-05, "loss": 1.4975, "step": 2595 }, { "epoch": 0.3665631177633437, "grad_norm": 3.9028995835946576, "learning_rate": 1.955782256143183e-05, "loss": 1.039, "step": 2596 }, { "epoch": 0.36670432081332954, "grad_norm": 3.1703449733640925, "learning_rate": 1.9557374146421462e-05, "loss": 0.7915, "step": 2597 }, { "epoch": 0.3668455238633154, "grad_norm": 4.295446399763511, "learning_rate": 1.9556925509301844e-05, "loss": 1.258, "step": 2598 }, { "epoch": 0.3669867269133013, "grad_norm": 4.116456735944132, "learning_rate": 1.9556476650083407e-05, "loss": 1.2132, "step": 2599 }, { "epoch": 0.3671279299632872, "grad_norm": 3.972161913324547, "learning_rate": 1.9556027568776577e-05, "loss": 1.1161, "step": 2600 }, { "epoch": 0.3672691330132731, "grad_norm": 7.778899117627368, "learning_rate": 1.9555578265391797e-05, "loss": 1.0107, "step": 2601 }, { "epoch": 0.36741033606325896, "grad_norm": 3.773900012317438, "learning_rate": 1.9555128739939504e-05, "loss": 1.056, "step": 2602 }, { "epoch": 0.36755153911324484, "grad_norm": 4.70357587726098, "learning_rate": 1.9554678992430145e-05, "loss": 1.222, "step": 2603 }, { "epoch": 0.3676927421632307, "grad_norm": 4.455645668662674, "learning_rate": 1.9554229022874175e-05, "loss": 1.0587, "step": 2604 }, { "epoch": 0.3678339452132166, "grad_norm": 3.864786247438436, "learning_rate": 1.9553778831282043e-05, "loss": 1.2616, "step": 2605 }, { "epoch": 0.3679751482632025, "grad_norm": 4.404179756595742, "learning_rate": 1.9553328417664223e-05, "loss": 1.2918, "step": 2606 }, { "epoch": 0.3681163513131884, "grad_norm": 3.732342746674166, "learning_rate": 1.9552877782031172e-05, "loss": 0.9101, "step": 2607 }, { "epoch": 0.36825755436317426, "grad_norm": 3.6330542339800393, "learning_rate": 1.9552426924393368e-05, "loss": 1.1368, "step": 2608 }, { "epoch": 0.36839875741316014, "grad_norm": 3.8957792385974126, "learning_rate": 1.955197584476129e-05, "loss": 0.9992, "step": 2609 }, { "epoch": 0.368539960463146, "grad_norm": 4.755621887955596, "learning_rate": 1.9551524543145417e-05, "loss": 1.1568, "step": 2610 }, { "epoch": 0.3686811635131319, "grad_norm": 3.6228377694934077, "learning_rate": 1.955107301955624e-05, "loss": 0.9163, "step": 2611 }, { "epoch": 0.3688223665631178, "grad_norm": 3.7523259538633744, "learning_rate": 1.9550621274004248e-05, "loss": 1.082, "step": 2612 }, { "epoch": 0.36896356961310367, "grad_norm": 4.395839233243182, "learning_rate": 1.9550169306499942e-05, "loss": 1.2092, "step": 2613 }, { "epoch": 0.3691047726630895, "grad_norm": 3.7344739937623537, "learning_rate": 1.9549717117053828e-05, "loss": 0.9991, "step": 2614 }, { "epoch": 0.3692459757130754, "grad_norm": 3.3198120065949817, "learning_rate": 1.954926470567641e-05, "loss": 0.7808, "step": 2615 }, { "epoch": 0.36938717876306126, "grad_norm": 3.8017412632461256, "learning_rate": 1.9548812072378208e-05, "loss": 1.0538, "step": 2616 }, { "epoch": 0.36952838181304715, "grad_norm": 4.015647756324, "learning_rate": 1.9548359217169732e-05, "loss": 1.0941, "step": 2617 }, { "epoch": 0.36966958486303303, "grad_norm": 4.309437839698541, "learning_rate": 1.9547906140061515e-05, "loss": 1.0449, "step": 2618 }, { "epoch": 0.3698107879130189, "grad_norm": 4.255473609926496, "learning_rate": 1.9547452841064083e-05, "loss": 1.1466, "step": 2619 }, { "epoch": 0.3699519909630048, "grad_norm": 4.757163261565678, "learning_rate": 1.9546999320187966e-05, "loss": 0.8497, "step": 2620 }, { "epoch": 0.3700931940129907, "grad_norm": 4.03713648622002, "learning_rate": 1.9546545577443715e-05, "loss": 0.901, "step": 2621 }, { "epoch": 0.37023439706297656, "grad_norm": 3.795108762358568, "learning_rate": 1.954609161284186e-05, "loss": 1.1363, "step": 2622 }, { "epoch": 0.37037560011296244, "grad_norm": 3.9216056360652853, "learning_rate": 1.9545637426392966e-05, "loss": 1.1115, "step": 2623 }, { "epoch": 0.3705168031629483, "grad_norm": 4.842391888666367, "learning_rate": 1.9545183018107576e-05, "loss": 1.2379, "step": 2624 }, { "epoch": 0.3706580062129342, "grad_norm": 5.09348271062579, "learning_rate": 1.9544728387996255e-05, "loss": 1.2357, "step": 2625 }, { "epoch": 0.3707992092629201, "grad_norm": 4.25666053926506, "learning_rate": 1.9544273536069573e-05, "loss": 1.0311, "step": 2626 }, { "epoch": 0.370940412312906, "grad_norm": 3.7601192077035965, "learning_rate": 1.9543818462338088e-05, "loss": 0.9642, "step": 2627 }, { "epoch": 0.37108161536289186, "grad_norm": 3.586899463771343, "learning_rate": 1.9543363166812387e-05, "loss": 1.003, "step": 2628 }, { "epoch": 0.37122281841287774, "grad_norm": 4.016519845216909, "learning_rate": 1.954290764950305e-05, "loss": 1.1244, "step": 2629 }, { "epoch": 0.3713640214628636, "grad_norm": 4.197927373014517, "learning_rate": 1.9542451910420655e-05, "loss": 1.0522, "step": 2630 }, { "epoch": 0.37150522451284945, "grad_norm": 4.28277985995062, "learning_rate": 1.9541995949575806e-05, "loss": 0.9575, "step": 2631 }, { "epoch": 0.37164642756283534, "grad_norm": 3.853798088707334, "learning_rate": 1.9541539766979087e-05, "loss": 0.9294, "step": 2632 }, { "epoch": 0.3717876306128212, "grad_norm": 3.5723717464735714, "learning_rate": 1.9541083362641105e-05, "loss": 0.9443, "step": 2633 }, { "epoch": 0.3719288336628071, "grad_norm": 4.3969911517167874, "learning_rate": 1.954062673657247e-05, "loss": 0.9765, "step": 2634 }, { "epoch": 0.372070036712793, "grad_norm": 3.8952237306387794, "learning_rate": 1.9540169888783786e-05, "loss": 1.0487, "step": 2635 }, { "epoch": 0.37221123976277887, "grad_norm": 4.154113295143288, "learning_rate": 1.9539712819285674e-05, "loss": 1.1628, "step": 2636 }, { "epoch": 0.37235244281276475, "grad_norm": 3.7128959034966704, "learning_rate": 1.9539255528088757e-05, "loss": 0.969, "step": 2637 }, { "epoch": 0.37249364586275063, "grad_norm": 4.08440270371656, "learning_rate": 1.953879801520366e-05, "loss": 1.0588, "step": 2638 }, { "epoch": 0.3726348489127365, "grad_norm": 3.862995324890109, "learning_rate": 1.9538340280641018e-05, "loss": 1.0782, "step": 2639 }, { "epoch": 0.3727760519627224, "grad_norm": 3.8558077011851215, "learning_rate": 1.953788232441147e-05, "loss": 1.0931, "step": 2640 }, { "epoch": 0.3729172550127083, "grad_norm": 4.58102720276026, "learning_rate": 1.953742414652565e-05, "loss": 1.1056, "step": 2641 }, { "epoch": 0.37305845806269416, "grad_norm": 4.374194040456852, "learning_rate": 1.9536965746994213e-05, "loss": 1.0056, "step": 2642 }, { "epoch": 0.37319966111268005, "grad_norm": 3.498352355097033, "learning_rate": 1.9536507125827812e-05, "loss": 0.9966, "step": 2643 }, { "epoch": 0.37334086416266593, "grad_norm": 3.532055317243616, "learning_rate": 1.9536048283037105e-05, "loss": 0.9219, "step": 2644 }, { "epoch": 0.3734820672126518, "grad_norm": 3.9817641631313383, "learning_rate": 1.9535589218632753e-05, "loss": 1.182, "step": 2645 }, { "epoch": 0.3736232702626377, "grad_norm": 5.785004833342896, "learning_rate": 1.9535129932625425e-05, "loss": 1.0559, "step": 2646 }, { "epoch": 0.3737644733126236, "grad_norm": 4.319712805816962, "learning_rate": 1.9534670425025797e-05, "loss": 1.1583, "step": 2647 }, { "epoch": 0.3739056763626094, "grad_norm": 3.80630344736385, "learning_rate": 1.9534210695844543e-05, "loss": 1.1674, "step": 2648 }, { "epoch": 0.3740468794125953, "grad_norm": 3.3294615444271836, "learning_rate": 1.953375074509235e-05, "loss": 0.9583, "step": 2649 }, { "epoch": 0.3741880824625812, "grad_norm": 3.7984871568571092, "learning_rate": 1.9533290572779912e-05, "loss": 1.0346, "step": 2650 }, { "epoch": 0.37432928551256706, "grad_norm": 4.322781365966304, "learning_rate": 1.9532830178917915e-05, "loss": 1.4114, "step": 2651 }, { "epoch": 0.37447048856255294, "grad_norm": 4.20493116503359, "learning_rate": 1.9532369563517066e-05, "loss": 1.2394, "step": 2652 }, { "epoch": 0.3746116916125388, "grad_norm": 3.7573858692757427, "learning_rate": 1.9531908726588054e-05, "loss": 0.9712, "step": 2653 }, { "epoch": 0.3747528946625247, "grad_norm": 3.5038034186922222, "learning_rate": 1.953144766814161e-05, "loss": 0.8316, "step": 2654 }, { "epoch": 0.3748940977125106, "grad_norm": 4.42422056042016, "learning_rate": 1.9530986388188435e-05, "loss": 1.225, "step": 2655 }, { "epoch": 0.37503530076249647, "grad_norm": 3.5229286643543594, "learning_rate": 1.9530524886739254e-05, "loss": 1.0534, "step": 2656 }, { "epoch": 0.37517650381248235, "grad_norm": 4.421760015644147, "learning_rate": 1.9530063163804788e-05, "loss": 1.1626, "step": 2657 }, { "epoch": 0.37531770686246824, "grad_norm": 4.234414019437061, "learning_rate": 1.952960121939577e-05, "loss": 1.1658, "step": 2658 }, { "epoch": 0.3754589099124541, "grad_norm": 3.407028559838957, "learning_rate": 1.9529139053522937e-05, "loss": 0.9492, "step": 2659 }, { "epoch": 0.37560011296244, "grad_norm": 3.912165594715894, "learning_rate": 1.9528676666197026e-05, "loss": 1.1739, "step": 2660 }, { "epoch": 0.3757413160124259, "grad_norm": 4.051297316817544, "learning_rate": 1.9528214057428785e-05, "loss": 1.0469, "step": 2661 }, { "epoch": 0.37588251906241177, "grad_norm": 4.308649961399725, "learning_rate": 1.9527751227228964e-05, "loss": 1.3052, "step": 2662 }, { "epoch": 0.37602372211239765, "grad_norm": 4.089780724893491, "learning_rate": 1.952728817560832e-05, "loss": 1.0246, "step": 2663 }, { "epoch": 0.37616492516238353, "grad_norm": 3.125357231604796, "learning_rate": 1.9526824902577614e-05, "loss": 0.8718, "step": 2664 }, { "epoch": 0.37630612821236936, "grad_norm": 4.949633516736115, "learning_rate": 1.952636140814761e-05, "loss": 1.1369, "step": 2665 }, { "epoch": 0.37644733126235524, "grad_norm": 3.9762433984093333, "learning_rate": 1.9525897692329082e-05, "loss": 1.0834, "step": 2666 }, { "epoch": 0.3765885343123411, "grad_norm": 4.659754245044144, "learning_rate": 1.9525433755132805e-05, "loss": 1.3132, "step": 2667 }, { "epoch": 0.376729737362327, "grad_norm": 4.726953798130423, "learning_rate": 1.952496959656956e-05, "loss": 1.1566, "step": 2668 }, { "epoch": 0.3768709404123129, "grad_norm": 3.778798035419523, "learning_rate": 1.9524505216650136e-05, "loss": 1.0133, "step": 2669 }, { "epoch": 0.3770121434622988, "grad_norm": 4.003925433875248, "learning_rate": 1.9524040615385324e-05, "loss": 0.9351, "step": 2670 }, { "epoch": 0.37715334651228466, "grad_norm": 3.6668973005269336, "learning_rate": 1.9523575792785924e-05, "loss": 1.0866, "step": 2671 }, { "epoch": 0.37729454956227054, "grad_norm": 4.696904301565168, "learning_rate": 1.9523110748862733e-05, "loss": 1.3557, "step": 2672 }, { "epoch": 0.3774357526122564, "grad_norm": 3.9070941376213524, "learning_rate": 1.9522645483626558e-05, "loss": 0.9807, "step": 2673 }, { "epoch": 0.3775769556622423, "grad_norm": 4.328223610242269, "learning_rate": 1.952217999708822e-05, "loss": 1.1334, "step": 2674 }, { "epoch": 0.3777181587122282, "grad_norm": 3.8758848340098866, "learning_rate": 1.9521714289258527e-05, "loss": 1.0521, "step": 2675 }, { "epoch": 0.3778593617622141, "grad_norm": 4.594226276400707, "learning_rate": 1.952124836014831e-05, "loss": 1.1198, "step": 2676 }, { "epoch": 0.37800056481219996, "grad_norm": 3.355492383155113, "learning_rate": 1.952078220976839e-05, "loss": 0.8233, "step": 2677 }, { "epoch": 0.37814176786218584, "grad_norm": 4.039550637523467, "learning_rate": 1.9520315838129602e-05, "loss": 1.1269, "step": 2678 }, { "epoch": 0.3782829709121717, "grad_norm": 3.864121688104095, "learning_rate": 1.951984924524279e-05, "loss": 0.9341, "step": 2679 }, { "epoch": 0.3784241739621576, "grad_norm": 5.292133876301585, "learning_rate": 1.951938243111879e-05, "loss": 1.5232, "step": 2680 }, { "epoch": 0.3785653770121435, "grad_norm": 4.296730531760853, "learning_rate": 1.9518915395768455e-05, "loss": 1.179, "step": 2681 }, { "epoch": 0.3787065800621293, "grad_norm": 3.870423010863545, "learning_rate": 1.9518448139202632e-05, "loss": 1.0432, "step": 2682 }, { "epoch": 0.3788477831121152, "grad_norm": 3.5768396943707743, "learning_rate": 1.951798066143219e-05, "loss": 0.9423, "step": 2683 }, { "epoch": 0.3789889861621011, "grad_norm": 4.086096918811555, "learning_rate": 1.9517512962467987e-05, "loss": 1.0197, "step": 2684 }, { "epoch": 0.37913018921208697, "grad_norm": 4.918901831231184, "learning_rate": 1.9517045042320893e-05, "loss": 1.2252, "step": 2685 }, { "epoch": 0.37927139226207285, "grad_norm": 3.9410263627522113, "learning_rate": 1.951657690100178e-05, "loss": 1.0439, "step": 2686 }, { "epoch": 0.37941259531205873, "grad_norm": 4.779836024281969, "learning_rate": 1.951610853852153e-05, "loss": 1.1313, "step": 2687 }, { "epoch": 0.3795537983620446, "grad_norm": 3.622061123635568, "learning_rate": 1.951563995489103e-05, "loss": 0.997, "step": 2688 }, { "epoch": 0.3796950014120305, "grad_norm": 4.169836329608204, "learning_rate": 1.9515171150121167e-05, "loss": 1.3071, "step": 2689 }, { "epoch": 0.3798362044620164, "grad_norm": 4.374344681431315, "learning_rate": 1.9514702124222837e-05, "loss": 1.0597, "step": 2690 }, { "epoch": 0.37997740751200226, "grad_norm": 3.6603412495739334, "learning_rate": 1.9514232877206932e-05, "loss": 1.0816, "step": 2691 }, { "epoch": 0.38011861056198815, "grad_norm": 4.232412934918964, "learning_rate": 1.951376340908437e-05, "loss": 1.0337, "step": 2692 }, { "epoch": 0.38025981361197403, "grad_norm": 3.9488707622913743, "learning_rate": 1.9513293719866054e-05, "loss": 0.9889, "step": 2693 }, { "epoch": 0.3804010166619599, "grad_norm": 4.001883021703839, "learning_rate": 1.95128238095629e-05, "loss": 0.8783, "step": 2694 }, { "epoch": 0.3805422197119458, "grad_norm": 4.174075182615598, "learning_rate": 1.9512353678185828e-05, "loss": 1.0153, "step": 2695 }, { "epoch": 0.3806834227619317, "grad_norm": 3.516864017477731, "learning_rate": 1.9511883325745767e-05, "loss": 0.932, "step": 2696 }, { "epoch": 0.38082462581191756, "grad_norm": 3.770791065856057, "learning_rate": 1.9511412752253644e-05, "loss": 1.0378, "step": 2697 }, { "epoch": 0.38096582886190344, "grad_norm": 3.4299240307939307, "learning_rate": 1.9510941957720396e-05, "loss": 0.8851, "step": 2698 }, { "epoch": 0.38110703191188927, "grad_norm": 4.4304058437051435, "learning_rate": 1.9510470942156963e-05, "loss": 1.1107, "step": 2699 }, { "epoch": 0.38124823496187515, "grad_norm": 3.965970069558552, "learning_rate": 1.9509999705574293e-05, "loss": 0.8162, "step": 2700 }, { "epoch": 0.38138943801186104, "grad_norm": 5.602704184742451, "learning_rate": 1.950952824798334e-05, "loss": 1.3913, "step": 2701 }, { "epoch": 0.3815306410618469, "grad_norm": 4.553262964596167, "learning_rate": 1.950905656939505e-05, "loss": 1.0868, "step": 2702 }, { "epoch": 0.3816718441118328, "grad_norm": 4.421385307887014, "learning_rate": 1.95085846698204e-05, "loss": 0.9162, "step": 2703 }, { "epoch": 0.3818130471618187, "grad_norm": 3.7775288617615512, "learning_rate": 1.9508112549270346e-05, "loss": 1.0031, "step": 2704 }, { "epoch": 0.38195425021180457, "grad_norm": 4.1441089250265435, "learning_rate": 1.9507640207755863e-05, "loss": 0.9769, "step": 2705 }, { "epoch": 0.38209545326179045, "grad_norm": 4.938961726624924, "learning_rate": 1.9507167645287926e-05, "loss": 1.1947, "step": 2706 }, { "epoch": 0.38223665631177633, "grad_norm": 3.9623101715891833, "learning_rate": 1.950669486187752e-05, "loss": 1.1082, "step": 2707 }, { "epoch": 0.3823778593617622, "grad_norm": 4.532783941100174, "learning_rate": 1.950622185753563e-05, "loss": 1.3035, "step": 2708 }, { "epoch": 0.3825190624117481, "grad_norm": 4.338254266892728, "learning_rate": 1.950574863227325e-05, "loss": 0.9905, "step": 2709 }, { "epoch": 0.382660265461734, "grad_norm": 3.778152467797074, "learning_rate": 1.9505275186101378e-05, "loss": 1.0297, "step": 2710 }, { "epoch": 0.38280146851171987, "grad_norm": 4.4851040542862926, "learning_rate": 1.9504801519031015e-05, "loss": 1.2811, "step": 2711 }, { "epoch": 0.38294267156170575, "grad_norm": 3.9568407045620857, "learning_rate": 1.950432763107317e-05, "loss": 1.0075, "step": 2712 }, { "epoch": 0.38308387461169163, "grad_norm": 3.7533386973845566, "learning_rate": 1.950385352223885e-05, "loss": 1.0422, "step": 2713 }, { "epoch": 0.3832250776616775, "grad_norm": 4.265347833194422, "learning_rate": 1.9503379192539086e-05, "loss": 0.9976, "step": 2714 }, { "epoch": 0.3833662807116634, "grad_norm": 3.8381243027550553, "learning_rate": 1.950290464198489e-05, "loss": 1.0491, "step": 2715 }, { "epoch": 0.3835074837616492, "grad_norm": 4.470390815506896, "learning_rate": 1.9502429870587295e-05, "loss": 1.0129, "step": 2716 }, { "epoch": 0.3836486868116351, "grad_norm": 4.593178100202923, "learning_rate": 1.9501954878357335e-05, "loss": 1.131, "step": 2717 }, { "epoch": 0.383789889861621, "grad_norm": 5.430329039521456, "learning_rate": 1.9501479665306046e-05, "loss": 1.0151, "step": 2718 }, { "epoch": 0.3839310929116069, "grad_norm": 5.051734937013839, "learning_rate": 1.9501004231444475e-05, "loss": 1.1075, "step": 2719 }, { "epoch": 0.38407229596159276, "grad_norm": 4.615079885648788, "learning_rate": 1.9500528576783667e-05, "loss": 1.1841, "step": 2720 }, { "epoch": 0.38421349901157864, "grad_norm": 4.337096240779647, "learning_rate": 1.9500052701334676e-05, "loss": 1.1719, "step": 2721 }, { "epoch": 0.3843547020615645, "grad_norm": 3.3863351116251525, "learning_rate": 1.9499576605108564e-05, "loss": 0.8904, "step": 2722 }, { "epoch": 0.3844959051115504, "grad_norm": 4.460291499387634, "learning_rate": 1.9499100288116395e-05, "loss": 1.1331, "step": 2723 }, { "epoch": 0.3846371081615363, "grad_norm": 3.9318462124993157, "learning_rate": 1.949862375036924e-05, "loss": 1.0726, "step": 2724 }, { "epoch": 0.3847783112115222, "grad_norm": 3.7766029512745964, "learning_rate": 1.9498146991878168e-05, "loss": 0.9912, "step": 2725 }, { "epoch": 0.38491951426150806, "grad_norm": 4.4911537459112765, "learning_rate": 1.949767001265426e-05, "loss": 0.7385, "step": 2726 }, { "epoch": 0.38506071731149394, "grad_norm": 3.977308316926048, "learning_rate": 1.9497192812708606e-05, "loss": 1.0132, "step": 2727 }, { "epoch": 0.3852019203614798, "grad_norm": 4.581931776307272, "learning_rate": 1.949671539205229e-05, "loss": 1.1203, "step": 2728 }, { "epoch": 0.3853431234114657, "grad_norm": 4.699403601790278, "learning_rate": 1.9496237750696413e-05, "loss": 1.3438, "step": 2729 }, { "epoch": 0.3854843264614516, "grad_norm": 3.699279976222106, "learning_rate": 1.9495759888652072e-05, "loss": 0.908, "step": 2730 }, { "epoch": 0.38562552951143747, "grad_norm": 3.924015321526006, "learning_rate": 1.949528180593037e-05, "loss": 1.1464, "step": 2731 }, { "epoch": 0.38576673256142335, "grad_norm": 5.09779881025153, "learning_rate": 1.9494803502542415e-05, "loss": 1.5832, "step": 2732 }, { "epoch": 0.3859079356114092, "grad_norm": 4.856794529656477, "learning_rate": 1.9494324978499335e-05, "loss": 1.1704, "step": 2733 }, { "epoch": 0.38604913866139506, "grad_norm": 4.676851568014474, "learning_rate": 1.949384623381224e-05, "loss": 1.2581, "step": 2734 }, { "epoch": 0.38619034171138095, "grad_norm": 4.332530877567536, "learning_rate": 1.9493367268492258e-05, "loss": 1.0467, "step": 2735 }, { "epoch": 0.38633154476136683, "grad_norm": 3.0685108020725798, "learning_rate": 1.949288808255052e-05, "loss": 0.8526, "step": 2736 }, { "epoch": 0.3864727478113527, "grad_norm": 3.923528354617052, "learning_rate": 1.9492408675998162e-05, "loss": 1.1083, "step": 2737 }, { "epoch": 0.3866139508613386, "grad_norm": 4.278407818709807, "learning_rate": 1.9491929048846328e-05, "loss": 1.3086, "step": 2738 }, { "epoch": 0.3867551539113245, "grad_norm": 3.7099759402937247, "learning_rate": 1.9491449201106162e-05, "loss": 1.0408, "step": 2739 }, { "epoch": 0.38689635696131036, "grad_norm": 3.832341795392169, "learning_rate": 1.9490969132788815e-05, "loss": 0.9741, "step": 2740 }, { "epoch": 0.38703756001129624, "grad_norm": 4.141725444034112, "learning_rate": 1.9490488843905444e-05, "loss": 1.0957, "step": 2741 }, { "epoch": 0.3871787630612821, "grad_norm": 4.059847309148408, "learning_rate": 1.9490008334467212e-05, "loss": 1.1496, "step": 2742 }, { "epoch": 0.387319966111268, "grad_norm": 4.246293329019511, "learning_rate": 1.9489527604485284e-05, "loss": 1.0252, "step": 2743 }, { "epoch": 0.3874611691612539, "grad_norm": 3.4751086442631394, "learning_rate": 1.948904665397083e-05, "loss": 1.016, "step": 2744 }, { "epoch": 0.3876023722112398, "grad_norm": 3.9739716106378142, "learning_rate": 1.9488565482935035e-05, "loss": 1.0474, "step": 2745 }, { "epoch": 0.38774357526122566, "grad_norm": 4.82982226244476, "learning_rate": 1.948808409138907e-05, "loss": 1.4463, "step": 2746 }, { "epoch": 0.38788477831121154, "grad_norm": 3.6111002597187354, "learning_rate": 1.9487602479344136e-05, "loss": 0.9508, "step": 2747 }, { "epoch": 0.3880259813611974, "grad_norm": 4.63687148749335, "learning_rate": 1.948712064681141e-05, "loss": 1.289, "step": 2748 }, { "epoch": 0.3881671844111833, "grad_norm": 4.177795358177969, "learning_rate": 1.9486638593802102e-05, "loss": 1.1522, "step": 2749 }, { "epoch": 0.38830838746116914, "grad_norm": 4.015539170279566, "learning_rate": 1.9486156320327406e-05, "loss": 1.1319, "step": 2750 }, { "epoch": 0.388449590511155, "grad_norm": 3.6544564020451635, "learning_rate": 1.948567382639854e-05, "loss": 1.027, "step": 2751 }, { "epoch": 0.3885907935611409, "grad_norm": 3.774945154471758, "learning_rate": 1.9485191112026707e-05, "loss": 1.1077, "step": 2752 }, { "epoch": 0.3887319966111268, "grad_norm": 4.740124640826402, "learning_rate": 1.948470817722313e-05, "loss": 1.1409, "step": 2753 }, { "epoch": 0.38887319966111267, "grad_norm": 4.75264550585457, "learning_rate": 1.9484225021999032e-05, "loss": 1.1033, "step": 2754 }, { "epoch": 0.38901440271109855, "grad_norm": 4.302933897867006, "learning_rate": 1.9483741646365634e-05, "loss": 1.1271, "step": 2755 }, { "epoch": 0.38915560576108443, "grad_norm": 4.452942458369776, "learning_rate": 1.9483258050334183e-05, "loss": 1.2529, "step": 2756 }, { "epoch": 0.3892968088110703, "grad_norm": 4.160460243434189, "learning_rate": 1.948277423391591e-05, "loss": 1.0664, "step": 2757 }, { "epoch": 0.3894380118610562, "grad_norm": 3.808903172503447, "learning_rate": 1.9482290197122054e-05, "loss": 1.0117, "step": 2758 }, { "epoch": 0.3895792149110421, "grad_norm": 4.096770110305158, "learning_rate": 1.948180593996387e-05, "loss": 1.1152, "step": 2759 }, { "epoch": 0.38972041796102797, "grad_norm": 4.731694182616994, "learning_rate": 1.9481321462452617e-05, "loss": 1.2267, "step": 2760 }, { "epoch": 0.38986162101101385, "grad_norm": 3.5671029909026992, "learning_rate": 1.948083676459954e-05, "loss": 0.945, "step": 2761 }, { "epoch": 0.39000282406099973, "grad_norm": 3.933822707549875, "learning_rate": 1.9480351846415918e-05, "loss": 1.114, "step": 2762 }, { "epoch": 0.3901440271109856, "grad_norm": 4.236137168500127, "learning_rate": 1.947986670791301e-05, "loss": 0.8734, "step": 2763 }, { "epoch": 0.3902852301609715, "grad_norm": 4.359060024644206, "learning_rate": 1.9479381349102095e-05, "loss": 1.2369, "step": 2764 }, { "epoch": 0.3904264332109574, "grad_norm": 4.029332521176153, "learning_rate": 1.9478895769994447e-05, "loss": 0.9955, "step": 2765 }, { "epoch": 0.39056763626094326, "grad_norm": 4.386578709334208, "learning_rate": 1.947840997060136e-05, "loss": 1.1138, "step": 2766 }, { "epoch": 0.3907088393109291, "grad_norm": 4.1981754583420114, "learning_rate": 1.9477923950934117e-05, "loss": 1.1013, "step": 2767 }, { "epoch": 0.390850042360915, "grad_norm": 3.8325815635145277, "learning_rate": 1.9477437711004015e-05, "loss": 1.4762, "step": 2768 }, { "epoch": 0.39099124541090086, "grad_norm": 4.299665249574838, "learning_rate": 1.9476951250822352e-05, "loss": 1.1533, "step": 2769 }, { "epoch": 0.39113244846088674, "grad_norm": 3.3133993437449587, "learning_rate": 1.9476464570400434e-05, "loss": 0.8659, "step": 2770 }, { "epoch": 0.3912736515108726, "grad_norm": 4.457612392487415, "learning_rate": 1.9475977669749576e-05, "loss": 1.4018, "step": 2771 }, { "epoch": 0.3914148545608585, "grad_norm": 4.281947259081842, "learning_rate": 1.9475490548881083e-05, "loss": 1.0546, "step": 2772 }, { "epoch": 0.3915560576108444, "grad_norm": 4.47114227005312, "learning_rate": 1.947500320780629e-05, "loss": 1.0499, "step": 2773 }, { "epoch": 0.39169726066083027, "grad_norm": 3.6351366322365926, "learning_rate": 1.9474515646536507e-05, "loss": 0.9539, "step": 2774 }, { "epoch": 0.39183846371081615, "grad_norm": 3.360208719388863, "learning_rate": 1.9474027865083078e-05, "loss": 1.1307, "step": 2775 }, { "epoch": 0.39197966676080204, "grad_norm": 4.792750695891107, "learning_rate": 1.947353986345733e-05, "loss": 1.2572, "step": 2776 }, { "epoch": 0.3921208698107879, "grad_norm": 3.7234572040857072, "learning_rate": 1.9473051641670606e-05, "loss": 1.1238, "step": 2777 }, { "epoch": 0.3922620728607738, "grad_norm": 4.024809281706479, "learning_rate": 1.9472563199734254e-05, "loss": 1.1902, "step": 2778 }, { "epoch": 0.3924032759107597, "grad_norm": 4.32184941242279, "learning_rate": 1.9472074537659623e-05, "loss": 1.183, "step": 2779 }, { "epoch": 0.39254447896074557, "grad_norm": 3.973898061809334, "learning_rate": 1.9471585655458073e-05, "loss": 1.0463, "step": 2780 }, { "epoch": 0.39268568201073145, "grad_norm": 3.5382949625765256, "learning_rate": 1.947109655314096e-05, "loss": 0.9137, "step": 2781 }, { "epoch": 0.39282688506071733, "grad_norm": 3.659168018837145, "learning_rate": 1.9470607230719654e-05, "loss": 0.8778, "step": 2782 }, { "epoch": 0.3929680881107032, "grad_norm": 3.710724697036336, "learning_rate": 1.947011768820553e-05, "loss": 1.1154, "step": 2783 }, { "epoch": 0.39310929116068904, "grad_norm": 3.5645993734216552, "learning_rate": 1.9469627925609956e-05, "loss": 1.1494, "step": 2784 }, { "epoch": 0.39325049421067493, "grad_norm": 4.453936604512423, "learning_rate": 1.9469137942944322e-05, "loss": 1.23, "step": 2785 }, { "epoch": 0.3933916972606608, "grad_norm": 3.8497070601764034, "learning_rate": 1.946864774022001e-05, "loss": 1.0835, "step": 2786 }, { "epoch": 0.3935329003106467, "grad_norm": 3.7273806837807073, "learning_rate": 1.946815731744841e-05, "loss": 0.9714, "step": 2787 }, { "epoch": 0.3936741033606326, "grad_norm": 3.542739712473149, "learning_rate": 1.946766667464093e-05, "loss": 0.9233, "step": 2788 }, { "epoch": 0.39381530641061846, "grad_norm": 4.309760392052565, "learning_rate": 1.946717581180896e-05, "loss": 1.0419, "step": 2789 }, { "epoch": 0.39395650946060434, "grad_norm": 4.2747948005786816, "learning_rate": 1.9466684728963914e-05, "loss": 0.9659, "step": 2790 }, { "epoch": 0.3940977125105902, "grad_norm": 4.071373379838009, "learning_rate": 1.94661934261172e-05, "loss": 1.1733, "step": 2791 }, { "epoch": 0.3942389155605761, "grad_norm": 4.354308692603431, "learning_rate": 1.9465701903280246e-05, "loss": 1.1826, "step": 2792 }, { "epoch": 0.394380118610562, "grad_norm": 3.467250005279303, "learning_rate": 1.946521016046446e-05, "loss": 0.8859, "step": 2793 }, { "epoch": 0.3945213216605479, "grad_norm": 3.973150345311791, "learning_rate": 1.9464718197681284e-05, "loss": 1.1531, "step": 2794 }, { "epoch": 0.39466252471053376, "grad_norm": 4.75614069620667, "learning_rate": 1.9464226014942143e-05, "loss": 1.1953, "step": 2795 }, { "epoch": 0.39480372776051964, "grad_norm": 4.658797466289896, "learning_rate": 1.9463733612258476e-05, "loss": 1.3401, "step": 2796 }, { "epoch": 0.3949449308105055, "grad_norm": 4.870245341561487, "learning_rate": 1.9463240989641728e-05, "loss": 1.5336, "step": 2797 }, { "epoch": 0.3950861338604914, "grad_norm": 4.163220008997059, "learning_rate": 1.9462748147103342e-05, "loss": 1.1639, "step": 2798 }, { "epoch": 0.3952273369104773, "grad_norm": 3.711852407156884, "learning_rate": 1.946225508465478e-05, "loss": 0.9642, "step": 2799 }, { "epoch": 0.3953685399604632, "grad_norm": 9.58097065388552, "learning_rate": 1.9461761802307494e-05, "loss": 1.2129, "step": 2800 }, { "epoch": 0.395509743010449, "grad_norm": 4.717086295626161, "learning_rate": 1.9461268300072957e-05, "loss": 1.2061, "step": 2801 }, { "epoch": 0.3956509460604349, "grad_norm": 5.044536280719149, "learning_rate": 1.9460774577962622e-05, "loss": 1.151, "step": 2802 }, { "epoch": 0.39579214911042077, "grad_norm": 3.8343522998532236, "learning_rate": 1.9460280635987972e-05, "loss": 1.0645, "step": 2803 }, { "epoch": 0.39593335216040665, "grad_norm": 5.440166613264818, "learning_rate": 1.945978647416049e-05, "loss": 1.3921, "step": 2804 }, { "epoch": 0.39607455521039253, "grad_norm": 3.708363929608251, "learning_rate": 1.9459292092491654e-05, "loss": 0.957, "step": 2805 }, { "epoch": 0.3962157582603784, "grad_norm": 4.155079615706019, "learning_rate": 1.9458797490992954e-05, "loss": 1.2308, "step": 2806 }, { "epoch": 0.3963569613103643, "grad_norm": 4.4752003599324235, "learning_rate": 1.9458302669675885e-05, "loss": 1.0389, "step": 2807 }, { "epoch": 0.3964981643603502, "grad_norm": 4.684881116513056, "learning_rate": 1.9457807628551947e-05, "loss": 1.4132, "step": 2808 }, { "epoch": 0.39663936741033606, "grad_norm": 3.989504604936287, "learning_rate": 1.9457312367632645e-05, "loss": 1.1522, "step": 2809 }, { "epoch": 0.39678057046032195, "grad_norm": 3.9344980111803904, "learning_rate": 1.945681688692949e-05, "loss": 0.9876, "step": 2810 }, { "epoch": 0.39692177351030783, "grad_norm": 3.5406678018825977, "learning_rate": 1.945632118645399e-05, "loss": 0.7986, "step": 2811 }, { "epoch": 0.3970629765602937, "grad_norm": 4.108074557932551, "learning_rate": 1.9455825266217674e-05, "loss": 1.2487, "step": 2812 }, { "epoch": 0.3972041796102796, "grad_norm": 4.225168997024323, "learning_rate": 1.9455329126232062e-05, "loss": 1.2726, "step": 2813 }, { "epoch": 0.3973453826602655, "grad_norm": 4.390988225733527, "learning_rate": 1.945483276650868e-05, "loss": 1.0149, "step": 2814 }, { "epoch": 0.39748658571025136, "grad_norm": 4.336442748880578, "learning_rate": 1.945433618705907e-05, "loss": 1.0722, "step": 2815 }, { "epoch": 0.39762778876023724, "grad_norm": 4.246477202517171, "learning_rate": 1.945383938789477e-05, "loss": 1.1181, "step": 2816 }, { "epoch": 0.3977689918102231, "grad_norm": 4.372277116775641, "learning_rate": 1.945334236902733e-05, "loss": 1.022, "step": 2817 }, { "epoch": 0.39791019486020895, "grad_norm": 4.8401263117502795, "learning_rate": 1.945284513046829e-05, "loss": 1.1328, "step": 2818 }, { "epoch": 0.39805139791019484, "grad_norm": 4.292931467045822, "learning_rate": 1.945234767222921e-05, "loss": 1.0954, "step": 2819 }, { "epoch": 0.3981926009601807, "grad_norm": 4.72573299249462, "learning_rate": 1.945184999432166e-05, "loss": 1.2775, "step": 2820 }, { "epoch": 0.3983338040101666, "grad_norm": 3.8239463230501674, "learning_rate": 1.9451352096757194e-05, "loss": 1.0205, "step": 2821 }, { "epoch": 0.3984750070601525, "grad_norm": 4.041276156472654, "learning_rate": 1.9450853979547384e-05, "loss": 0.9573, "step": 2822 }, { "epoch": 0.39861621011013837, "grad_norm": 4.070560610445045, "learning_rate": 1.9450355642703812e-05, "loss": 1.2284, "step": 2823 }, { "epoch": 0.39875741316012425, "grad_norm": 3.9760097844388644, "learning_rate": 1.9449857086238058e-05, "loss": 1.1104, "step": 2824 }, { "epoch": 0.39889861621011014, "grad_norm": 3.938362948686599, "learning_rate": 1.9449358310161702e-05, "loss": 1.0475, "step": 2825 }, { "epoch": 0.399039819260096, "grad_norm": 4.504721162087163, "learning_rate": 1.9448859314486342e-05, "loss": 1.275, "step": 2826 }, { "epoch": 0.3991810223100819, "grad_norm": 4.150285804261682, "learning_rate": 1.9448360099223573e-05, "loss": 1.173, "step": 2827 }, { "epoch": 0.3993222253600678, "grad_norm": 5.231670709249456, "learning_rate": 1.9447860664384998e-05, "loss": 1.1332, "step": 2828 }, { "epoch": 0.39946342841005367, "grad_norm": 3.5230753210176435, "learning_rate": 1.944736100998222e-05, "loss": 0.9293, "step": 2829 }, { "epoch": 0.39960463146003955, "grad_norm": 4.727604239013494, "learning_rate": 1.9446861136026846e-05, "loss": 1.2187, "step": 2830 }, { "epoch": 0.39974583451002543, "grad_norm": 4.2897729467955195, "learning_rate": 1.9446361042530504e-05, "loss": 0.9592, "step": 2831 }, { "epoch": 0.3998870375600113, "grad_norm": 3.5500370772861713, "learning_rate": 1.9445860729504812e-05, "loss": 0.9962, "step": 2832 }, { "epoch": 0.4000282406099972, "grad_norm": 4.182788855234692, "learning_rate": 1.9445360196961394e-05, "loss": 1.2218, "step": 2833 }, { "epoch": 0.4001694436599831, "grad_norm": 3.505284317296871, "learning_rate": 1.9444859444911884e-05, "loss": 0.8898, "step": 2834 }, { "epoch": 0.4003106467099689, "grad_norm": 3.7217854577370897, "learning_rate": 1.9444358473367918e-05, "loss": 0.9976, "step": 2835 }, { "epoch": 0.4004518497599548, "grad_norm": 4.456964792485029, "learning_rate": 1.9443857282341144e-05, "loss": 1.2514, "step": 2836 }, { "epoch": 0.4005930528099407, "grad_norm": 3.8733581802950092, "learning_rate": 1.9443355871843204e-05, "loss": 0.9771, "step": 2837 }, { "epoch": 0.40073425585992656, "grad_norm": 4.569326207442759, "learning_rate": 1.944285424188575e-05, "loss": 1.0039, "step": 2838 }, { "epoch": 0.40087545890991244, "grad_norm": 3.599581856348858, "learning_rate": 1.9442352392480442e-05, "loss": 0.9974, "step": 2839 }, { "epoch": 0.4010166619598983, "grad_norm": 4.822634572211196, "learning_rate": 1.9441850323638944e-05, "loss": 1.2405, "step": 2840 }, { "epoch": 0.4011578650098842, "grad_norm": 5.556602725747065, "learning_rate": 1.944134803537292e-05, "loss": 1.4219, "step": 2841 }, { "epoch": 0.4012990680598701, "grad_norm": 4.638659780718478, "learning_rate": 1.9440845527694047e-05, "loss": 1.0865, "step": 2842 }, { "epoch": 0.401440271109856, "grad_norm": 3.661355102017519, "learning_rate": 1.9440342800614e-05, "loss": 0.8872, "step": 2843 }, { "epoch": 0.40158147415984186, "grad_norm": 5.124125297052814, "learning_rate": 1.9439839854144463e-05, "loss": 1.3008, "step": 2844 }, { "epoch": 0.40172267720982774, "grad_norm": 5.2145852159988495, "learning_rate": 1.9439336688297124e-05, "loss": 1.1957, "step": 2845 }, { "epoch": 0.4018638802598136, "grad_norm": 4.525556129189602, "learning_rate": 1.9438833303083677e-05, "loss": 1.1, "step": 2846 }, { "epoch": 0.4020050833097995, "grad_norm": 4.116332338388515, "learning_rate": 1.9438329698515823e-05, "loss": 1.1821, "step": 2847 }, { "epoch": 0.4021462863597854, "grad_norm": 4.103742148760553, "learning_rate": 1.943782587460526e-05, "loss": 1.2015, "step": 2848 }, { "epoch": 0.40228748940977127, "grad_norm": 4.341750961649565, "learning_rate": 1.94373218313637e-05, "loss": 1.1189, "step": 2849 }, { "epoch": 0.40242869245975715, "grad_norm": 3.3873804460373695, "learning_rate": 1.9436817568802854e-05, "loss": 0.8193, "step": 2850 }, { "epoch": 0.40256989550974304, "grad_norm": 4.016785386950191, "learning_rate": 1.943631308693445e-05, "loss": 1.0794, "step": 2851 }, { "epoch": 0.40271109855972886, "grad_norm": 4.319167640626143, "learning_rate": 1.94358083857702e-05, "loss": 1.1174, "step": 2852 }, { "epoch": 0.40285230160971475, "grad_norm": 3.311247592128335, "learning_rate": 1.943530346532184e-05, "loss": 0.9365, "step": 2853 }, { "epoch": 0.40299350465970063, "grad_norm": 3.6004888360968095, "learning_rate": 1.9434798325601098e-05, "loss": 0.9965, "step": 2854 }, { "epoch": 0.4031347077096865, "grad_norm": 3.511507114216554, "learning_rate": 1.943429296661972e-05, "loss": 0.9672, "step": 2855 }, { "epoch": 0.4032759107596724, "grad_norm": 3.989606697578757, "learning_rate": 1.9433787388389453e-05, "loss": 1.2496, "step": 2856 }, { "epoch": 0.4034171138096583, "grad_norm": 4.270706904458701, "learning_rate": 1.9433281590922036e-05, "loss": 1.2814, "step": 2857 }, { "epoch": 0.40355831685964416, "grad_norm": 4.289290271299601, "learning_rate": 1.943277557422923e-05, "loss": 1.0291, "step": 2858 }, { "epoch": 0.40369951990963004, "grad_norm": 3.922499527858311, "learning_rate": 1.9432269338322793e-05, "loss": 1.1003, "step": 2859 }, { "epoch": 0.4038407229596159, "grad_norm": 3.8491093208243687, "learning_rate": 1.943176288321449e-05, "loss": 1.1666, "step": 2860 }, { "epoch": 0.4039819260096018, "grad_norm": 3.825980070495842, "learning_rate": 1.943125620891609e-05, "loss": 1.1526, "step": 2861 }, { "epoch": 0.4041231290595877, "grad_norm": 3.8334076999297, "learning_rate": 1.943074931543937e-05, "loss": 1.1317, "step": 2862 }, { "epoch": 0.4042643321095736, "grad_norm": 4.337298530672957, "learning_rate": 1.9430242202796107e-05, "loss": 1.1099, "step": 2863 }, { "epoch": 0.40440553515955946, "grad_norm": 3.4343828981332836, "learning_rate": 1.942973487099809e-05, "loss": 0.8236, "step": 2864 }, { "epoch": 0.40454673820954534, "grad_norm": 3.786103422509321, "learning_rate": 1.9429227320057106e-05, "loss": 1.1006, "step": 2865 }, { "epoch": 0.4046879412595312, "grad_norm": 4.487701523468622, "learning_rate": 1.9428719549984955e-05, "loss": 1.1669, "step": 2866 }, { "epoch": 0.4048291443095171, "grad_norm": 3.4445239823811558, "learning_rate": 1.9428211560793428e-05, "loss": 0.8673, "step": 2867 }, { "epoch": 0.404970347359503, "grad_norm": 3.702158668768986, "learning_rate": 1.9427703352494335e-05, "loss": 0.9227, "step": 2868 }, { "epoch": 0.4051115504094888, "grad_norm": 5.46220658963223, "learning_rate": 1.9427194925099494e-05, "loss": 1.2239, "step": 2869 }, { "epoch": 0.4052527534594747, "grad_norm": 3.6225140965531155, "learning_rate": 1.942668627862071e-05, "loss": 0.9566, "step": 2870 }, { "epoch": 0.4053939565094606, "grad_norm": 4.337107348316009, "learning_rate": 1.942617741306981e-05, "loss": 1.1838, "step": 2871 }, { "epoch": 0.40553515955944647, "grad_norm": 4.565524519449637, "learning_rate": 1.9425668328458616e-05, "loss": 1.3793, "step": 2872 }, { "epoch": 0.40567636260943235, "grad_norm": 3.86252950189058, "learning_rate": 1.942515902479896e-05, "loss": 1.1304, "step": 2873 }, { "epoch": 0.40581756565941823, "grad_norm": 4.108341347455858, "learning_rate": 1.942464950210268e-05, "loss": 1.0401, "step": 2874 }, { "epoch": 0.4059587687094041, "grad_norm": 4.762919246132147, "learning_rate": 1.942413976038162e-05, "loss": 1.3263, "step": 2875 }, { "epoch": 0.40609997175939, "grad_norm": 4.137159423555265, "learning_rate": 1.9423629799647618e-05, "loss": 1.0961, "step": 2876 }, { "epoch": 0.4062411748093759, "grad_norm": 3.6896545280011157, "learning_rate": 1.9423119619912527e-05, "loss": 1.0562, "step": 2877 }, { "epoch": 0.40638237785936177, "grad_norm": 5.367575830697878, "learning_rate": 1.9422609221188208e-05, "loss": 1.3097, "step": 2878 }, { "epoch": 0.40652358090934765, "grad_norm": 3.845062530908352, "learning_rate": 1.9422098603486515e-05, "loss": 1.1083, "step": 2879 }, { "epoch": 0.40666478395933353, "grad_norm": 3.676070379789297, "learning_rate": 1.942158776681933e-05, "loss": 0.8073, "step": 2880 }, { "epoch": 0.4068059870093194, "grad_norm": 4.434320226597917, "learning_rate": 1.9421076711198506e-05, "loss": 1.2405, "step": 2881 }, { "epoch": 0.4069471900593053, "grad_norm": 4.32857986896634, "learning_rate": 1.942056543663593e-05, "loss": 1.2995, "step": 2882 }, { "epoch": 0.4070883931092912, "grad_norm": 4.413173199569783, "learning_rate": 1.942005394314348e-05, "loss": 1.1787, "step": 2883 }, { "epoch": 0.40722959615927706, "grad_norm": 4.857200222535491, "learning_rate": 1.941954223073305e-05, "loss": 1.2984, "step": 2884 }, { "epoch": 0.40737079920926295, "grad_norm": 3.517980566567667, "learning_rate": 1.941903029941652e-05, "loss": 1.0472, "step": 2885 }, { "epoch": 0.4075120022592488, "grad_norm": 4.235916496034987, "learning_rate": 1.94185181492058e-05, "loss": 1.1288, "step": 2886 }, { "epoch": 0.40765320530923466, "grad_norm": 4.057923225045238, "learning_rate": 1.9418005780112777e-05, "loss": 1.1307, "step": 2887 }, { "epoch": 0.40779440835922054, "grad_norm": 4.214396380290513, "learning_rate": 1.9417493192149376e-05, "loss": 1.2268, "step": 2888 }, { "epoch": 0.4079356114092064, "grad_norm": 4.038566942763581, "learning_rate": 1.9416980385327498e-05, "loss": 1.0289, "step": 2889 }, { "epoch": 0.4080768144591923, "grad_norm": 3.91936775596543, "learning_rate": 1.941646735965906e-05, "loss": 1.0895, "step": 2890 }, { "epoch": 0.4082180175091782, "grad_norm": 3.6719061902776464, "learning_rate": 1.941595411515599e-05, "loss": 0.9377, "step": 2891 }, { "epoch": 0.40835922055916407, "grad_norm": 3.4112088096717845, "learning_rate": 1.941544065183021e-05, "loss": 0.9002, "step": 2892 }, { "epoch": 0.40850042360914995, "grad_norm": 4.095534536524793, "learning_rate": 1.9414926969693656e-05, "loss": 0.9946, "step": 2893 }, { "epoch": 0.40864162665913584, "grad_norm": 4.0611089448990985, "learning_rate": 1.9414413068758266e-05, "loss": 1.0269, "step": 2894 }, { "epoch": 0.4087828297091217, "grad_norm": 3.8096730364427076, "learning_rate": 1.9413898949035984e-05, "loss": 1.1793, "step": 2895 }, { "epoch": 0.4089240327591076, "grad_norm": 4.492082158315833, "learning_rate": 1.9413384610538752e-05, "loss": 1.0685, "step": 2896 }, { "epoch": 0.4090652358090935, "grad_norm": 5.487680047657643, "learning_rate": 1.941287005327853e-05, "loss": 1.1822, "step": 2897 }, { "epoch": 0.40920643885907937, "grad_norm": 3.795545089931177, "learning_rate": 1.941235527726727e-05, "loss": 0.8969, "step": 2898 }, { "epoch": 0.40934764190906525, "grad_norm": 4.03234949224482, "learning_rate": 1.9411840282516942e-05, "loss": 1.1299, "step": 2899 }, { "epoch": 0.40948884495905113, "grad_norm": 3.7970327476469086, "learning_rate": 1.941132506903951e-05, "loss": 1.0857, "step": 2900 }, { "epoch": 0.409630048009037, "grad_norm": 4.625579191005492, "learning_rate": 1.9410809636846944e-05, "loss": 1.0396, "step": 2901 }, { "epoch": 0.4097712510590229, "grad_norm": 4.480743071899812, "learning_rate": 1.9410293985951233e-05, "loss": 1.1204, "step": 2902 }, { "epoch": 0.40991245410900873, "grad_norm": 3.605369476357036, "learning_rate": 1.9409778116364348e-05, "loss": 0.9553, "step": 2903 }, { "epoch": 0.4100536571589946, "grad_norm": 3.7506194647636253, "learning_rate": 1.9409262028098285e-05, "loss": 0.9916, "step": 2904 }, { "epoch": 0.4101948602089805, "grad_norm": 4.040127056152281, "learning_rate": 1.9408745721165036e-05, "loss": 1.1525, "step": 2905 }, { "epoch": 0.4103360632589664, "grad_norm": 4.279249903489633, "learning_rate": 1.94082291955766e-05, "loss": 1.1097, "step": 2906 }, { "epoch": 0.41047726630895226, "grad_norm": 4.112230179182728, "learning_rate": 1.940771245134498e-05, "loss": 0.9315, "step": 2907 }, { "epoch": 0.41061846935893814, "grad_norm": 4.589461858156398, "learning_rate": 1.9407195488482185e-05, "loss": 1.1118, "step": 2908 }, { "epoch": 0.410759672408924, "grad_norm": 4.845963577400727, "learning_rate": 1.9406678307000232e-05, "loss": 1.1199, "step": 2909 }, { "epoch": 0.4109008754589099, "grad_norm": 3.2962672664968475, "learning_rate": 1.9406160906911137e-05, "loss": 0.8427, "step": 2910 }, { "epoch": 0.4110420785088958, "grad_norm": 3.8762616371397773, "learning_rate": 1.940564328822692e-05, "loss": 1.0769, "step": 2911 }, { "epoch": 0.4111832815588817, "grad_norm": 3.6760579977394547, "learning_rate": 1.9405125450959623e-05, "loss": 0.947, "step": 2912 }, { "epoch": 0.41132448460886756, "grad_norm": 4.205177159808833, "learning_rate": 1.9404607395121266e-05, "loss": 0.9743, "step": 2913 }, { "epoch": 0.41146568765885344, "grad_norm": 3.9307308571303965, "learning_rate": 1.94040891207239e-05, "loss": 1.0163, "step": 2914 }, { "epoch": 0.4116068907088393, "grad_norm": 4.162641929604948, "learning_rate": 1.940357062777956e-05, "loss": 0.988, "step": 2915 }, { "epoch": 0.4117480937588252, "grad_norm": 5.304032545216394, "learning_rate": 1.9403051916300296e-05, "loss": 1.1511, "step": 2916 }, { "epoch": 0.4118892968088111, "grad_norm": 5.967033691460604, "learning_rate": 1.940253298629817e-05, "loss": 1.1816, "step": 2917 }, { "epoch": 0.412030499858797, "grad_norm": 3.7326198418494068, "learning_rate": 1.9402013837785242e-05, "loss": 0.9557, "step": 2918 }, { "epoch": 0.41217170290878286, "grad_norm": 3.7592119236457977, "learning_rate": 1.940149447077357e-05, "loss": 0.9411, "step": 2919 }, { "epoch": 0.4123129059587687, "grad_norm": 4.62032473246964, "learning_rate": 1.9400974885275226e-05, "loss": 0.9074, "step": 2920 }, { "epoch": 0.41245410900875457, "grad_norm": 4.440049454858506, "learning_rate": 1.9400455081302287e-05, "loss": 1.162, "step": 2921 }, { "epoch": 0.41259531205874045, "grad_norm": 3.7492538552338694, "learning_rate": 1.939993505886683e-05, "loss": 0.974, "step": 2922 }, { "epoch": 0.41273651510872633, "grad_norm": 3.8826384689302667, "learning_rate": 1.9399414817980945e-05, "loss": 1.0394, "step": 2923 }, { "epoch": 0.4128777181587122, "grad_norm": 4.3725984902259105, "learning_rate": 1.9398894358656713e-05, "loss": 1.0699, "step": 2924 }, { "epoch": 0.4130189212086981, "grad_norm": 3.793479845934427, "learning_rate": 1.9398373680906242e-05, "loss": 0.9866, "step": 2925 }, { "epoch": 0.413160124258684, "grad_norm": 4.1472125644515625, "learning_rate": 1.939785278474162e-05, "loss": 1.1752, "step": 2926 }, { "epoch": 0.41330132730866986, "grad_norm": 4.519306232436554, "learning_rate": 1.9397331670174958e-05, "loss": 1.2904, "step": 2927 }, { "epoch": 0.41344253035865575, "grad_norm": 4.253264277881394, "learning_rate": 1.9396810337218373e-05, "loss": 1.134, "step": 2928 }, { "epoch": 0.41358373340864163, "grad_norm": 4.070477433674152, "learning_rate": 1.9396288785883968e-05, "loss": 1.1479, "step": 2929 }, { "epoch": 0.4137249364586275, "grad_norm": 3.938217226718001, "learning_rate": 1.939576701618387e-05, "loss": 1.1257, "step": 2930 }, { "epoch": 0.4138661395086134, "grad_norm": 3.982918332204933, "learning_rate": 1.9395245028130205e-05, "loss": 1.1336, "step": 2931 }, { "epoch": 0.4140073425585993, "grad_norm": 3.9098492968515135, "learning_rate": 1.9394722821735105e-05, "loss": 1.1502, "step": 2932 }, { "epoch": 0.41414854560858516, "grad_norm": 3.731447089150116, "learning_rate": 1.93942003970107e-05, "loss": 0.9934, "step": 2933 }, { "epoch": 0.41428974865857104, "grad_norm": 3.630236914862187, "learning_rate": 1.9393677753969137e-05, "loss": 0.976, "step": 2934 }, { "epoch": 0.4144309517085569, "grad_norm": 3.9682240211213013, "learning_rate": 1.939315489262256e-05, "loss": 1.0208, "step": 2935 }, { "epoch": 0.4145721547585428, "grad_norm": 4.4969947139892055, "learning_rate": 1.939263181298312e-05, "loss": 1.0882, "step": 2936 }, { "epoch": 0.41471335780852864, "grad_norm": 4.644199723029871, "learning_rate": 1.9392108515062973e-05, "loss": 1.1258, "step": 2937 }, { "epoch": 0.4148545608585145, "grad_norm": 3.5982982846682816, "learning_rate": 1.939158499887428e-05, "loss": 1.0078, "step": 2938 }, { "epoch": 0.4149957639085004, "grad_norm": 3.8268172344085145, "learning_rate": 1.9391061264429207e-05, "loss": 0.936, "step": 2939 }, { "epoch": 0.4151369669584863, "grad_norm": 4.660959823668172, "learning_rate": 1.9390537311739927e-05, "loss": 1.4282, "step": 2940 }, { "epoch": 0.41527817000847217, "grad_norm": 4.14310241056, "learning_rate": 1.9390013140818612e-05, "loss": 1.268, "step": 2941 }, { "epoch": 0.41541937305845805, "grad_norm": 5.6555251825391295, "learning_rate": 1.938948875167745e-05, "loss": 1.2468, "step": 2942 }, { "epoch": 0.41556057610844394, "grad_norm": 3.7502422429615314, "learning_rate": 1.9388964144328626e-05, "loss": 1.0252, "step": 2943 }, { "epoch": 0.4157017791584298, "grad_norm": 3.98705285787874, "learning_rate": 1.9388439318784328e-05, "loss": 1.1134, "step": 2944 }, { "epoch": 0.4158429822084157, "grad_norm": 3.6240863322419505, "learning_rate": 1.9387914275056754e-05, "loss": 0.9367, "step": 2945 }, { "epoch": 0.4159841852584016, "grad_norm": 3.553166782745452, "learning_rate": 1.9387389013158108e-05, "loss": 0.9547, "step": 2946 }, { "epoch": 0.41612538830838747, "grad_norm": 3.981078488280875, "learning_rate": 1.9386863533100597e-05, "loss": 1.1791, "step": 2947 }, { "epoch": 0.41626659135837335, "grad_norm": 3.376660750677664, "learning_rate": 1.9386337834896428e-05, "loss": 0.8798, "step": 2948 }, { "epoch": 0.41640779440835923, "grad_norm": 4.26818289019022, "learning_rate": 1.9385811918557822e-05, "loss": 0.993, "step": 2949 }, { "epoch": 0.4165489974583451, "grad_norm": 4.536825714719432, "learning_rate": 1.9385285784097e-05, "loss": 1.1402, "step": 2950 }, { "epoch": 0.416690200508331, "grad_norm": 3.6106692925528727, "learning_rate": 1.9384759431526192e-05, "loss": 0.7852, "step": 2951 }, { "epoch": 0.4168314035583169, "grad_norm": 4.147406182716625, "learning_rate": 1.9384232860857627e-05, "loss": 1.1421, "step": 2952 }, { "epoch": 0.41697260660830276, "grad_norm": 4.885322473280678, "learning_rate": 1.938370607210354e-05, "loss": 1.3687, "step": 2953 }, { "epoch": 0.4171138096582886, "grad_norm": 4.159471897154181, "learning_rate": 1.938317906527618e-05, "loss": 0.9791, "step": 2954 }, { "epoch": 0.4172550127082745, "grad_norm": 4.1498407242052595, "learning_rate": 1.938265184038779e-05, "loss": 1.0724, "step": 2955 }, { "epoch": 0.41739621575826036, "grad_norm": 4.5948087712234065, "learning_rate": 1.938212439745062e-05, "loss": 1.1297, "step": 2956 }, { "epoch": 0.41753741880824624, "grad_norm": 4.893705377001965, "learning_rate": 1.9381596736476936e-05, "loss": 1.0395, "step": 2957 }, { "epoch": 0.4176786218582321, "grad_norm": 4.1482513911701595, "learning_rate": 1.9381068857478994e-05, "loss": 1.1519, "step": 2958 }, { "epoch": 0.417819824908218, "grad_norm": 4.757438876955242, "learning_rate": 1.938054076046906e-05, "loss": 1.3424, "step": 2959 }, { "epoch": 0.4179610279582039, "grad_norm": 4.995694320864768, "learning_rate": 1.938001244545941e-05, "loss": 1.2273, "step": 2960 }, { "epoch": 0.4181022310081898, "grad_norm": 3.786883865536849, "learning_rate": 1.9379483912462326e-05, "loss": 1.0798, "step": 2961 }, { "epoch": 0.41824343405817566, "grad_norm": 3.5285318571271076, "learning_rate": 1.9378955161490086e-05, "loss": 1.0306, "step": 2962 }, { "epoch": 0.41838463710816154, "grad_norm": 4.084789505881134, "learning_rate": 1.9378426192554975e-05, "loss": 1.0592, "step": 2963 }, { "epoch": 0.4185258401581474, "grad_norm": 3.6749399432449517, "learning_rate": 1.937789700566929e-05, "loss": 0.8817, "step": 2964 }, { "epoch": 0.4186670432081333, "grad_norm": 4.001618647336214, "learning_rate": 1.9377367600845333e-05, "loss": 0.9592, "step": 2965 }, { "epoch": 0.4188082462581192, "grad_norm": 3.716854461116892, "learning_rate": 1.93768379780954e-05, "loss": 1.1373, "step": 2966 }, { "epoch": 0.41894944930810507, "grad_norm": 5.1639864738998575, "learning_rate": 1.9376308137431802e-05, "loss": 1.3133, "step": 2967 }, { "epoch": 0.41909065235809095, "grad_norm": 3.092885078183632, "learning_rate": 1.937577807886685e-05, "loss": 0.7712, "step": 2968 }, { "epoch": 0.41923185540807684, "grad_norm": 5.78927822030806, "learning_rate": 1.9375247802412867e-05, "loss": 1.385, "step": 2969 }, { "epoch": 0.4193730584580627, "grad_norm": 3.7088723792850042, "learning_rate": 1.9374717308082172e-05, "loss": 1.0394, "step": 2970 }, { "epoch": 0.41951426150804855, "grad_norm": 4.356329954290291, "learning_rate": 1.9374186595887096e-05, "loss": 1.1356, "step": 2971 }, { "epoch": 0.41965546455803443, "grad_norm": 3.8486158356130757, "learning_rate": 1.9373655665839973e-05, "loss": 1.1013, "step": 2972 }, { "epoch": 0.4197966676080203, "grad_norm": 3.6979239248404605, "learning_rate": 1.937312451795314e-05, "loss": 1.0834, "step": 2973 }, { "epoch": 0.4199378706580062, "grad_norm": 4.2480244243582765, "learning_rate": 1.937259315223894e-05, "loss": 1.2392, "step": 2974 }, { "epoch": 0.4200790737079921, "grad_norm": 3.821583404521679, "learning_rate": 1.937206156870973e-05, "loss": 0.868, "step": 2975 }, { "epoch": 0.42022027675797796, "grad_norm": 3.4317261341639234, "learning_rate": 1.937152976737785e-05, "loss": 0.8321, "step": 2976 }, { "epoch": 0.42036147980796384, "grad_norm": 4.4689300338247, "learning_rate": 1.9370997748255665e-05, "loss": 1.3988, "step": 2977 }, { "epoch": 0.42050268285794973, "grad_norm": 4.343532653965976, "learning_rate": 1.937046551135554e-05, "loss": 1.1517, "step": 2978 }, { "epoch": 0.4206438859079356, "grad_norm": 4.110860584375025, "learning_rate": 1.936993305668984e-05, "loss": 0.9158, "step": 2979 }, { "epoch": 0.4207850889579215, "grad_norm": 3.8065787826575934, "learning_rate": 1.9369400384270948e-05, "loss": 1.0486, "step": 2980 }, { "epoch": 0.4209262920079074, "grad_norm": 3.541050117612057, "learning_rate": 1.936886749411124e-05, "loss": 0.9584, "step": 2981 }, { "epoch": 0.42106749505789326, "grad_norm": 4.376819041779908, "learning_rate": 1.9368334386223092e-05, "loss": 1.3686, "step": 2982 }, { "epoch": 0.42120869810787914, "grad_norm": 4.022992484473648, "learning_rate": 1.93678010606189e-05, "loss": 1.2296, "step": 2983 }, { "epoch": 0.421349901157865, "grad_norm": 3.587734342463129, "learning_rate": 1.9367267517311057e-05, "loss": 0.9388, "step": 2984 }, { "epoch": 0.4214911042078509, "grad_norm": 4.75393160614606, "learning_rate": 1.936673375631196e-05, "loss": 1.2229, "step": 2985 }, { "epoch": 0.4216323072578368, "grad_norm": 3.5073132722201636, "learning_rate": 1.9366199777634018e-05, "loss": 1.048, "step": 2986 }, { "epoch": 0.4217735103078227, "grad_norm": 3.910440585809235, "learning_rate": 1.936566558128964e-05, "loss": 0.978, "step": 2987 }, { "epoch": 0.4219147133578085, "grad_norm": 4.666741046696743, "learning_rate": 1.9365131167291237e-05, "loss": 1.1699, "step": 2988 }, { "epoch": 0.4220559164077944, "grad_norm": 4.5289714222021535, "learning_rate": 1.936459653565123e-05, "loss": 1.2689, "step": 2989 }, { "epoch": 0.42219711945778027, "grad_norm": 4.762993725661353, "learning_rate": 1.9364061686382042e-05, "loss": 1.2597, "step": 2990 }, { "epoch": 0.42233832250776615, "grad_norm": 4.069427959080039, "learning_rate": 1.9363526619496106e-05, "loss": 0.8957, "step": 2991 }, { "epoch": 0.42247952555775203, "grad_norm": 3.3439384686161384, "learning_rate": 1.9362991335005853e-05, "loss": 0.9248, "step": 2992 }, { "epoch": 0.4226207286077379, "grad_norm": 4.225208176149682, "learning_rate": 1.9362455832923726e-05, "loss": 1.1362, "step": 2993 }, { "epoch": 0.4227619316577238, "grad_norm": 3.639864023243686, "learning_rate": 1.9361920113262172e-05, "loss": 1.0212, "step": 2994 }, { "epoch": 0.4229031347077097, "grad_norm": 4.369128058571234, "learning_rate": 1.9361384176033637e-05, "loss": 1.3446, "step": 2995 }, { "epoch": 0.42304433775769557, "grad_norm": 4.327988460038719, "learning_rate": 1.9360848021250573e-05, "loss": 1.0235, "step": 2996 }, { "epoch": 0.42318554080768145, "grad_norm": 4.366791415942107, "learning_rate": 1.9360311648925448e-05, "loss": 1.197, "step": 2997 }, { "epoch": 0.42332674385766733, "grad_norm": 4.050730950424255, "learning_rate": 1.935977505907072e-05, "loss": 1.1757, "step": 2998 }, { "epoch": 0.4234679469076532, "grad_norm": 4.0717476681005085, "learning_rate": 1.9359238251698862e-05, "loss": 1.0177, "step": 2999 }, { "epoch": 0.4236091499576391, "grad_norm": 5.236325766803049, "learning_rate": 1.935870122682235e-05, "loss": 1.0793, "step": 3000 }, { "epoch": 0.423750353007625, "grad_norm": 3.4543363743118687, "learning_rate": 1.935816398445366e-05, "loss": 0.935, "step": 3001 }, { "epoch": 0.42389155605761086, "grad_norm": 4.594292267265184, "learning_rate": 1.9357626524605286e-05, "loss": 1.426, "step": 3002 }, { "epoch": 0.42403275910759675, "grad_norm": 3.2856845464798066, "learning_rate": 1.9357088847289705e-05, "loss": 0.9806, "step": 3003 }, { "epoch": 0.42417396215758263, "grad_norm": 4.356371456307121, "learning_rate": 1.935655095251943e-05, "loss": 1.0419, "step": 3004 }, { "epoch": 0.42431516520756846, "grad_norm": 3.2771613212080495, "learning_rate": 1.9356012840306945e-05, "loss": 0.8717, "step": 3005 }, { "epoch": 0.42445636825755434, "grad_norm": 4.722649611407031, "learning_rate": 1.9355474510664763e-05, "loss": 1.0947, "step": 3006 }, { "epoch": 0.4245975713075402, "grad_norm": 3.985001307257452, "learning_rate": 1.9354935963605395e-05, "loss": 1.0849, "step": 3007 }, { "epoch": 0.4247387743575261, "grad_norm": 4.034451347047195, "learning_rate": 1.9354397199141356e-05, "loss": 0.9316, "step": 3008 }, { "epoch": 0.424879977407512, "grad_norm": 3.9485174388475808, "learning_rate": 1.935385821728516e-05, "loss": 1.0944, "step": 3009 }, { "epoch": 0.42502118045749787, "grad_norm": 3.7002010891142385, "learning_rate": 1.9353319018049346e-05, "loss": 0.9603, "step": 3010 }, { "epoch": 0.42516238350748375, "grad_norm": 4.910970112165826, "learning_rate": 1.9352779601446435e-05, "loss": 1.1746, "step": 3011 }, { "epoch": 0.42530358655746964, "grad_norm": 3.7963204765837193, "learning_rate": 1.9352239967488965e-05, "loss": 1.0846, "step": 3012 }, { "epoch": 0.4254447896074555, "grad_norm": 4.570854899293397, "learning_rate": 1.9351700116189474e-05, "loss": 1.1241, "step": 3013 }, { "epoch": 0.4255859926574414, "grad_norm": 4.915726820841134, "learning_rate": 1.9351160047560516e-05, "loss": 1.2272, "step": 3014 }, { "epoch": 0.4257271957074273, "grad_norm": 4.134927119536651, "learning_rate": 1.9350619761614634e-05, "loss": 1.0089, "step": 3015 }, { "epoch": 0.42586839875741317, "grad_norm": 3.6027544377206686, "learning_rate": 1.935007925836439e-05, "loss": 0.7604, "step": 3016 }, { "epoch": 0.42600960180739905, "grad_norm": 3.935618371132091, "learning_rate": 1.9349538537822342e-05, "loss": 1.0387, "step": 3017 }, { "epoch": 0.42615080485738493, "grad_norm": 4.136742954892019, "learning_rate": 1.9348997600001052e-05, "loss": 1.2162, "step": 3018 }, { "epoch": 0.4262920079073708, "grad_norm": 3.907553560672167, "learning_rate": 1.9348456444913098e-05, "loss": 1.0481, "step": 3019 }, { "epoch": 0.4264332109573567, "grad_norm": 3.41435332853642, "learning_rate": 1.934791507257105e-05, "loss": 0.8571, "step": 3020 }, { "epoch": 0.4265744140073426, "grad_norm": 3.12967297619145, "learning_rate": 1.9347373482987497e-05, "loss": 0.7906, "step": 3021 }, { "epoch": 0.4267156170573284, "grad_norm": 3.463511263098065, "learning_rate": 1.934683167617502e-05, "loss": 0.853, "step": 3022 }, { "epoch": 0.4268568201073143, "grad_norm": 3.8317183693774783, "learning_rate": 1.9346289652146212e-05, "loss": 0.9446, "step": 3023 }, { "epoch": 0.4269980231573002, "grad_norm": 3.5875320589148276, "learning_rate": 1.9345747410913666e-05, "loss": 0.9279, "step": 3024 }, { "epoch": 0.42713922620728606, "grad_norm": 4.345646340930474, "learning_rate": 1.934520495248999e-05, "loss": 1.1124, "step": 3025 }, { "epoch": 0.42728042925727194, "grad_norm": 4.747252224652471, "learning_rate": 1.9344662276887787e-05, "loss": 1.1809, "step": 3026 }, { "epoch": 0.4274216323072578, "grad_norm": 3.4596524485931672, "learning_rate": 1.9344119384119665e-05, "loss": 0.8293, "step": 3027 }, { "epoch": 0.4275628353572437, "grad_norm": 5.000673678340825, "learning_rate": 1.9343576274198246e-05, "loss": 1.3002, "step": 3028 }, { "epoch": 0.4277040384072296, "grad_norm": 4.259529042781315, "learning_rate": 1.934303294713615e-05, "loss": 0.8908, "step": 3029 }, { "epoch": 0.4278452414572155, "grad_norm": 3.7582437419678008, "learning_rate": 1.9342489402945997e-05, "loss": 1.0599, "step": 3030 }, { "epoch": 0.42798644450720136, "grad_norm": 3.883050328807419, "learning_rate": 1.9341945641640432e-05, "loss": 1.1235, "step": 3031 }, { "epoch": 0.42812764755718724, "grad_norm": 4.255648148703228, "learning_rate": 1.9341401663232083e-05, "loss": 1.2607, "step": 3032 }, { "epoch": 0.4282688506071731, "grad_norm": 3.9189560493364577, "learning_rate": 1.9340857467733595e-05, "loss": 0.9827, "step": 3033 }, { "epoch": 0.428410053657159, "grad_norm": 3.9164510829482806, "learning_rate": 1.934031305515761e-05, "loss": 1.0906, "step": 3034 }, { "epoch": 0.4285512567071449, "grad_norm": 3.45283831547893, "learning_rate": 1.9339768425516786e-05, "loss": 0.9213, "step": 3035 }, { "epoch": 0.4286924597571308, "grad_norm": 3.997385854545787, "learning_rate": 1.933922357882378e-05, "loss": 1.1353, "step": 3036 }, { "epoch": 0.42883366280711666, "grad_norm": 3.937757684435129, "learning_rate": 1.9338678515091243e-05, "loss": 1.1365, "step": 3037 }, { "epoch": 0.42897486585710254, "grad_norm": 3.2627494341419583, "learning_rate": 1.933813323433186e-05, "loss": 0.8759, "step": 3038 }, { "epoch": 0.42911606890708837, "grad_norm": 3.8664787103024207, "learning_rate": 1.9337587736558286e-05, "loss": 1.0441, "step": 3039 }, { "epoch": 0.42925727195707425, "grad_norm": 4.549203146952161, "learning_rate": 1.933704202178321e-05, "loss": 1.2241, "step": 3040 }, { "epoch": 0.42939847500706013, "grad_norm": 3.670039295681777, "learning_rate": 1.9336496090019307e-05, "loss": 1.0967, "step": 3041 }, { "epoch": 0.429539678057046, "grad_norm": 4.163687334596823, "learning_rate": 1.9335949941279267e-05, "loss": 1.1813, "step": 3042 }, { "epoch": 0.4296808811070319, "grad_norm": 4.433612787254471, "learning_rate": 1.9335403575575787e-05, "loss": 1.3643, "step": 3043 }, { "epoch": 0.4298220841570178, "grad_norm": 4.825446832411553, "learning_rate": 1.9334856992921555e-05, "loss": 1.1961, "step": 3044 }, { "epoch": 0.42996328720700366, "grad_norm": 3.7880358755130525, "learning_rate": 1.9334310193329276e-05, "loss": 0.9831, "step": 3045 }, { "epoch": 0.43010449025698955, "grad_norm": 3.86781106719311, "learning_rate": 1.9333763176811663e-05, "loss": 1.1971, "step": 3046 }, { "epoch": 0.43024569330697543, "grad_norm": 3.801751242889334, "learning_rate": 1.9333215943381425e-05, "loss": 0.978, "step": 3047 }, { "epoch": 0.4303868963569613, "grad_norm": 3.825655518979746, "learning_rate": 1.933266849305128e-05, "loss": 1.0388, "step": 3048 }, { "epoch": 0.4305280994069472, "grad_norm": 3.671328006227849, "learning_rate": 1.9332120825833948e-05, "loss": 0.9056, "step": 3049 }, { "epoch": 0.4306693024569331, "grad_norm": 3.6053679729218024, "learning_rate": 1.9331572941742157e-05, "loss": 1.0565, "step": 3050 }, { "epoch": 0.43081050550691896, "grad_norm": 3.747455967972404, "learning_rate": 1.933102484078864e-05, "loss": 1.0793, "step": 3051 }, { "epoch": 0.43095170855690484, "grad_norm": 3.172952655342485, "learning_rate": 1.9330476522986136e-05, "loss": 0.8129, "step": 3052 }, { "epoch": 0.4310929116068907, "grad_norm": 4.242937112399398, "learning_rate": 1.932992798834739e-05, "loss": 1.1198, "step": 3053 }, { "epoch": 0.4312341146568766, "grad_norm": 3.7048135181453277, "learning_rate": 1.9329379236885145e-05, "loss": 0.9356, "step": 3054 }, { "epoch": 0.4313753177068625, "grad_norm": 4.173652898442035, "learning_rate": 1.9328830268612155e-05, "loss": 1.0386, "step": 3055 }, { "epoch": 0.4315165207568483, "grad_norm": 4.644994445099839, "learning_rate": 1.932828108354118e-05, "loss": 1.244, "step": 3056 }, { "epoch": 0.4316577238068342, "grad_norm": 3.4156460497909555, "learning_rate": 1.932773168168498e-05, "loss": 1.0108, "step": 3057 }, { "epoch": 0.4317989268568201, "grad_norm": 3.4952582420354483, "learning_rate": 1.9327182063056325e-05, "loss": 0.9767, "step": 3058 }, { "epoch": 0.43194012990680597, "grad_norm": 3.5005816237212275, "learning_rate": 1.932663222766799e-05, "loss": 0.8376, "step": 3059 }, { "epoch": 0.43208133295679185, "grad_norm": 3.970449591273115, "learning_rate": 1.9326082175532744e-05, "loss": 1.1309, "step": 3060 }, { "epoch": 0.43222253600677774, "grad_norm": 3.9419487217418596, "learning_rate": 1.9325531906663377e-05, "loss": 1.2564, "step": 3061 }, { "epoch": 0.4323637390567636, "grad_norm": 4.119349468406248, "learning_rate": 1.932498142107268e-05, "loss": 1.2772, "step": 3062 }, { "epoch": 0.4325049421067495, "grad_norm": 4.359452465240006, "learning_rate": 1.9324430718773436e-05, "loss": 1.0907, "step": 3063 }, { "epoch": 0.4326461451567354, "grad_norm": 3.846599677665212, "learning_rate": 1.9323879799778452e-05, "loss": 0.9929, "step": 3064 }, { "epoch": 0.43278734820672127, "grad_norm": 4.157127194315688, "learning_rate": 1.9323328664100527e-05, "loss": 1.1051, "step": 3065 }, { "epoch": 0.43292855125670715, "grad_norm": 3.82641484847605, "learning_rate": 1.9322777311752473e-05, "loss": 1.1926, "step": 3066 }, { "epoch": 0.43306975430669303, "grad_norm": 3.6778994874121422, "learning_rate": 1.93222257427471e-05, "loss": 0.9989, "step": 3067 }, { "epoch": 0.4332109573566789, "grad_norm": 4.2429486436424035, "learning_rate": 1.9321673957097226e-05, "loss": 0.9301, "step": 3068 }, { "epoch": 0.4333521604066648, "grad_norm": 3.645462968627209, "learning_rate": 1.9321121954815675e-05, "loss": 0.9476, "step": 3069 }, { "epoch": 0.4334933634566507, "grad_norm": 3.989111425830674, "learning_rate": 1.9320569735915273e-05, "loss": 1.0978, "step": 3070 }, { "epoch": 0.43363456650663657, "grad_norm": 4.784009524915928, "learning_rate": 1.932001730040886e-05, "loss": 1.1225, "step": 3071 }, { "epoch": 0.43377576955662245, "grad_norm": 4.604163182369936, "learning_rate": 1.9319464648309265e-05, "loss": 0.9683, "step": 3072 }, { "epoch": 0.4339169726066083, "grad_norm": 3.9060449651149365, "learning_rate": 1.9318911779629337e-05, "loss": 0.8944, "step": 3073 }, { "epoch": 0.43405817565659416, "grad_norm": 4.20274958217526, "learning_rate": 1.9318358694381926e-05, "loss": 1.0341, "step": 3074 }, { "epoch": 0.43419937870658004, "grad_norm": 4.065063222993628, "learning_rate": 1.9317805392579886e-05, "loss": 1.1294, "step": 3075 }, { "epoch": 0.4343405817565659, "grad_norm": 3.7964382943949677, "learning_rate": 1.9317251874236066e-05, "loss": 1.1115, "step": 3076 }, { "epoch": 0.4344817848065518, "grad_norm": 4.222133006828667, "learning_rate": 1.931669813936334e-05, "loss": 1.0976, "step": 3077 }, { "epoch": 0.4346229878565377, "grad_norm": 3.442718841787935, "learning_rate": 1.931614418797457e-05, "loss": 0.9777, "step": 3078 }, { "epoch": 0.4347641909065236, "grad_norm": 4.399964640953782, "learning_rate": 1.9315590020082637e-05, "loss": 1.0787, "step": 3079 }, { "epoch": 0.43490539395650946, "grad_norm": 3.4758442010782633, "learning_rate": 1.9315035635700412e-05, "loss": 0.9132, "step": 3080 }, { "epoch": 0.43504659700649534, "grad_norm": 4.157572424347774, "learning_rate": 1.9314481034840783e-05, "loss": 1.1864, "step": 3081 }, { "epoch": 0.4351878000564812, "grad_norm": 3.458396471551333, "learning_rate": 1.9313926217516637e-05, "loss": 0.8976, "step": 3082 }, { "epoch": 0.4353290031064671, "grad_norm": 3.5817117213904512, "learning_rate": 1.9313371183740868e-05, "loss": 0.8821, "step": 3083 }, { "epoch": 0.435470206156453, "grad_norm": 3.805290334510746, "learning_rate": 1.9312815933526375e-05, "loss": 0.998, "step": 3084 }, { "epoch": 0.43561140920643887, "grad_norm": 3.896199253742902, "learning_rate": 1.931226046688606e-05, "loss": 1.0963, "step": 3085 }, { "epoch": 0.43575261225642475, "grad_norm": 4.434914638021939, "learning_rate": 1.9311704783832835e-05, "loss": 1.3411, "step": 3086 }, { "epoch": 0.43589381530641064, "grad_norm": 3.79571125628213, "learning_rate": 1.9311148884379616e-05, "loss": 1.0854, "step": 3087 }, { "epoch": 0.4360350183563965, "grad_norm": 3.682735872139414, "learning_rate": 1.9310592768539315e-05, "loss": 0.9987, "step": 3088 }, { "epoch": 0.4361762214063824, "grad_norm": 4.081997895492289, "learning_rate": 1.9310036436324857e-05, "loss": 0.9855, "step": 3089 }, { "epoch": 0.43631742445636823, "grad_norm": 3.6621109158471827, "learning_rate": 1.9309479887749175e-05, "loss": 0.9773, "step": 3090 }, { "epoch": 0.4364586275063541, "grad_norm": 3.887623603044863, "learning_rate": 1.93089231228252e-05, "loss": 1.1032, "step": 3091 }, { "epoch": 0.43659983055634, "grad_norm": 3.3838150639669, "learning_rate": 1.930836614156587e-05, "loss": 0.8962, "step": 3092 }, { "epoch": 0.4367410336063259, "grad_norm": 3.2860702019130885, "learning_rate": 1.9307808943984132e-05, "loss": 0.8299, "step": 3093 }, { "epoch": 0.43688223665631176, "grad_norm": 3.190590588823567, "learning_rate": 1.9307251530092937e-05, "loss": 0.8649, "step": 3094 }, { "epoch": 0.43702343970629765, "grad_norm": 4.188406345952829, "learning_rate": 1.9306693899905232e-05, "loss": 0.9387, "step": 3095 }, { "epoch": 0.43716464275628353, "grad_norm": 3.0633340570693552, "learning_rate": 1.930613605343398e-05, "loss": 0.8007, "step": 3096 }, { "epoch": 0.4373058458062694, "grad_norm": 4.238478260569049, "learning_rate": 1.9305577990692148e-05, "loss": 1.2214, "step": 3097 }, { "epoch": 0.4374470488562553, "grad_norm": 3.6520101148347135, "learning_rate": 1.93050197116927e-05, "loss": 1.0285, "step": 3098 }, { "epoch": 0.4375882519062412, "grad_norm": 3.431828767800515, "learning_rate": 1.9304461216448612e-05, "loss": 0.9831, "step": 3099 }, { "epoch": 0.43772945495622706, "grad_norm": 3.4110434426749006, "learning_rate": 1.9303902504972866e-05, "loss": 0.8503, "step": 3100 }, { "epoch": 0.43787065800621294, "grad_norm": 3.9110158690603223, "learning_rate": 1.9303343577278442e-05, "loss": 1.0785, "step": 3101 }, { "epoch": 0.4380118610561988, "grad_norm": 4.232109840186461, "learning_rate": 1.9302784433378333e-05, "loss": 1.2743, "step": 3102 }, { "epoch": 0.4381530641061847, "grad_norm": 3.986420497115517, "learning_rate": 1.930222507328553e-05, "loss": 0.9571, "step": 3103 }, { "epoch": 0.4382942671561706, "grad_norm": 3.7563424477687417, "learning_rate": 1.9301665497013034e-05, "loss": 1.057, "step": 3104 }, { "epoch": 0.4384354702061565, "grad_norm": 5.4073795662568545, "learning_rate": 1.930110570457385e-05, "loss": 1.052, "step": 3105 }, { "epoch": 0.43857667325614236, "grad_norm": 4.2123607959689835, "learning_rate": 1.9300545695980985e-05, "loss": 1.1207, "step": 3106 }, { "epoch": 0.4387178763061282, "grad_norm": 5.140158271535641, "learning_rate": 1.929998547124745e-05, "loss": 1.4517, "step": 3107 }, { "epoch": 0.43885907935611407, "grad_norm": 4.623403812100402, "learning_rate": 1.929942503038628e-05, "loss": 1.2624, "step": 3108 }, { "epoch": 0.43900028240609995, "grad_norm": 4.211892215808612, "learning_rate": 1.9298864373410477e-05, "loss": 1.1706, "step": 3109 }, { "epoch": 0.43914148545608583, "grad_norm": 3.717323900919158, "learning_rate": 1.9298303500333088e-05, "loss": 0.9268, "step": 3110 }, { "epoch": 0.4392826885060717, "grad_norm": 3.7191573374826246, "learning_rate": 1.929774241116714e-05, "loss": 0.9278, "step": 3111 }, { "epoch": 0.4394238915560576, "grad_norm": 3.6876162709137055, "learning_rate": 1.9297181105925675e-05, "loss": 1.029, "step": 3112 }, { "epoch": 0.4395650946060435, "grad_norm": 3.9312474403043147, "learning_rate": 1.9296619584621737e-05, "loss": 1.2042, "step": 3113 }, { "epoch": 0.43970629765602937, "grad_norm": 3.852238466815394, "learning_rate": 1.929605784726837e-05, "loss": 1.0399, "step": 3114 }, { "epoch": 0.43984750070601525, "grad_norm": 3.4566880499485877, "learning_rate": 1.9295495893878638e-05, "loss": 0.9795, "step": 3115 }, { "epoch": 0.43998870375600113, "grad_norm": 4.113201618084296, "learning_rate": 1.9294933724465593e-05, "loss": 1.3562, "step": 3116 }, { "epoch": 0.440129906805987, "grad_norm": 3.8468804061149426, "learning_rate": 1.9294371339042305e-05, "loss": 1.1798, "step": 3117 }, { "epoch": 0.4402711098559729, "grad_norm": 4.35408255915857, "learning_rate": 1.9293808737621837e-05, "loss": 1.0999, "step": 3118 }, { "epoch": 0.4404123129059588, "grad_norm": 4.129514889494459, "learning_rate": 1.929324592021727e-05, "loss": 1.0395, "step": 3119 }, { "epoch": 0.44055351595594466, "grad_norm": 3.8047420571519686, "learning_rate": 1.9292682886841683e-05, "loss": 1.0139, "step": 3120 }, { "epoch": 0.44069471900593055, "grad_norm": 3.9663320981329457, "learning_rate": 1.9292119637508157e-05, "loss": 0.9917, "step": 3121 }, { "epoch": 0.44083592205591643, "grad_norm": 3.586973470481621, "learning_rate": 1.9291556172229784e-05, "loss": 0.9907, "step": 3122 }, { "epoch": 0.4409771251059023, "grad_norm": 4.442665727267057, "learning_rate": 1.9290992491019657e-05, "loss": 1.0727, "step": 3123 }, { "epoch": 0.44111832815588814, "grad_norm": 4.094689875073246, "learning_rate": 1.929042859389088e-05, "loss": 1.0971, "step": 3124 }, { "epoch": 0.441259531205874, "grad_norm": 3.6442186111302775, "learning_rate": 1.928986448085655e-05, "loss": 1.0681, "step": 3125 }, { "epoch": 0.4414007342558599, "grad_norm": 3.4861332755296544, "learning_rate": 1.9289300151929784e-05, "loss": 0.9432, "step": 3126 }, { "epoch": 0.4415419373058458, "grad_norm": 4.248028679586546, "learning_rate": 1.9288735607123695e-05, "loss": 0.9943, "step": 3127 }, { "epoch": 0.44168314035583167, "grad_norm": 3.705672576117425, "learning_rate": 1.9288170846451402e-05, "loss": 0.9565, "step": 3128 }, { "epoch": 0.44182434340581755, "grad_norm": 4.355428299227021, "learning_rate": 1.928760586992603e-05, "loss": 1.1321, "step": 3129 }, { "epoch": 0.44196554645580344, "grad_norm": 3.9657743822931932, "learning_rate": 1.928704067756071e-05, "loss": 1.1105, "step": 3130 }, { "epoch": 0.4421067495057893, "grad_norm": 3.894343404317645, "learning_rate": 1.9286475269368574e-05, "loss": 1.0944, "step": 3131 }, { "epoch": 0.4422479525557752, "grad_norm": 3.9497216808509483, "learning_rate": 1.928590964536276e-05, "loss": 1.0607, "step": 3132 }, { "epoch": 0.4423891556057611, "grad_norm": 3.454483643893276, "learning_rate": 1.9285343805556418e-05, "loss": 0.8495, "step": 3133 }, { "epoch": 0.44253035865574697, "grad_norm": 3.8400205721348146, "learning_rate": 1.9284777749962696e-05, "loss": 1.1792, "step": 3134 }, { "epoch": 0.44267156170573285, "grad_norm": 3.8687367168419704, "learning_rate": 1.928421147859475e-05, "loss": 1.1146, "step": 3135 }, { "epoch": 0.44281276475571874, "grad_norm": 4.504090587302897, "learning_rate": 1.928364499146574e-05, "loss": 1.0815, "step": 3136 }, { "epoch": 0.4429539678057046, "grad_norm": 3.9472332764295297, "learning_rate": 1.9283078288588826e-05, "loss": 0.8455, "step": 3137 }, { "epoch": 0.4430951708556905, "grad_norm": 4.642613878242063, "learning_rate": 1.9282511369977185e-05, "loss": 1.2216, "step": 3138 }, { "epoch": 0.4432363739056764, "grad_norm": 3.710934033699281, "learning_rate": 1.9281944235643986e-05, "loss": 0.92, "step": 3139 }, { "epoch": 0.44337757695566227, "grad_norm": 3.5441624949324897, "learning_rate": 1.9281376885602412e-05, "loss": 0.9521, "step": 3140 }, { "epoch": 0.4435187800056481, "grad_norm": 4.0213182031788905, "learning_rate": 1.928080931986565e-05, "loss": 1.1498, "step": 3141 }, { "epoch": 0.443659983055634, "grad_norm": 3.5433944095505976, "learning_rate": 1.9280241538446885e-05, "loss": 1.0138, "step": 3142 }, { "epoch": 0.44380118610561986, "grad_norm": 3.738954619398003, "learning_rate": 1.9279673541359313e-05, "loss": 0.9401, "step": 3143 }, { "epoch": 0.44394238915560574, "grad_norm": 4.276428807787635, "learning_rate": 1.927910532861614e-05, "loss": 1.194, "step": 3144 }, { "epoch": 0.4440835922055916, "grad_norm": 4.546884445078243, "learning_rate": 1.9278536900230564e-05, "loss": 1.0508, "step": 3145 }, { "epoch": 0.4442247952555775, "grad_norm": 3.7622834397976104, "learning_rate": 1.9277968256215794e-05, "loss": 1.0597, "step": 3146 }, { "epoch": 0.4443659983055634, "grad_norm": 4.1101496165031595, "learning_rate": 1.9277399396585054e-05, "loss": 0.9759, "step": 3147 }, { "epoch": 0.4445072013555493, "grad_norm": 3.896317784091958, "learning_rate": 1.9276830321351558e-05, "loss": 1.2466, "step": 3148 }, { "epoch": 0.44464840440553516, "grad_norm": 3.8148628675246576, "learning_rate": 1.927626103052853e-05, "loss": 1.2526, "step": 3149 }, { "epoch": 0.44478960745552104, "grad_norm": 4.048905476139718, "learning_rate": 1.9275691524129203e-05, "loss": 1.0715, "step": 3150 }, { "epoch": 0.4449308105055069, "grad_norm": 3.4410239215511442, "learning_rate": 1.927512180216681e-05, "loss": 1.0097, "step": 3151 }, { "epoch": 0.4450720135554928, "grad_norm": 3.4607608419963447, "learning_rate": 1.9274551864654593e-05, "loss": 1.0933, "step": 3152 }, { "epoch": 0.4452132166054787, "grad_norm": 4.46369376071989, "learning_rate": 1.9273981711605793e-05, "loss": 1.1699, "step": 3153 }, { "epoch": 0.4453544196554646, "grad_norm": 4.210818690772342, "learning_rate": 1.9273411343033667e-05, "loss": 1.2356, "step": 3154 }, { "epoch": 0.44549562270545046, "grad_norm": 4.389419686466223, "learning_rate": 1.9272840758951464e-05, "loss": 0.9513, "step": 3155 }, { "epoch": 0.44563682575543634, "grad_norm": 3.891153503625258, "learning_rate": 1.9272269959372444e-05, "loss": 1.043, "step": 3156 }, { "epoch": 0.4457780288054222, "grad_norm": 4.208935098734498, "learning_rate": 1.927169894430988e-05, "loss": 1.1196, "step": 3157 }, { "epoch": 0.44591923185540805, "grad_norm": 3.620125914607161, "learning_rate": 1.9271127713777033e-05, "loss": 1.0904, "step": 3158 }, { "epoch": 0.44606043490539393, "grad_norm": 5.048032495465287, "learning_rate": 1.9270556267787184e-05, "loss": 1.5335, "step": 3159 }, { "epoch": 0.4462016379553798, "grad_norm": 4.419334627418588, "learning_rate": 1.926998460635361e-05, "loss": 1.0454, "step": 3160 }, { "epoch": 0.4463428410053657, "grad_norm": 3.872187238600624, "learning_rate": 1.9269412729489597e-05, "loss": 1.1314, "step": 3161 }, { "epoch": 0.4464840440553516, "grad_norm": 3.336098507424238, "learning_rate": 1.9268840637208436e-05, "loss": 1.0939, "step": 3162 }, { "epoch": 0.44662524710533746, "grad_norm": 4.1961448539736885, "learning_rate": 1.9268268329523422e-05, "loss": 1.0461, "step": 3163 }, { "epoch": 0.44676645015532335, "grad_norm": 3.999357295496932, "learning_rate": 1.926769580644785e-05, "loss": 1.1098, "step": 3164 }, { "epoch": 0.44690765320530923, "grad_norm": 3.336350848220422, "learning_rate": 1.9267123067995035e-05, "loss": 0.9214, "step": 3165 }, { "epoch": 0.4470488562552951, "grad_norm": 3.812111391485075, "learning_rate": 1.926655011417828e-05, "loss": 1.1797, "step": 3166 }, { "epoch": 0.447190059305281, "grad_norm": 4.202307440587516, "learning_rate": 1.92659769450109e-05, "loss": 1.0052, "step": 3167 }, { "epoch": 0.4473312623552669, "grad_norm": 3.878800013574339, "learning_rate": 1.9265403560506223e-05, "loss": 0.9326, "step": 3168 }, { "epoch": 0.44747246540525276, "grad_norm": 3.83858370828189, "learning_rate": 1.9264829960677564e-05, "loss": 1.0843, "step": 3169 }, { "epoch": 0.44761366845523864, "grad_norm": 4.029127778150835, "learning_rate": 1.9264256145538262e-05, "loss": 1.1457, "step": 3170 }, { "epoch": 0.4477548715052245, "grad_norm": 4.195250814615195, "learning_rate": 1.9263682115101644e-05, "loss": 1.0376, "step": 3171 }, { "epoch": 0.4478960745552104, "grad_norm": 3.555874022346718, "learning_rate": 1.926310786938106e-05, "loss": 1.0378, "step": 3172 }, { "epoch": 0.4480372776051963, "grad_norm": 4.780502820428922, "learning_rate": 1.9262533408389842e-05, "loss": 1.1452, "step": 3173 }, { "epoch": 0.4481784806551822, "grad_norm": 4.249107807085745, "learning_rate": 1.9261958732141352e-05, "loss": 1.0874, "step": 3174 }, { "epoch": 0.448319683705168, "grad_norm": 3.966767663947841, "learning_rate": 1.9261383840648943e-05, "loss": 1.0838, "step": 3175 }, { "epoch": 0.4484608867551539, "grad_norm": 3.273933104918183, "learning_rate": 1.926080873392597e-05, "loss": 0.9047, "step": 3176 }, { "epoch": 0.44860208980513977, "grad_norm": 4.252825690251586, "learning_rate": 1.92602334119858e-05, "loss": 1.3435, "step": 3177 }, { "epoch": 0.44874329285512565, "grad_norm": 4.014431225672729, "learning_rate": 1.925965787484181e-05, "loss": 0.9438, "step": 3178 }, { "epoch": 0.44888449590511154, "grad_norm": 4.702349910312314, "learning_rate": 1.9259082122507365e-05, "loss": 1.1884, "step": 3179 }, { "epoch": 0.4490256989550974, "grad_norm": 3.6141768518181725, "learning_rate": 1.9258506154995854e-05, "loss": 0.8488, "step": 3180 }, { "epoch": 0.4491669020050833, "grad_norm": 4.015523625208543, "learning_rate": 1.9257929972320653e-05, "loss": 1.0322, "step": 3181 }, { "epoch": 0.4493081050550692, "grad_norm": 3.4938026643625673, "learning_rate": 1.9257353574495164e-05, "loss": 1.0651, "step": 3182 }, { "epoch": 0.44944930810505507, "grad_norm": 3.4102269969836176, "learning_rate": 1.9256776961532773e-05, "loss": 0.8418, "step": 3183 }, { "epoch": 0.44959051115504095, "grad_norm": 5.2259054057796686, "learning_rate": 1.925620013344688e-05, "loss": 1.4204, "step": 3184 }, { "epoch": 0.44973171420502683, "grad_norm": 4.621444602251797, "learning_rate": 1.92556230902509e-05, "loss": 1.1346, "step": 3185 }, { "epoch": 0.4498729172550127, "grad_norm": 4.677880473354801, "learning_rate": 1.925504583195823e-05, "loss": 1.2539, "step": 3186 }, { "epoch": 0.4500141203049986, "grad_norm": 4.905825545203969, "learning_rate": 1.9254468358582293e-05, "loss": 1.2135, "step": 3187 }, { "epoch": 0.4501553233549845, "grad_norm": 3.5688093651142174, "learning_rate": 1.925389067013651e-05, "loss": 0.987, "step": 3188 }, { "epoch": 0.45029652640497037, "grad_norm": 3.473850155130419, "learning_rate": 1.9253312766634308e-05, "loss": 1.0975, "step": 3189 }, { "epoch": 0.45043772945495625, "grad_norm": 4.176480677601507, "learning_rate": 1.925273464808911e-05, "loss": 0.9384, "step": 3190 }, { "epoch": 0.45057893250494213, "grad_norm": 4.151353910240842, "learning_rate": 1.9252156314514353e-05, "loss": 1.1771, "step": 3191 }, { "epoch": 0.450720135554928, "grad_norm": 4.212822469303504, "learning_rate": 1.925157776592348e-05, "loss": 1.0241, "step": 3192 }, { "epoch": 0.45086133860491384, "grad_norm": 4.138883514757742, "learning_rate": 1.9250999002329937e-05, "loss": 1.2045, "step": 3193 }, { "epoch": 0.4510025416548997, "grad_norm": 4.450799634740918, "learning_rate": 1.925042002374717e-05, "loss": 1.2044, "step": 3194 }, { "epoch": 0.4511437447048856, "grad_norm": 3.8158990033837963, "learning_rate": 1.9249840830188636e-05, "loss": 1.0626, "step": 3195 }, { "epoch": 0.4512849477548715, "grad_norm": 3.5693973545212283, "learning_rate": 1.9249261421667796e-05, "loss": 0.9432, "step": 3196 }, { "epoch": 0.4514261508048574, "grad_norm": 3.776950976413844, "learning_rate": 1.9248681798198115e-05, "loss": 1.0015, "step": 3197 }, { "epoch": 0.45156735385484326, "grad_norm": 3.6542122613715495, "learning_rate": 1.9248101959793066e-05, "loss": 1.111, "step": 3198 }, { "epoch": 0.45170855690482914, "grad_norm": 4.157102983928154, "learning_rate": 1.924752190646612e-05, "loss": 1.0073, "step": 3199 }, { "epoch": 0.451849759954815, "grad_norm": 4.324627321850429, "learning_rate": 1.924694163823076e-05, "loss": 1.1751, "step": 3200 }, { "epoch": 0.4519909630048009, "grad_norm": 4.329472017987981, "learning_rate": 1.9246361155100466e-05, "loss": 1.2319, "step": 3201 }, { "epoch": 0.4521321660547868, "grad_norm": 3.8949344853269987, "learning_rate": 1.9245780457088736e-05, "loss": 0.8497, "step": 3202 }, { "epoch": 0.45227336910477267, "grad_norm": 4.076618400045525, "learning_rate": 1.924519954420906e-05, "loss": 1.1222, "step": 3203 }, { "epoch": 0.45241457215475855, "grad_norm": 3.422944268458539, "learning_rate": 1.9244618416474938e-05, "loss": 0.8355, "step": 3204 }, { "epoch": 0.45255577520474444, "grad_norm": 3.6937382444021916, "learning_rate": 1.9244037073899876e-05, "loss": 1.2263, "step": 3205 }, { "epoch": 0.4526969782547303, "grad_norm": 3.540297081990537, "learning_rate": 1.9243455516497388e-05, "loss": 0.9721, "step": 3206 }, { "epoch": 0.4528381813047162, "grad_norm": 3.2691056635944826, "learning_rate": 1.924287374428098e-05, "loss": 0.9816, "step": 3207 }, { "epoch": 0.4529793843547021, "grad_norm": 4.34274123705014, "learning_rate": 1.924229175726418e-05, "loss": 1.1461, "step": 3208 }, { "epoch": 0.45312058740468797, "grad_norm": 5.786347674953957, "learning_rate": 1.9241709555460514e-05, "loss": 0.8701, "step": 3209 }, { "epoch": 0.4532617904546738, "grad_norm": 3.8476425170383246, "learning_rate": 1.9241127138883508e-05, "loss": 1.0792, "step": 3210 }, { "epoch": 0.4534029935046597, "grad_norm": 3.4908081564891686, "learning_rate": 1.9240544507546696e-05, "loss": 1.0428, "step": 3211 }, { "epoch": 0.45354419655464556, "grad_norm": 4.68394375366079, "learning_rate": 1.9239961661463623e-05, "loss": 1.3825, "step": 3212 }, { "epoch": 0.45368539960463145, "grad_norm": 4.0428243504724435, "learning_rate": 1.923937860064783e-05, "loss": 1.0483, "step": 3213 }, { "epoch": 0.45382660265461733, "grad_norm": 3.411877559676967, "learning_rate": 1.9238795325112867e-05, "loss": 0.8916, "step": 3214 }, { "epoch": 0.4539678057046032, "grad_norm": 4.400073803298403, "learning_rate": 1.9238211834872293e-05, "loss": 0.9855, "step": 3215 }, { "epoch": 0.4541090087545891, "grad_norm": 3.8278615298839997, "learning_rate": 1.9237628129939665e-05, "loss": 1.1385, "step": 3216 }, { "epoch": 0.454250211804575, "grad_norm": 3.828211544190985, "learning_rate": 1.923704421032855e-05, "loss": 1.2312, "step": 3217 }, { "epoch": 0.45439141485456086, "grad_norm": 3.7602848217758735, "learning_rate": 1.9236460076052515e-05, "loss": 0.9298, "step": 3218 }, { "epoch": 0.45453261790454674, "grad_norm": 3.391953023647811, "learning_rate": 1.923587572712514e-05, "loss": 0.9322, "step": 3219 }, { "epoch": 0.4546738209545326, "grad_norm": 3.914184702358827, "learning_rate": 1.9235291163559996e-05, "loss": 1.0262, "step": 3220 }, { "epoch": 0.4548150240045185, "grad_norm": 3.6848019564135193, "learning_rate": 1.9234706385370677e-05, "loss": 1.0193, "step": 3221 }, { "epoch": 0.4549562270545044, "grad_norm": 3.977238516663836, "learning_rate": 1.923412139257077e-05, "loss": 0.9245, "step": 3222 }, { "epoch": 0.4550974301044903, "grad_norm": 3.3967267589522443, "learning_rate": 1.923353618517387e-05, "loss": 0.8006, "step": 3223 }, { "epoch": 0.45523863315447616, "grad_norm": 4.487494452625829, "learning_rate": 1.9232950763193576e-05, "loss": 1.1628, "step": 3224 }, { "epoch": 0.45537983620446204, "grad_norm": 3.4242316200166485, "learning_rate": 1.9232365126643494e-05, "loss": 1.0291, "step": 3225 }, { "epoch": 0.4555210392544479, "grad_norm": 3.6325689754040855, "learning_rate": 1.9231779275537233e-05, "loss": 0.9864, "step": 3226 }, { "epoch": 0.45566224230443375, "grad_norm": 5.153986691590836, "learning_rate": 1.923119320988841e-05, "loss": 0.9568, "step": 3227 }, { "epoch": 0.45580344535441963, "grad_norm": 3.5827889421150916, "learning_rate": 1.923060692971064e-05, "loss": 1.0245, "step": 3228 }, { "epoch": 0.4559446484044055, "grad_norm": 4.048807565714518, "learning_rate": 1.9230020435017553e-05, "loss": 1.0826, "step": 3229 }, { "epoch": 0.4560858514543914, "grad_norm": 3.8985654337209708, "learning_rate": 1.9229433725822776e-05, "loss": 1.2116, "step": 3230 }, { "epoch": 0.4562270545043773, "grad_norm": 3.494945232905836, "learning_rate": 1.9228846802139947e-05, "loss": 0.8652, "step": 3231 }, { "epoch": 0.45636825755436317, "grad_norm": 3.694960939011435, "learning_rate": 1.9228259663982705e-05, "loss": 1.0792, "step": 3232 }, { "epoch": 0.45650946060434905, "grad_norm": 3.6436933758651238, "learning_rate": 1.9227672311364692e-05, "loss": 0.9096, "step": 3233 }, { "epoch": 0.45665066365433493, "grad_norm": 4.147361020507568, "learning_rate": 1.922708474429956e-05, "loss": 1.3041, "step": 3234 }, { "epoch": 0.4567918667043208, "grad_norm": 3.6368159879778172, "learning_rate": 1.9226496962800967e-05, "loss": 0.9192, "step": 3235 }, { "epoch": 0.4569330697543067, "grad_norm": 3.761061322999015, "learning_rate": 1.9225908966882563e-05, "loss": 0.906, "step": 3236 }, { "epoch": 0.4570742728042926, "grad_norm": 3.5783071665991844, "learning_rate": 1.9225320756558023e-05, "loss": 0.8649, "step": 3237 }, { "epoch": 0.45721547585427846, "grad_norm": 3.4016674278946257, "learning_rate": 1.922473233184101e-05, "loss": 0.9599, "step": 3238 }, { "epoch": 0.45735667890426435, "grad_norm": 3.4131824281945202, "learning_rate": 1.9224143692745207e-05, "loss": 1.072, "step": 3239 }, { "epoch": 0.45749788195425023, "grad_norm": 3.2038719797443207, "learning_rate": 1.922355483928428e-05, "loss": 0.8694, "step": 3240 }, { "epoch": 0.4576390850042361, "grad_norm": 6.298507128308925, "learning_rate": 1.9222965771471926e-05, "loss": 1.1447, "step": 3241 }, { "epoch": 0.457780288054222, "grad_norm": 3.8624347210468044, "learning_rate": 1.922237648932183e-05, "loss": 1.0609, "step": 3242 }, { "epoch": 0.4579214911042079, "grad_norm": 3.762926437835544, "learning_rate": 1.922178699284769e-05, "loss": 1.1322, "step": 3243 }, { "epoch": 0.4580626941541937, "grad_norm": 4.122545857131325, "learning_rate": 1.92211972820632e-05, "loss": 0.9244, "step": 3244 }, { "epoch": 0.4582038972041796, "grad_norm": 3.7140080514467635, "learning_rate": 1.9220607356982072e-05, "loss": 1.1503, "step": 3245 }, { "epoch": 0.45834510025416547, "grad_norm": 3.870346059549802, "learning_rate": 1.9220017217618006e-05, "loss": 1.0078, "step": 3246 }, { "epoch": 0.45848630330415135, "grad_norm": 3.837957507562975, "learning_rate": 1.921942686398472e-05, "loss": 0.9192, "step": 3247 }, { "epoch": 0.45862750635413724, "grad_norm": 3.0234868647361375, "learning_rate": 1.921883629609594e-05, "loss": 0.8122, "step": 3248 }, { "epoch": 0.4587687094041231, "grad_norm": 3.60289038908179, "learning_rate": 1.9218245513965384e-05, "loss": 1.1266, "step": 3249 }, { "epoch": 0.458909912454109, "grad_norm": 4.022192950712532, "learning_rate": 1.9217654517606786e-05, "loss": 1.0814, "step": 3250 }, { "epoch": 0.4590511155040949, "grad_norm": 3.8035203402327795, "learning_rate": 1.9217063307033873e-05, "loss": 1.1504, "step": 3251 }, { "epoch": 0.45919231855408077, "grad_norm": 2.873008313082648, "learning_rate": 1.921647188226039e-05, "loss": 0.5885, "step": 3252 }, { "epoch": 0.45933352160406665, "grad_norm": 3.645328013036918, "learning_rate": 1.9215880243300082e-05, "loss": 1.0346, "step": 3253 }, { "epoch": 0.45947472465405254, "grad_norm": 4.140143924945763, "learning_rate": 1.92152883901667e-05, "loss": 0.98, "step": 3254 }, { "epoch": 0.4596159277040384, "grad_norm": 4.649721394491685, "learning_rate": 1.921469632287399e-05, "loss": 1.1418, "step": 3255 }, { "epoch": 0.4597571307540243, "grad_norm": 3.73452511273414, "learning_rate": 1.921410404143572e-05, "loss": 1.1916, "step": 3256 }, { "epoch": 0.4598983338040102, "grad_norm": 3.053882117152354, "learning_rate": 1.921351154586565e-05, "loss": 0.9649, "step": 3257 }, { "epoch": 0.46003953685399607, "grad_norm": 3.7891000619228676, "learning_rate": 1.9212918836177555e-05, "loss": 1.2185, "step": 3258 }, { "epoch": 0.46018073990398195, "grad_norm": 3.5687267693323674, "learning_rate": 1.9212325912385202e-05, "loss": 0.8575, "step": 3259 }, { "epoch": 0.46032194295396783, "grad_norm": 3.4753484635705605, "learning_rate": 1.9211732774502372e-05, "loss": 0.8893, "step": 3260 }, { "epoch": 0.46046314600395366, "grad_norm": 3.4974104775920845, "learning_rate": 1.9211139422542853e-05, "loss": 0.9122, "step": 3261 }, { "epoch": 0.46060434905393954, "grad_norm": 3.855428089650259, "learning_rate": 1.921054585652043e-05, "loss": 1.0834, "step": 3262 }, { "epoch": 0.4607455521039254, "grad_norm": 3.9315202051609317, "learning_rate": 1.92099520764489e-05, "loss": 0.9906, "step": 3263 }, { "epoch": 0.4608867551539113, "grad_norm": 4.95147550440065, "learning_rate": 1.920935808234206e-05, "loss": 1.0549, "step": 3264 }, { "epoch": 0.4610279582038972, "grad_norm": 3.3676060998763693, "learning_rate": 1.920876387421372e-05, "loss": 0.9737, "step": 3265 }, { "epoch": 0.4611691612538831, "grad_norm": 3.17282493529078, "learning_rate": 1.9208169452077678e-05, "loss": 0.8634, "step": 3266 }, { "epoch": 0.46131036430386896, "grad_norm": 3.485659920294613, "learning_rate": 1.9207574815947757e-05, "loss": 0.9477, "step": 3267 }, { "epoch": 0.46145156735385484, "grad_norm": 3.997013210249467, "learning_rate": 1.9206979965837775e-05, "loss": 1.0195, "step": 3268 }, { "epoch": 0.4615927704038407, "grad_norm": 3.2430318444835815, "learning_rate": 1.920638490176155e-05, "loss": 0.9415, "step": 3269 }, { "epoch": 0.4617339734538266, "grad_norm": 3.569377441483567, "learning_rate": 1.9205789623732923e-05, "loss": 1.0643, "step": 3270 }, { "epoch": 0.4618751765038125, "grad_norm": 3.565221312060194, "learning_rate": 1.920519413176572e-05, "loss": 1.0857, "step": 3271 }, { "epoch": 0.4620163795537984, "grad_norm": 4.101146110350876, "learning_rate": 1.9204598425873773e-05, "loss": 1.2772, "step": 3272 }, { "epoch": 0.46215758260378426, "grad_norm": 3.805401357939564, "learning_rate": 1.9204002506070944e-05, "loss": 1.1069, "step": 3273 }, { "epoch": 0.46229878565377014, "grad_norm": 4.044722092141315, "learning_rate": 1.9203406372371065e-05, "loss": 1.411, "step": 3274 }, { "epoch": 0.462439988703756, "grad_norm": 3.7655287109120072, "learning_rate": 1.9202810024787998e-05, "loss": 1.1042, "step": 3275 }, { "epoch": 0.4625811917537419, "grad_norm": 4.322226676382161, "learning_rate": 1.92022134633356e-05, "loss": 1.1562, "step": 3276 }, { "epoch": 0.4627223948037278, "grad_norm": 3.233857206635366, "learning_rate": 1.920161668802774e-05, "loss": 1.0165, "step": 3277 }, { "epoch": 0.4628635978537136, "grad_norm": 3.777777701762446, "learning_rate": 1.9201019698878272e-05, "loss": 1.1327, "step": 3278 }, { "epoch": 0.4630048009036995, "grad_norm": 4.224321182739448, "learning_rate": 1.920042249590109e-05, "loss": 0.9552, "step": 3279 }, { "epoch": 0.4631460039536854, "grad_norm": 3.234864432949824, "learning_rate": 1.919982507911006e-05, "loss": 1.0041, "step": 3280 }, { "epoch": 0.46328720700367126, "grad_norm": 4.439937662265556, "learning_rate": 1.9199227448519065e-05, "loss": 1.4331, "step": 3281 }, { "epoch": 0.46342841005365715, "grad_norm": 3.667322510482209, "learning_rate": 1.9198629604141996e-05, "loss": 0.9611, "step": 3282 }, { "epoch": 0.46356961310364303, "grad_norm": 3.304941181394408, "learning_rate": 1.919803154599275e-05, "loss": 0.9377, "step": 3283 }, { "epoch": 0.4637108161536289, "grad_norm": 3.962774236512375, "learning_rate": 1.9197433274085225e-05, "loss": 1.1008, "step": 3284 }, { "epoch": 0.4638520192036148, "grad_norm": 3.603592865761391, "learning_rate": 1.9196834788433323e-05, "loss": 0.9999, "step": 3285 }, { "epoch": 0.4639932222536007, "grad_norm": 4.158152705938979, "learning_rate": 1.919623608905095e-05, "loss": 1.092, "step": 3286 }, { "epoch": 0.46413442530358656, "grad_norm": 3.726925852976475, "learning_rate": 1.919563717595202e-05, "loss": 0.9415, "step": 3287 }, { "epoch": 0.46427562835357244, "grad_norm": 5.409670889653258, "learning_rate": 1.9195038049150455e-05, "loss": 1.3175, "step": 3288 }, { "epoch": 0.46441683140355833, "grad_norm": 3.906924641959383, "learning_rate": 1.919443870866018e-05, "loss": 1.0551, "step": 3289 }, { "epoch": 0.4645580344535442, "grad_norm": 3.3237005905549597, "learning_rate": 1.919383915449512e-05, "loss": 0.9839, "step": 3290 }, { "epoch": 0.4646992375035301, "grad_norm": 4.38796995181864, "learning_rate": 1.9193239386669203e-05, "loss": 1.1641, "step": 3291 }, { "epoch": 0.464840440553516, "grad_norm": 4.140249326545954, "learning_rate": 1.9192639405196377e-05, "loss": 1.1435, "step": 3292 }, { "epoch": 0.46498164360350186, "grad_norm": 4.202280684250058, "learning_rate": 1.919203921009058e-05, "loss": 1.2459, "step": 3293 }, { "epoch": 0.46512284665348774, "grad_norm": 3.775757864882608, "learning_rate": 1.9191438801365763e-05, "loss": 1.0507, "step": 3294 }, { "epoch": 0.46526404970347357, "grad_norm": 3.6886057590269186, "learning_rate": 1.9190838179035873e-05, "loss": 1.1869, "step": 3295 }, { "epoch": 0.46540525275345945, "grad_norm": 3.8683924393917333, "learning_rate": 1.919023734311488e-05, "loss": 1.1382, "step": 3296 }, { "epoch": 0.46554645580344534, "grad_norm": 3.3853266904557278, "learning_rate": 1.9189636293616733e-05, "loss": 1.074, "step": 3297 }, { "epoch": 0.4656876588534312, "grad_norm": 3.631061322892458, "learning_rate": 1.918903503055541e-05, "loss": 1.0372, "step": 3298 }, { "epoch": 0.4658288619034171, "grad_norm": 3.3759101287018254, "learning_rate": 1.9188433553944885e-05, "loss": 0.9557, "step": 3299 }, { "epoch": 0.465970064953403, "grad_norm": 4.3798646728298385, "learning_rate": 1.918783186379913e-05, "loss": 0.9693, "step": 3300 }, { "epoch": 0.46611126800338887, "grad_norm": 4.331400534055562, "learning_rate": 1.9187229960132128e-05, "loss": 1.5027, "step": 3301 }, { "epoch": 0.46625247105337475, "grad_norm": 4.720676014817762, "learning_rate": 1.9186627842957873e-05, "loss": 1.195, "step": 3302 }, { "epoch": 0.46639367410336063, "grad_norm": 3.6493280427964048, "learning_rate": 1.9186025512290352e-05, "loss": 0.9998, "step": 3303 }, { "epoch": 0.4665348771533465, "grad_norm": 4.510650857367312, "learning_rate": 1.9185422968143566e-05, "loss": 1.187, "step": 3304 }, { "epoch": 0.4666760802033324, "grad_norm": 4.006477593473193, "learning_rate": 1.9184820210531517e-05, "loss": 1.0461, "step": 3305 }, { "epoch": 0.4668172832533183, "grad_norm": 4.389180114036907, "learning_rate": 1.9184217239468213e-05, "loss": 1.1824, "step": 3306 }, { "epoch": 0.46695848630330417, "grad_norm": 3.1148121885518014, "learning_rate": 1.9183614054967666e-05, "loss": 0.8554, "step": 3307 }, { "epoch": 0.46709968935329005, "grad_norm": 3.5454517097745555, "learning_rate": 1.9183010657043894e-05, "loss": 1.1337, "step": 3308 }, { "epoch": 0.46724089240327593, "grad_norm": 3.6391331391301347, "learning_rate": 1.9182407045710923e-05, "loss": 1.0003, "step": 3309 }, { "epoch": 0.4673820954532618, "grad_norm": 3.909022071053452, "learning_rate": 1.9181803220982776e-05, "loss": 1.1596, "step": 3310 }, { "epoch": 0.4675232985032477, "grad_norm": 3.6685462954574475, "learning_rate": 1.9181199182873488e-05, "loss": 0.9759, "step": 3311 }, { "epoch": 0.4676645015532335, "grad_norm": 4.084127123071047, "learning_rate": 1.9180594931397094e-05, "loss": 0.9751, "step": 3312 }, { "epoch": 0.4678057046032194, "grad_norm": 3.651758091998305, "learning_rate": 1.917999046656764e-05, "loss": 0.8351, "step": 3313 }, { "epoch": 0.4679469076532053, "grad_norm": 3.37359982088584, "learning_rate": 1.9179385788399176e-05, "loss": 1.0016, "step": 3314 }, { "epoch": 0.4680881107031912, "grad_norm": 3.8193073034472342, "learning_rate": 1.917878089690574e-05, "loss": 0.7642, "step": 3315 }, { "epoch": 0.46822931375317706, "grad_norm": 4.243218305957036, "learning_rate": 1.917817579210141e-05, "loss": 1.1405, "step": 3316 }, { "epoch": 0.46837051680316294, "grad_norm": 3.7397283469993203, "learning_rate": 1.9177570474000236e-05, "loss": 1.0378, "step": 3317 }, { "epoch": 0.4685117198531488, "grad_norm": 3.8524499008169655, "learning_rate": 1.9176964942616286e-05, "loss": 0.9126, "step": 3318 }, { "epoch": 0.4686529229031347, "grad_norm": 4.358334008192972, "learning_rate": 1.9176359197963634e-05, "loss": 0.9826, "step": 3319 }, { "epoch": 0.4687941259531206, "grad_norm": 3.146033549140663, "learning_rate": 1.917575324005636e-05, "loss": 0.8685, "step": 3320 }, { "epoch": 0.46893532900310647, "grad_norm": 3.5361657760773944, "learning_rate": 1.9175147068908543e-05, "loss": 0.9492, "step": 3321 }, { "epoch": 0.46907653205309235, "grad_norm": 4.050771780955828, "learning_rate": 1.9174540684534267e-05, "loss": 1.1089, "step": 3322 }, { "epoch": 0.46921773510307824, "grad_norm": 4.031123374446859, "learning_rate": 1.9173934086947626e-05, "loss": 1.1549, "step": 3323 }, { "epoch": 0.4693589381530641, "grad_norm": 3.6808566040096102, "learning_rate": 1.9173327276162724e-05, "loss": 0.9391, "step": 3324 }, { "epoch": 0.46950014120305, "grad_norm": 3.414192561757775, "learning_rate": 1.9172720252193657e-05, "loss": 0.9217, "step": 3325 }, { "epoch": 0.4696413442530359, "grad_norm": 3.534271114506234, "learning_rate": 1.917211301505453e-05, "loss": 1.0627, "step": 3326 }, { "epoch": 0.46978254730302177, "grad_norm": 3.1371871383904257, "learning_rate": 1.9171505564759463e-05, "loss": 0.8956, "step": 3327 }, { "epoch": 0.46992375035300765, "grad_norm": 3.1148931676990053, "learning_rate": 1.9170897901322563e-05, "loss": 0.9354, "step": 3328 }, { "epoch": 0.4700649534029935, "grad_norm": 3.7373959335949323, "learning_rate": 1.9170290024757958e-05, "loss": 0.9131, "step": 3329 }, { "epoch": 0.47020615645297936, "grad_norm": 4.379979492777393, "learning_rate": 1.916968193507977e-05, "loss": 1.2751, "step": 3330 }, { "epoch": 0.47034735950296525, "grad_norm": 4.096582047031259, "learning_rate": 1.916907363230214e-05, "loss": 0.9532, "step": 3331 }, { "epoch": 0.47048856255295113, "grad_norm": 4.223248714636114, "learning_rate": 1.9168465116439196e-05, "loss": 1.3101, "step": 3332 }, { "epoch": 0.470629765602937, "grad_norm": 3.678626427008912, "learning_rate": 1.9167856387505077e-05, "loss": 1.1846, "step": 3333 }, { "epoch": 0.4707709686529229, "grad_norm": 3.431736025451667, "learning_rate": 1.916724744551394e-05, "loss": 0.8871, "step": 3334 }, { "epoch": 0.4709121717029088, "grad_norm": 4.16266559232517, "learning_rate": 1.916663829047993e-05, "loss": 1.145, "step": 3335 }, { "epoch": 0.47105337475289466, "grad_norm": 3.7829351922468515, "learning_rate": 1.9166028922417208e-05, "loss": 1.0593, "step": 3336 }, { "epoch": 0.47119457780288054, "grad_norm": 4.4505326592376795, "learning_rate": 1.916541934133993e-05, "loss": 1.2792, "step": 3337 }, { "epoch": 0.4713357808528664, "grad_norm": 4.345264219268654, "learning_rate": 1.9164809547262262e-05, "loss": 1.1158, "step": 3338 }, { "epoch": 0.4714769839028523, "grad_norm": 4.263010309211308, "learning_rate": 1.9164199540198382e-05, "loss": 1.1482, "step": 3339 }, { "epoch": 0.4716181869528382, "grad_norm": 3.9620576835071293, "learning_rate": 1.916358932016246e-05, "loss": 1.1323, "step": 3340 }, { "epoch": 0.4717593900028241, "grad_norm": 3.8253566121026914, "learning_rate": 1.916297888716868e-05, "loss": 0.9799, "step": 3341 }, { "epoch": 0.47190059305280996, "grad_norm": 3.868849443730833, "learning_rate": 1.916236824123123e-05, "loss": 1.1715, "step": 3342 }, { "epoch": 0.47204179610279584, "grad_norm": 3.4331809920474283, "learning_rate": 1.9161757382364295e-05, "loss": 1.0173, "step": 3343 }, { "epoch": 0.4721829991527817, "grad_norm": 3.3121973968804315, "learning_rate": 1.916114631058208e-05, "loss": 0.8776, "step": 3344 }, { "epoch": 0.4723242022027676, "grad_norm": 3.6196048207023357, "learning_rate": 1.9160535025898777e-05, "loss": 1.0114, "step": 3345 }, { "epoch": 0.47246540525275343, "grad_norm": 3.841139430324358, "learning_rate": 1.91599235283286e-05, "loss": 0.976, "step": 3346 }, { "epoch": 0.4726066083027393, "grad_norm": 3.2535983588129977, "learning_rate": 1.9159311817885756e-05, "loss": 0.924, "step": 3347 }, { "epoch": 0.4727478113527252, "grad_norm": 4.207795702308853, "learning_rate": 1.9158699894584456e-05, "loss": 1.0479, "step": 3348 }, { "epoch": 0.4728890144027111, "grad_norm": 3.7235157780424117, "learning_rate": 1.915808775843893e-05, "loss": 0.9604, "step": 3349 }, { "epoch": 0.47303021745269697, "grad_norm": 3.3869835210676302, "learning_rate": 1.9157475409463396e-05, "loss": 1.0391, "step": 3350 }, { "epoch": 0.47317142050268285, "grad_norm": 3.771252396405334, "learning_rate": 1.915686284767209e-05, "loss": 1.1861, "step": 3351 }, { "epoch": 0.47331262355266873, "grad_norm": 4.913802258641976, "learning_rate": 1.915625007307925e-05, "loss": 0.9709, "step": 3352 }, { "epoch": 0.4734538266026546, "grad_norm": 4.169855488151496, "learning_rate": 1.915563708569911e-05, "loss": 1.2919, "step": 3353 }, { "epoch": 0.4735950296526405, "grad_norm": 4.841798757193291, "learning_rate": 1.9155023885545914e-05, "loss": 1.3269, "step": 3354 }, { "epoch": 0.4737362327026264, "grad_norm": 4.344741116668644, "learning_rate": 1.915441047263392e-05, "loss": 1.195, "step": 3355 }, { "epoch": 0.47387743575261226, "grad_norm": 5.05795557088238, "learning_rate": 1.915379684697738e-05, "loss": 0.9277, "step": 3356 }, { "epoch": 0.47401863880259815, "grad_norm": 4.0369855445860425, "learning_rate": 1.9153183008590556e-05, "loss": 1.1293, "step": 3357 }, { "epoch": 0.47415984185258403, "grad_norm": 4.10432725311996, "learning_rate": 1.915256895748771e-05, "loss": 1.0313, "step": 3358 }, { "epoch": 0.4743010449025699, "grad_norm": 4.338154144495249, "learning_rate": 1.9151954693683114e-05, "loss": 1.3287, "step": 3359 }, { "epoch": 0.4744422479525558, "grad_norm": 4.6749347006831155, "learning_rate": 1.9151340217191042e-05, "loss": 1.166, "step": 3360 }, { "epoch": 0.4745834510025417, "grad_norm": 3.657026285979219, "learning_rate": 1.915072552802578e-05, "loss": 1.0037, "step": 3361 }, { "epoch": 0.47472465405252756, "grad_norm": 3.7791820910598624, "learning_rate": 1.9150110626201604e-05, "loss": 0.9711, "step": 3362 }, { "epoch": 0.4748658571025134, "grad_norm": 3.5461768543719345, "learning_rate": 1.914949551173281e-05, "loss": 1.0482, "step": 3363 }, { "epoch": 0.47500706015249927, "grad_norm": 4.8062586768195645, "learning_rate": 1.9148880184633695e-05, "loss": 1.3734, "step": 3364 }, { "epoch": 0.47514826320248515, "grad_norm": 4.374910670456668, "learning_rate": 1.9148264644918552e-05, "loss": 1.1175, "step": 3365 }, { "epoch": 0.47528946625247104, "grad_norm": 4.026065751800876, "learning_rate": 1.914764889260169e-05, "loss": 1.1894, "step": 3366 }, { "epoch": 0.4754306693024569, "grad_norm": 3.7920521662779607, "learning_rate": 1.914703292769742e-05, "loss": 1.0866, "step": 3367 }, { "epoch": 0.4755718723524428, "grad_norm": 5.109543316912532, "learning_rate": 1.914641675022005e-05, "loss": 1.3375, "step": 3368 }, { "epoch": 0.4757130754024287, "grad_norm": 4.096015476329699, "learning_rate": 1.914580036018391e-05, "loss": 0.8131, "step": 3369 }, { "epoch": 0.47585427845241457, "grad_norm": 3.739770391142123, "learning_rate": 1.914518375760332e-05, "loss": 1.1147, "step": 3370 }, { "epoch": 0.47599548150240045, "grad_norm": 3.429525721763435, "learning_rate": 1.9144566942492605e-05, "loss": 0.97, "step": 3371 }, { "epoch": 0.47613668455238634, "grad_norm": 4.086845182527498, "learning_rate": 1.9143949914866106e-05, "loss": 0.8889, "step": 3372 }, { "epoch": 0.4762778876023722, "grad_norm": 4.007898923481087, "learning_rate": 1.914333267473816e-05, "loss": 1.1152, "step": 3373 }, { "epoch": 0.4764190906523581, "grad_norm": 3.5293601277830806, "learning_rate": 1.914271522212311e-05, "loss": 0.8811, "step": 3374 }, { "epoch": 0.476560293702344, "grad_norm": 3.6795014599239586, "learning_rate": 1.914209755703531e-05, "loss": 0.9058, "step": 3375 }, { "epoch": 0.47670149675232987, "grad_norm": 3.1694543516812894, "learning_rate": 1.9141479679489107e-05, "loss": 0.9004, "step": 3376 }, { "epoch": 0.47684269980231575, "grad_norm": 3.4168555152900706, "learning_rate": 1.9140861589498866e-05, "loss": 0.7709, "step": 3377 }, { "epoch": 0.47698390285230163, "grad_norm": 3.0760120411511913, "learning_rate": 1.914024328707895e-05, "loss": 0.7947, "step": 3378 }, { "epoch": 0.4771251059022875, "grad_norm": 4.9695802275087715, "learning_rate": 1.9139624772243724e-05, "loss": 1.1513, "step": 3379 }, { "epoch": 0.47726630895227334, "grad_norm": 3.2886750952298045, "learning_rate": 1.9139006045007567e-05, "loss": 0.9815, "step": 3380 }, { "epoch": 0.4774075120022592, "grad_norm": 3.6092828861646953, "learning_rate": 1.913838710538486e-05, "loss": 0.8794, "step": 3381 }, { "epoch": 0.4775487150522451, "grad_norm": 3.9928262874300344, "learning_rate": 1.913776795338998e-05, "loss": 1.0127, "step": 3382 }, { "epoch": 0.477689918102231, "grad_norm": 4.057518777495451, "learning_rate": 1.9137148589037314e-05, "loss": 0.9195, "step": 3383 }, { "epoch": 0.4778311211522169, "grad_norm": 3.7191258122122197, "learning_rate": 1.9136529012341268e-05, "loss": 0.8754, "step": 3384 }, { "epoch": 0.47797232420220276, "grad_norm": 4.162240763158855, "learning_rate": 1.913590922331623e-05, "loss": 1.3772, "step": 3385 }, { "epoch": 0.47811352725218864, "grad_norm": 4.587661154824074, "learning_rate": 1.9135289221976608e-05, "loss": 1.2216, "step": 3386 }, { "epoch": 0.4782547303021745, "grad_norm": 3.6660419428148123, "learning_rate": 1.9134669008336814e-05, "loss": 0.9977, "step": 3387 }, { "epoch": 0.4783959333521604, "grad_norm": 3.920102964329545, "learning_rate": 1.9134048582411253e-05, "loss": 1.1895, "step": 3388 }, { "epoch": 0.4785371364021463, "grad_norm": 3.5015718995174887, "learning_rate": 1.9133427944214348e-05, "loss": 0.9435, "step": 3389 }, { "epoch": 0.4786783394521322, "grad_norm": 3.4630652288161676, "learning_rate": 1.9132807093760523e-05, "loss": 0.8616, "step": 3390 }, { "epoch": 0.47881954250211806, "grad_norm": 3.9356124631918106, "learning_rate": 1.9132186031064203e-05, "loss": 1.092, "step": 3391 }, { "epoch": 0.47896074555210394, "grad_norm": 3.371554290552784, "learning_rate": 1.9131564756139824e-05, "loss": 1.0282, "step": 3392 }, { "epoch": 0.4791019486020898, "grad_norm": 3.3672220502779506, "learning_rate": 1.9130943269001826e-05, "loss": 1.0422, "step": 3393 }, { "epoch": 0.4792431516520757, "grad_norm": 3.4616552002763203, "learning_rate": 1.9130321569664646e-05, "loss": 0.9711, "step": 3394 }, { "epoch": 0.4793843547020616, "grad_norm": 3.2863179515317205, "learning_rate": 1.9129699658142738e-05, "loss": 0.9863, "step": 3395 }, { "epoch": 0.47952555775204747, "grad_norm": 3.292545804068655, "learning_rate": 1.9129077534450556e-05, "loss": 0.9525, "step": 3396 }, { "epoch": 0.4796667608020333, "grad_norm": 4.005660207315239, "learning_rate": 1.912845519860255e-05, "loss": 1.2757, "step": 3397 }, { "epoch": 0.4798079638520192, "grad_norm": 3.480320998315098, "learning_rate": 1.912783265061319e-05, "loss": 1.0295, "step": 3398 }, { "epoch": 0.47994916690200506, "grad_norm": 3.851292362227884, "learning_rate": 1.9127209890496942e-05, "loss": 0.9082, "step": 3399 }, { "epoch": 0.48009036995199095, "grad_norm": 3.722702756613766, "learning_rate": 1.9126586918268275e-05, "loss": 0.9807, "step": 3400 }, { "epoch": 0.48023157300197683, "grad_norm": 3.0578769712701255, "learning_rate": 1.912596373394167e-05, "loss": 0.9171, "step": 3401 }, { "epoch": 0.4803727760519627, "grad_norm": 3.847021197440844, "learning_rate": 1.9125340337531612e-05, "loss": 0.902, "step": 3402 }, { "epoch": 0.4805139791019486, "grad_norm": 4.155291127453842, "learning_rate": 1.912471672905258e-05, "loss": 1.2731, "step": 3403 }, { "epoch": 0.4806551821519345, "grad_norm": 4.1879517239443835, "learning_rate": 1.912409290851908e-05, "loss": 1.0017, "step": 3404 }, { "epoch": 0.48079638520192036, "grad_norm": 3.575517224720626, "learning_rate": 1.9123468875945594e-05, "loss": 0.8847, "step": 3405 }, { "epoch": 0.48093758825190625, "grad_norm": 3.6504548416653932, "learning_rate": 1.9122844631346632e-05, "loss": 1.0449, "step": 3406 }, { "epoch": 0.48107879130189213, "grad_norm": 4.349637777887388, "learning_rate": 1.9122220174736706e-05, "loss": 1.2529, "step": 3407 }, { "epoch": 0.481219994351878, "grad_norm": 3.7312052831405973, "learning_rate": 1.9121595506130317e-05, "loss": 0.9394, "step": 3408 }, { "epoch": 0.4813611974018639, "grad_norm": 3.5449884862195407, "learning_rate": 1.912097062554199e-05, "loss": 1.0042, "step": 3409 }, { "epoch": 0.4815024004518498, "grad_norm": 3.8421657485544785, "learning_rate": 1.9120345532986243e-05, "loss": 1.1366, "step": 3410 }, { "epoch": 0.48164360350183566, "grad_norm": 3.902178560780491, "learning_rate": 1.9119720228477607e-05, "loss": 1.0216, "step": 3411 }, { "epoch": 0.48178480655182154, "grad_norm": 3.680504901300528, "learning_rate": 1.911909471203061e-05, "loss": 1.0282, "step": 3412 }, { "epoch": 0.4819260096018074, "grad_norm": 4.018592863261293, "learning_rate": 1.911846898365979e-05, "loss": 1.202, "step": 3413 }, { "epoch": 0.48206721265179325, "grad_norm": 3.034031123555134, "learning_rate": 1.911784304337969e-05, "loss": 0.7451, "step": 3414 }, { "epoch": 0.48220841570177914, "grad_norm": 3.778542591206886, "learning_rate": 1.9117216891204856e-05, "loss": 1.0307, "step": 3415 }, { "epoch": 0.482349618751765, "grad_norm": 3.8424665883673006, "learning_rate": 1.911659052714984e-05, "loss": 1.1489, "step": 3416 }, { "epoch": 0.4824908218017509, "grad_norm": 3.831411086354194, "learning_rate": 1.9115963951229194e-05, "loss": 0.9748, "step": 3417 }, { "epoch": 0.4826320248517368, "grad_norm": 4.234331546700752, "learning_rate": 1.911533716345748e-05, "loss": 1.2547, "step": 3418 }, { "epoch": 0.48277322790172267, "grad_norm": 3.418024359677057, "learning_rate": 1.9114710163849273e-05, "loss": 0.9555, "step": 3419 }, { "epoch": 0.48291443095170855, "grad_norm": 3.529881097803363, "learning_rate": 1.9114082952419134e-05, "loss": 1.1858, "step": 3420 }, { "epoch": 0.48305563400169443, "grad_norm": 3.371579821420477, "learning_rate": 1.9113455529181645e-05, "loss": 1.0823, "step": 3421 }, { "epoch": 0.4831968370516803, "grad_norm": 3.923038124732678, "learning_rate": 1.9112827894151386e-05, "loss": 0.9391, "step": 3422 }, { "epoch": 0.4833380401016662, "grad_norm": 3.401025704207994, "learning_rate": 1.911220004734294e-05, "loss": 1.005, "step": 3423 }, { "epoch": 0.4834792431516521, "grad_norm": 4.420814529132933, "learning_rate": 1.9111571988770903e-05, "loss": 1.1184, "step": 3424 }, { "epoch": 0.48362044620163797, "grad_norm": 3.885034489127312, "learning_rate": 1.9110943718449867e-05, "loss": 1.1469, "step": 3425 }, { "epoch": 0.48376164925162385, "grad_norm": 4.410805753695287, "learning_rate": 1.9110315236394434e-05, "loss": 1.4134, "step": 3426 }, { "epoch": 0.48390285230160973, "grad_norm": 4.326184314873575, "learning_rate": 1.910968654261921e-05, "loss": 1.0962, "step": 3427 }, { "epoch": 0.4840440553515956, "grad_norm": 3.2805236328412475, "learning_rate": 1.9109057637138805e-05, "loss": 0.8128, "step": 3428 }, { "epoch": 0.4841852584015815, "grad_norm": 3.785453894308553, "learning_rate": 1.9108428519967832e-05, "loss": 0.9858, "step": 3429 }, { "epoch": 0.4843264614515674, "grad_norm": 4.010849863576899, "learning_rate": 1.9107799191120913e-05, "loss": 1.0224, "step": 3430 }, { "epoch": 0.4844676645015532, "grad_norm": 3.848826125711394, "learning_rate": 1.910716965061268e-05, "loss": 0.9441, "step": 3431 }, { "epoch": 0.4846088675515391, "grad_norm": 3.7065182647356822, "learning_rate": 1.910653989845775e-05, "loss": 0.9439, "step": 3432 }, { "epoch": 0.484750070601525, "grad_norm": 4.122174485099892, "learning_rate": 1.9105909934670768e-05, "loss": 1.186, "step": 3433 }, { "epoch": 0.48489127365151086, "grad_norm": 3.578719099847629, "learning_rate": 1.9105279759266376e-05, "loss": 1.2185, "step": 3434 }, { "epoch": 0.48503247670149674, "grad_norm": 3.6809531029288336, "learning_rate": 1.910464937225921e-05, "loss": 0.9385, "step": 3435 }, { "epoch": 0.4851736797514826, "grad_norm": 3.929430668705745, "learning_rate": 1.9104018773663924e-05, "loss": 1.1098, "step": 3436 }, { "epoch": 0.4853148828014685, "grad_norm": 3.9409411212468095, "learning_rate": 1.910338796349518e-05, "loss": 1.1119, "step": 3437 }, { "epoch": 0.4854560858514544, "grad_norm": 3.8260548324995924, "learning_rate": 1.9102756941767625e-05, "loss": 0.9604, "step": 3438 }, { "epoch": 0.48559728890144027, "grad_norm": 3.9404208700750605, "learning_rate": 1.910212570849593e-05, "loss": 0.9844, "step": 3439 }, { "epoch": 0.48573849195142615, "grad_norm": 4.126591035097861, "learning_rate": 1.9101494263694764e-05, "loss": 1.1435, "step": 3440 }, { "epoch": 0.48587969500141204, "grad_norm": 3.645127778470133, "learning_rate": 1.9100862607378807e-05, "loss": 1.2501, "step": 3441 }, { "epoch": 0.4860208980513979, "grad_norm": 3.822226688723868, "learning_rate": 1.9100230739562728e-05, "loss": 1.0962, "step": 3442 }, { "epoch": 0.4861621011013838, "grad_norm": 5.007249045939249, "learning_rate": 1.9099598660261217e-05, "loss": 1.1519, "step": 3443 }, { "epoch": 0.4863033041513697, "grad_norm": 3.964620345455004, "learning_rate": 1.9098966369488967e-05, "loss": 1.183, "step": 3444 }, { "epoch": 0.48644450720135557, "grad_norm": 3.812646627025024, "learning_rate": 1.9098333867260667e-05, "loss": 1.084, "step": 3445 }, { "epoch": 0.48658571025134145, "grad_norm": 3.697637291498036, "learning_rate": 1.9097701153591015e-05, "loss": 0.7982, "step": 3446 }, { "epoch": 0.48672691330132734, "grad_norm": 3.9138375830367225, "learning_rate": 1.9097068228494716e-05, "loss": 1.135, "step": 3447 }, { "epoch": 0.48686811635131316, "grad_norm": 4.349520294198776, "learning_rate": 1.9096435091986485e-05, "loss": 1.0692, "step": 3448 }, { "epoch": 0.48700931940129905, "grad_norm": 3.955283551908681, "learning_rate": 1.909580174408103e-05, "loss": 0.9699, "step": 3449 }, { "epoch": 0.48715052245128493, "grad_norm": 3.5521365260904094, "learning_rate": 1.9095168184793067e-05, "loss": 0.9232, "step": 3450 }, { "epoch": 0.4872917255012708, "grad_norm": 3.931929272189284, "learning_rate": 1.9094534414137323e-05, "loss": 0.964, "step": 3451 }, { "epoch": 0.4874329285512567, "grad_norm": 3.6327513392963153, "learning_rate": 1.9093900432128532e-05, "loss": 1.0156, "step": 3452 }, { "epoch": 0.4875741316012426, "grad_norm": 4.503291832442723, "learning_rate": 1.909326623878142e-05, "loss": 1.0626, "step": 3453 }, { "epoch": 0.48771533465122846, "grad_norm": 4.143719888525588, "learning_rate": 1.9092631834110723e-05, "loss": 0.9787, "step": 3454 }, { "epoch": 0.48785653770121434, "grad_norm": 3.9092818547648758, "learning_rate": 1.9091997218131195e-05, "loss": 0.9621, "step": 3455 }, { "epoch": 0.4879977407512002, "grad_norm": 3.530222528244613, "learning_rate": 1.9091362390857578e-05, "loss": 1.168, "step": 3456 }, { "epoch": 0.4881389438011861, "grad_norm": 3.675581947173463, "learning_rate": 1.9090727352304622e-05, "loss": 1.0877, "step": 3457 }, { "epoch": 0.488280146851172, "grad_norm": 4.970543667609957, "learning_rate": 1.909009210248709e-05, "loss": 1.437, "step": 3458 }, { "epoch": 0.4884213499011579, "grad_norm": 4.062471857437775, "learning_rate": 1.9089456641419745e-05, "loss": 1.0596, "step": 3459 }, { "epoch": 0.48856255295114376, "grad_norm": 3.9983954462824354, "learning_rate": 1.9088820969117348e-05, "loss": 1.0598, "step": 3460 }, { "epoch": 0.48870375600112964, "grad_norm": 3.1835296124825794, "learning_rate": 1.9088185085594682e-05, "loss": 0.9755, "step": 3461 }, { "epoch": 0.4888449590511155, "grad_norm": 3.4085720454931336, "learning_rate": 1.9087548990866515e-05, "loss": 1.0636, "step": 3462 }, { "epoch": 0.4889861621011014, "grad_norm": 4.062680833969174, "learning_rate": 1.9086912684947638e-05, "loss": 1.14, "step": 3463 }, { "epoch": 0.4891273651510873, "grad_norm": 3.9214652401637013, "learning_rate": 1.9086276167852834e-05, "loss": 0.9294, "step": 3464 }, { "epoch": 0.4892685682010731, "grad_norm": 3.5280094152266086, "learning_rate": 1.9085639439596895e-05, "loss": 0.914, "step": 3465 }, { "epoch": 0.489409771251059, "grad_norm": 3.1130402571183082, "learning_rate": 1.908500250019462e-05, "loss": 0.9503, "step": 3466 }, { "epoch": 0.4895509743010449, "grad_norm": 4.117890568018956, "learning_rate": 1.908436534966081e-05, "loss": 1.1695, "step": 3467 }, { "epoch": 0.48969217735103077, "grad_norm": 3.311755863819589, "learning_rate": 1.908372798801027e-05, "loss": 0.8892, "step": 3468 }, { "epoch": 0.48983338040101665, "grad_norm": 4.144551027499583, "learning_rate": 1.9083090415257817e-05, "loss": 1.1622, "step": 3469 }, { "epoch": 0.48997458345100253, "grad_norm": 3.7120132939994677, "learning_rate": 1.9082452631418265e-05, "loss": 0.9346, "step": 3470 }, { "epoch": 0.4901157865009884, "grad_norm": 3.186344405483788, "learning_rate": 1.9081814636506438e-05, "loss": 0.811, "step": 3471 }, { "epoch": 0.4902569895509743, "grad_norm": 3.5160990970341492, "learning_rate": 1.9081176430537158e-05, "loss": 0.9254, "step": 3472 }, { "epoch": 0.4903981926009602, "grad_norm": 3.9141559781379365, "learning_rate": 1.908053801352526e-05, "loss": 0.993, "step": 3473 }, { "epoch": 0.49053939565094606, "grad_norm": 4.008458564745851, "learning_rate": 1.9079899385485583e-05, "loss": 1.1116, "step": 3474 }, { "epoch": 0.49068059870093195, "grad_norm": 4.204329478909524, "learning_rate": 1.907926054643296e-05, "loss": 1.0946, "step": 3475 }, { "epoch": 0.49082180175091783, "grad_norm": 4.130585278010223, "learning_rate": 1.907862149638225e-05, "loss": 0.9859, "step": 3476 }, { "epoch": 0.4909630048009037, "grad_norm": 4.26339885166948, "learning_rate": 1.9077982235348294e-05, "loss": 1.0285, "step": 3477 }, { "epoch": 0.4911042078508896, "grad_norm": 3.3491681739287373, "learning_rate": 1.907734276334595e-05, "loss": 0.9969, "step": 3478 }, { "epoch": 0.4912454109008755, "grad_norm": 4.281517881135393, "learning_rate": 1.9076703080390082e-05, "loss": 1.2935, "step": 3479 }, { "epoch": 0.49138661395086136, "grad_norm": 3.8894579194064605, "learning_rate": 1.907606318649555e-05, "loss": 1.2278, "step": 3480 }, { "epoch": 0.49152781700084724, "grad_norm": 4.105243828152826, "learning_rate": 1.907542308167724e-05, "loss": 0.9711, "step": 3481 }, { "epoch": 0.49166902005083307, "grad_norm": 4.597564693004422, "learning_rate": 1.9074782765950007e-05, "loss": 1.207, "step": 3482 }, { "epoch": 0.49181022310081896, "grad_norm": 4.064432349557505, "learning_rate": 1.9074142239328745e-05, "loss": 1.1267, "step": 3483 }, { "epoch": 0.49195142615080484, "grad_norm": 4.389503086047374, "learning_rate": 1.9073501501828335e-05, "loss": 1.1076, "step": 3484 }, { "epoch": 0.4920926292007907, "grad_norm": 4.398777556596936, "learning_rate": 1.907286055346367e-05, "loss": 1.1999, "step": 3485 }, { "epoch": 0.4922338322507766, "grad_norm": 4.059877403803943, "learning_rate": 1.9072219394249644e-05, "loss": 1.2649, "step": 3486 }, { "epoch": 0.4923750353007625, "grad_norm": 3.7989982541735827, "learning_rate": 1.9071578024201156e-05, "loss": 1.0269, "step": 3487 }, { "epoch": 0.49251623835074837, "grad_norm": 3.980681190322074, "learning_rate": 1.9070936443333113e-05, "loss": 1.1437, "step": 3488 }, { "epoch": 0.49265744140073425, "grad_norm": 3.9641709581235163, "learning_rate": 1.9070294651660423e-05, "loss": 1.1662, "step": 3489 }, { "epoch": 0.49279864445072014, "grad_norm": 3.2399245304851214, "learning_rate": 1.9069652649198004e-05, "loss": 0.8165, "step": 3490 }, { "epoch": 0.492939847500706, "grad_norm": 4.246085967397897, "learning_rate": 1.9069010435960774e-05, "loss": 1.2267, "step": 3491 }, { "epoch": 0.4930810505506919, "grad_norm": 3.218082038625546, "learning_rate": 1.906836801196366e-05, "loss": 0.9061, "step": 3492 }, { "epoch": 0.4932222536006778, "grad_norm": 3.9609534062687937, "learning_rate": 1.9067725377221592e-05, "loss": 0.9325, "step": 3493 }, { "epoch": 0.49336345665066367, "grad_norm": 3.591459705043712, "learning_rate": 1.9067082531749496e-05, "loss": 1.0049, "step": 3494 }, { "epoch": 0.49350465970064955, "grad_norm": 3.867334613421313, "learning_rate": 1.9066439475562323e-05, "loss": 0.9205, "step": 3495 }, { "epoch": 0.49364586275063543, "grad_norm": 3.8024562222819047, "learning_rate": 1.9065796208675005e-05, "loss": 1.0396, "step": 3496 }, { "epoch": 0.4937870658006213, "grad_norm": 3.7371667694146304, "learning_rate": 1.9065152731102503e-05, "loss": 1.0713, "step": 3497 }, { "epoch": 0.4939282688506072, "grad_norm": 3.6762293501513152, "learning_rate": 1.9064509042859767e-05, "loss": 0.9022, "step": 3498 }, { "epoch": 0.494069471900593, "grad_norm": 3.6523766949980465, "learning_rate": 1.9063865143961753e-05, "loss": 1.0318, "step": 3499 }, { "epoch": 0.4942106749505789, "grad_norm": 4.507429588435184, "learning_rate": 1.906322103442343e-05, "loss": 1.0702, "step": 3500 }, { "epoch": 0.4943518780005648, "grad_norm": 3.9423261362209607, "learning_rate": 1.9062576714259764e-05, "loss": 1.0949, "step": 3501 }, { "epoch": 0.4944930810505507, "grad_norm": 3.535695174321498, "learning_rate": 1.9061932183485726e-05, "loss": 0.946, "step": 3502 }, { "epoch": 0.49463428410053656, "grad_norm": 3.6129699721214856, "learning_rate": 1.9061287442116302e-05, "loss": 1.1201, "step": 3503 }, { "epoch": 0.49477548715052244, "grad_norm": 3.631276742502082, "learning_rate": 1.906064249016647e-05, "loss": 0.9405, "step": 3504 }, { "epoch": 0.4949166902005083, "grad_norm": 3.3224384105335725, "learning_rate": 1.9059997327651218e-05, "loss": 0.9827, "step": 3505 }, { "epoch": 0.4950578932504942, "grad_norm": 3.634825223274179, "learning_rate": 1.905935195458554e-05, "loss": 0.9433, "step": 3506 }, { "epoch": 0.4951990963004801, "grad_norm": 4.80886697204205, "learning_rate": 1.9058706370984436e-05, "loss": 1.3736, "step": 3507 }, { "epoch": 0.495340299350466, "grad_norm": 3.6857950412910703, "learning_rate": 1.9058060576862912e-05, "loss": 1.0958, "step": 3508 }, { "epoch": 0.49548150240045186, "grad_norm": 4.333364207396368, "learning_rate": 1.9057414572235967e-05, "loss": 1.2163, "step": 3509 }, { "epoch": 0.49562270545043774, "grad_norm": 4.009949911406543, "learning_rate": 1.905676835711862e-05, "loss": 1.1628, "step": 3510 }, { "epoch": 0.4957639085004236, "grad_norm": 3.2443932876879185, "learning_rate": 1.905612193152589e-05, "loss": 0.8506, "step": 3511 }, { "epoch": 0.4959051115504095, "grad_norm": 3.5896949468709267, "learning_rate": 1.9055475295472792e-05, "loss": 0.9275, "step": 3512 }, { "epoch": 0.4960463146003954, "grad_norm": 3.951651347864501, "learning_rate": 1.9054828448974363e-05, "loss": 1.1606, "step": 3513 }, { "epoch": 0.49618751765038127, "grad_norm": 3.5598371236198965, "learning_rate": 1.905418139204563e-05, "loss": 1.067, "step": 3514 }, { "epoch": 0.49632872070036715, "grad_norm": 4.771926602626114, "learning_rate": 1.9053534124701633e-05, "loss": 1.2652, "step": 3515 }, { "epoch": 0.496469923750353, "grad_norm": 3.633456462245338, "learning_rate": 1.9052886646957413e-05, "loss": 1.0038, "step": 3516 }, { "epoch": 0.49661112680033886, "grad_norm": 4.6212961538377195, "learning_rate": 1.905223895882802e-05, "loss": 1.422, "step": 3517 }, { "epoch": 0.49675232985032475, "grad_norm": 4.364820781011969, "learning_rate": 1.9051591060328496e-05, "loss": 1.0444, "step": 3518 }, { "epoch": 0.49689353290031063, "grad_norm": 3.442508120621266, "learning_rate": 1.9050942951473908e-05, "loss": 0.9931, "step": 3519 }, { "epoch": 0.4970347359502965, "grad_norm": 3.91237014968559, "learning_rate": 1.9050294632279317e-05, "loss": 1.0998, "step": 3520 }, { "epoch": 0.4971759390002824, "grad_norm": 4.341196239450135, "learning_rate": 1.904964610275978e-05, "loss": 0.9505, "step": 3521 }, { "epoch": 0.4973171420502683, "grad_norm": 3.9505485808056036, "learning_rate": 1.9048997362930384e-05, "loss": 1.0686, "step": 3522 }, { "epoch": 0.49745834510025416, "grad_norm": 4.446619469893406, "learning_rate": 1.9048348412806192e-05, "loss": 1.2636, "step": 3523 }, { "epoch": 0.49759954815024005, "grad_norm": 4.172522867131995, "learning_rate": 1.9047699252402294e-05, "loss": 0.9826, "step": 3524 }, { "epoch": 0.49774075120022593, "grad_norm": 3.264221217507679, "learning_rate": 1.9047049881733773e-05, "loss": 0.7252, "step": 3525 }, { "epoch": 0.4978819542502118, "grad_norm": 4.214037969973725, "learning_rate": 1.9046400300815716e-05, "loss": 1.2387, "step": 3526 }, { "epoch": 0.4980231573001977, "grad_norm": 3.418599086811761, "learning_rate": 1.9045750509663224e-05, "loss": 1.0176, "step": 3527 }, { "epoch": 0.4981643603501836, "grad_norm": 3.4531695618825715, "learning_rate": 1.90451005082914e-05, "loss": 1.0781, "step": 3528 }, { "epoch": 0.49830556340016946, "grad_norm": 3.4471045042538404, "learning_rate": 1.9044450296715344e-05, "loss": 0.9268, "step": 3529 }, { "epoch": 0.49844676645015534, "grad_norm": 3.0908321685109867, "learning_rate": 1.904379987495017e-05, "loss": 0.8522, "step": 3530 }, { "epoch": 0.4985879695001412, "grad_norm": 3.8485659624660973, "learning_rate": 1.9043149243010993e-05, "loss": 1.1068, "step": 3531 }, { "epoch": 0.4987291725501271, "grad_norm": 3.4495626847320295, "learning_rate": 1.904249840091293e-05, "loss": 0.9449, "step": 3532 }, { "epoch": 0.49887037560011294, "grad_norm": 4.086130604075075, "learning_rate": 1.904184734867111e-05, "loss": 1.2545, "step": 3533 }, { "epoch": 0.4990115786500988, "grad_norm": 4.0830848190317255, "learning_rate": 1.9041196086300666e-05, "loss": 1.3101, "step": 3534 }, { "epoch": 0.4991527817000847, "grad_norm": 3.5406241563020293, "learning_rate": 1.9040544613816725e-05, "loss": 0.9975, "step": 3535 }, { "epoch": 0.4992939847500706, "grad_norm": 4.18772346173357, "learning_rate": 1.9039892931234434e-05, "loss": 1.1118, "step": 3536 }, { "epoch": 0.49943518780005647, "grad_norm": 3.131087775256141, "learning_rate": 1.9039241038568935e-05, "loss": 0.7823, "step": 3537 }, { "epoch": 0.49957639085004235, "grad_norm": 4.114747029399947, "learning_rate": 1.903858893583538e-05, "loss": 0.9766, "step": 3538 }, { "epoch": 0.49971759390002823, "grad_norm": 3.6890885027596374, "learning_rate": 1.903793662304892e-05, "loss": 0.9695, "step": 3539 }, { "epoch": 0.4998587969500141, "grad_norm": 3.7762448283380086, "learning_rate": 1.9037284100224714e-05, "loss": 1.0643, "step": 3540 }, { "epoch": 0.5, "grad_norm": 3.530554222585456, "learning_rate": 1.903663136737793e-05, "loss": 0.9581, "step": 3541 }, { "epoch": 0.5001412030499859, "grad_norm": 3.892862330399702, "learning_rate": 1.9035978424523737e-05, "loss": 1.0917, "step": 3542 }, { "epoch": 0.5002824060999718, "grad_norm": 3.686781474447699, "learning_rate": 1.9035325271677306e-05, "loss": 0.8535, "step": 3543 }, { "epoch": 0.5004236091499576, "grad_norm": 3.6155627638875756, "learning_rate": 1.903467190885382e-05, "loss": 1.0753, "step": 3544 }, { "epoch": 0.5005648121999435, "grad_norm": 3.692662502399999, "learning_rate": 1.9034018336068457e-05, "loss": 1.0927, "step": 3545 }, { "epoch": 0.5007060152499294, "grad_norm": 3.5902172480880545, "learning_rate": 1.903336455333641e-05, "loss": 1.0147, "step": 3546 }, { "epoch": 0.5008472182999153, "grad_norm": 3.9720285326043787, "learning_rate": 1.9032710560672875e-05, "loss": 1.0484, "step": 3547 }, { "epoch": 0.5009884213499012, "grad_norm": 4.4071176828064695, "learning_rate": 1.9032056358093048e-05, "loss": 1.0061, "step": 3548 }, { "epoch": 0.5011296243998871, "grad_norm": 3.6824348178668176, "learning_rate": 1.9031401945612127e-05, "loss": 1.0175, "step": 3549 }, { "epoch": 0.501270827449873, "grad_norm": 3.4601397019895455, "learning_rate": 1.903074732324533e-05, "loss": 1.0654, "step": 3550 }, { "epoch": 0.5014120304998588, "grad_norm": 3.5378765846449385, "learning_rate": 1.9030092491007863e-05, "loss": 0.9111, "step": 3551 }, { "epoch": 0.5015532335498447, "grad_norm": 4.108914465178884, "learning_rate": 1.9029437448914945e-05, "loss": 0.983, "step": 3552 }, { "epoch": 0.5016944365998306, "grad_norm": 3.9997638963203324, "learning_rate": 1.9028782196981802e-05, "loss": 0.9203, "step": 3553 }, { "epoch": 0.5018356396498165, "grad_norm": 3.876315596043194, "learning_rate": 1.902812673522366e-05, "loss": 1.0369, "step": 3554 }, { "epoch": 0.5019768426998024, "grad_norm": 3.984966909708275, "learning_rate": 1.902747106365575e-05, "loss": 0.9605, "step": 3555 }, { "epoch": 0.5021180457497882, "grad_norm": 3.9704851841359874, "learning_rate": 1.9026815182293315e-05, "loss": 0.9971, "step": 3556 }, { "epoch": 0.5022592487997741, "grad_norm": 3.569795539072415, "learning_rate": 1.902615909115159e-05, "loss": 0.925, "step": 3557 }, { "epoch": 0.50240045184976, "grad_norm": 4.129289270568844, "learning_rate": 1.9025502790245824e-05, "loss": 0.8938, "step": 3558 }, { "epoch": 0.5025416548997458, "grad_norm": 3.9926097051049236, "learning_rate": 1.9024846279591275e-05, "loss": 0.9191, "step": 3559 }, { "epoch": 0.5026828579497317, "grad_norm": 3.9061456542334803, "learning_rate": 1.9024189559203193e-05, "loss": 1.0419, "step": 3560 }, { "epoch": 0.5028240609997175, "grad_norm": 4.678394329802124, "learning_rate": 1.9023532629096844e-05, "loss": 1.1198, "step": 3561 }, { "epoch": 0.5029652640497034, "grad_norm": 2.9282890208204475, "learning_rate": 1.9022875489287496e-05, "loss": 0.8027, "step": 3562 }, { "epoch": 0.5031064670996893, "grad_norm": 3.9690187115056714, "learning_rate": 1.902221813979042e-05, "loss": 1.1128, "step": 3563 }, { "epoch": 0.5032476701496752, "grad_norm": 3.775291411198648, "learning_rate": 1.9021560580620883e-05, "loss": 1.0603, "step": 3564 }, { "epoch": 0.5033888731996611, "grad_norm": 4.489152972502885, "learning_rate": 1.902090281179418e-05, "loss": 0.9991, "step": 3565 }, { "epoch": 0.503530076249647, "grad_norm": 4.305584358448502, "learning_rate": 1.902024483332559e-05, "loss": 1.2794, "step": 3566 }, { "epoch": 0.5036712792996328, "grad_norm": 3.8206103977795234, "learning_rate": 1.901958664523041e-05, "loss": 1.1267, "step": 3567 }, { "epoch": 0.5038124823496187, "grad_norm": 4.1385194589939, "learning_rate": 1.9018928247523924e-05, "loss": 1.0367, "step": 3568 }, { "epoch": 0.5039536853996046, "grad_norm": 5.049527986890978, "learning_rate": 1.9018269640221443e-05, "loss": 1.4399, "step": 3569 }, { "epoch": 0.5040948884495905, "grad_norm": 3.7904470790721705, "learning_rate": 1.901761082333827e-05, "loss": 0.9269, "step": 3570 }, { "epoch": 0.5042360914995764, "grad_norm": 4.172804105610269, "learning_rate": 1.901695179688972e-05, "loss": 1.2578, "step": 3571 }, { "epoch": 0.5043772945495623, "grad_norm": 3.8644725119199137, "learning_rate": 1.90162925608911e-05, "loss": 1.2958, "step": 3572 }, { "epoch": 0.5045184975995481, "grad_norm": 3.3924696389548075, "learning_rate": 1.9015633115357737e-05, "loss": 0.917, "step": 3573 }, { "epoch": 0.504659700649534, "grad_norm": 4.024868096526031, "learning_rate": 1.901497346030495e-05, "loss": 0.939, "step": 3574 }, { "epoch": 0.5048009036995199, "grad_norm": 4.473623334099498, "learning_rate": 1.9014313595748078e-05, "loss": 1.173, "step": 3575 }, { "epoch": 0.5049421067495058, "grad_norm": 3.540645539908897, "learning_rate": 1.9013653521702448e-05, "loss": 1.0481, "step": 3576 }, { "epoch": 0.5050833097994917, "grad_norm": 4.159408889706492, "learning_rate": 1.9012993238183405e-05, "loss": 1.2286, "step": 3577 }, { "epoch": 0.5052245128494776, "grad_norm": 3.337832299402863, "learning_rate": 1.901233274520629e-05, "loss": 0.8567, "step": 3578 }, { "epoch": 0.5053657158994634, "grad_norm": 3.3248648775172303, "learning_rate": 1.9011672042786456e-05, "loss": 0.9377, "step": 3579 }, { "epoch": 0.5055069189494493, "grad_norm": 3.7209863149721945, "learning_rate": 1.9011011130939254e-05, "loss": 1.0953, "step": 3580 }, { "epoch": 0.5056481219994352, "grad_norm": 4.025402292113507, "learning_rate": 1.901035000968004e-05, "loss": 0.9544, "step": 3581 }, { "epoch": 0.5057893250494211, "grad_norm": 3.3383966473815394, "learning_rate": 1.900968867902419e-05, "loss": 0.9679, "step": 3582 }, { "epoch": 0.505930528099407, "grad_norm": 3.413119006583441, "learning_rate": 1.900902713898707e-05, "loss": 1.0655, "step": 3583 }, { "epoch": 0.5060717311493929, "grad_norm": 4.549256375682376, "learning_rate": 1.9008365389584042e-05, "loss": 1.2091, "step": 3584 }, { "epoch": 0.5062129341993787, "grad_norm": 4.1394813296709865, "learning_rate": 1.9007703430830494e-05, "loss": 1.2071, "step": 3585 }, { "epoch": 0.5063541372493646, "grad_norm": 4.18179382130419, "learning_rate": 1.900704126274181e-05, "loss": 1.122, "step": 3586 }, { "epoch": 0.5064953402993505, "grad_norm": 3.439329744744783, "learning_rate": 1.9006378885333376e-05, "loss": 1.0286, "step": 3587 }, { "epoch": 0.5066365433493364, "grad_norm": 3.61832909203157, "learning_rate": 1.9005716298620585e-05, "loss": 0.752, "step": 3588 }, { "epoch": 0.5067777463993223, "grad_norm": 4.1270002863447495, "learning_rate": 1.900505350261884e-05, "loss": 1.3398, "step": 3589 }, { "epoch": 0.5069189494493082, "grad_norm": 3.4495490856261917, "learning_rate": 1.9004390497343536e-05, "loss": 0.839, "step": 3590 }, { "epoch": 0.507060152499294, "grad_norm": 3.5863203874984544, "learning_rate": 1.900372728281009e-05, "loss": 0.9984, "step": 3591 }, { "epoch": 0.5072013555492799, "grad_norm": 3.8961479407224235, "learning_rate": 1.9003063859033906e-05, "loss": 1.0233, "step": 3592 }, { "epoch": 0.5073425585992657, "grad_norm": 3.779657379395044, "learning_rate": 1.900240022603041e-05, "loss": 0.9989, "step": 3593 }, { "epoch": 0.5074837616492516, "grad_norm": 3.7650598122332855, "learning_rate": 1.9001736383815023e-05, "loss": 1.0585, "step": 3594 }, { "epoch": 0.5076249646992375, "grad_norm": 3.7421243864633733, "learning_rate": 1.9001072332403162e-05, "loss": 1.0768, "step": 3595 }, { "epoch": 0.5077661677492233, "grad_norm": 3.0453181459687686, "learning_rate": 1.900040807181027e-05, "loss": 0.8341, "step": 3596 }, { "epoch": 0.5079073707992092, "grad_norm": 3.654008327793602, "learning_rate": 1.8999743602051786e-05, "loss": 1.0276, "step": 3597 }, { "epoch": 0.5080485738491951, "grad_norm": 3.096320218006398, "learning_rate": 1.8999078923143142e-05, "loss": 0.9073, "step": 3598 }, { "epoch": 0.508189776899181, "grad_norm": 3.7654146464714833, "learning_rate": 1.899841403509979e-05, "loss": 1.0303, "step": 3599 }, { "epoch": 0.5083309799491669, "grad_norm": 3.8831456959938784, "learning_rate": 1.8997748937937188e-05, "loss": 1.0492, "step": 3600 }, { "epoch": 0.5084721829991528, "grad_norm": 3.930889411527443, "learning_rate": 1.8997083631670783e-05, "loss": 1.0238, "step": 3601 }, { "epoch": 0.5086133860491386, "grad_norm": 5.272065722120395, "learning_rate": 1.899641811631604e-05, "loss": 1.2447, "step": 3602 }, { "epoch": 0.5087545890991245, "grad_norm": 3.4308054223790707, "learning_rate": 1.8995752391888423e-05, "loss": 0.978, "step": 3603 }, { "epoch": 0.5088957921491104, "grad_norm": 3.3000191890233888, "learning_rate": 1.8995086458403408e-05, "loss": 0.9274, "step": 3604 }, { "epoch": 0.5090369951990963, "grad_norm": 4.573607066623763, "learning_rate": 1.899442031587647e-05, "loss": 1.039, "step": 3605 }, { "epoch": 0.5091781982490822, "grad_norm": 4.157466779268365, "learning_rate": 1.8993753964323086e-05, "loss": 1.3027, "step": 3606 }, { "epoch": 0.509319401299068, "grad_norm": 4.258603319206109, "learning_rate": 1.8993087403758743e-05, "loss": 1.1855, "step": 3607 }, { "epoch": 0.5094606043490539, "grad_norm": 3.382662288388237, "learning_rate": 1.8992420634198934e-05, "loss": 0.9887, "step": 3608 }, { "epoch": 0.5096018073990398, "grad_norm": 3.340747209817989, "learning_rate": 1.899175365565915e-05, "loss": 0.9784, "step": 3609 }, { "epoch": 0.5097430104490257, "grad_norm": 3.666786189171731, "learning_rate": 1.8991086468154897e-05, "loss": 1.1034, "step": 3610 }, { "epoch": 0.5098842134990116, "grad_norm": 3.045839866758312, "learning_rate": 1.899041907170168e-05, "loss": 0.893, "step": 3611 }, { "epoch": 0.5100254165489975, "grad_norm": 3.8525195889236055, "learning_rate": 1.8989751466315004e-05, "loss": 1.0713, "step": 3612 }, { "epoch": 0.5101666195989834, "grad_norm": 3.854384120283142, "learning_rate": 1.8989083652010385e-05, "loss": 1.0453, "step": 3613 }, { "epoch": 0.5103078226489692, "grad_norm": 4.215942711172723, "learning_rate": 1.8988415628803345e-05, "loss": 1.1505, "step": 3614 }, { "epoch": 0.5104490256989551, "grad_norm": 3.838085119754798, "learning_rate": 1.8987747396709405e-05, "loss": 1.0882, "step": 3615 }, { "epoch": 0.510590228748941, "grad_norm": 3.7185975291506845, "learning_rate": 1.8987078955744103e-05, "loss": 1.1034, "step": 3616 }, { "epoch": 0.5107314317989269, "grad_norm": 4.298931135377946, "learning_rate": 1.8986410305922963e-05, "loss": 1.2667, "step": 3617 }, { "epoch": 0.5108726348489128, "grad_norm": 3.2628869877192557, "learning_rate": 1.898574144726153e-05, "loss": 0.8813, "step": 3618 }, { "epoch": 0.5110138378988986, "grad_norm": 4.606306841342923, "learning_rate": 1.8985072379775346e-05, "loss": 0.8743, "step": 3619 }, { "epoch": 0.5111550409488845, "grad_norm": 4.083191276203403, "learning_rate": 1.8984403103479957e-05, "loss": 0.8632, "step": 3620 }, { "epoch": 0.5112962439988704, "grad_norm": 3.7484423362005144, "learning_rate": 1.8983733618390924e-05, "loss": 1.0854, "step": 3621 }, { "epoch": 0.5114374470488563, "grad_norm": 3.8231476452207036, "learning_rate": 1.89830639245238e-05, "loss": 1.077, "step": 3622 }, { "epoch": 0.5115786500988422, "grad_norm": 3.6690232008708965, "learning_rate": 1.898239402189415e-05, "loss": 1.1201, "step": 3623 }, { "epoch": 0.5117198531488281, "grad_norm": 3.8897334918718873, "learning_rate": 1.8981723910517546e-05, "loss": 0.9572, "step": 3624 }, { "epoch": 0.511861056198814, "grad_norm": 4.231071624321432, "learning_rate": 1.8981053590409552e-05, "loss": 1.226, "step": 3625 }, { "epoch": 0.5120022592487998, "grad_norm": 4.129271785268568, "learning_rate": 1.8980383061585752e-05, "loss": 0.9597, "step": 3626 }, { "epoch": 0.5121434622987856, "grad_norm": 3.4234180987772356, "learning_rate": 1.8979712324061727e-05, "loss": 0.9068, "step": 3627 }, { "epoch": 0.5122846653487715, "grad_norm": 4.00505946940984, "learning_rate": 1.8979041377853068e-05, "loss": 1.074, "step": 3628 }, { "epoch": 0.5124258683987574, "grad_norm": 3.374078592567029, "learning_rate": 1.8978370222975364e-05, "loss": 0.9391, "step": 3629 }, { "epoch": 0.5125670714487432, "grad_norm": 3.263779818995175, "learning_rate": 1.8977698859444217e-05, "loss": 0.991, "step": 3630 }, { "epoch": 0.5127082744987291, "grad_norm": 3.8164569059674727, "learning_rate": 1.8977027287275224e-05, "loss": 1.0413, "step": 3631 }, { "epoch": 0.512849477548715, "grad_norm": 3.470165928657235, "learning_rate": 1.8976355506483988e-05, "loss": 0.9671, "step": 3632 }, { "epoch": 0.5129906805987009, "grad_norm": 4.146004560834866, "learning_rate": 1.8975683517086132e-05, "loss": 1.0802, "step": 3633 }, { "epoch": 0.5131318836486868, "grad_norm": 4.284048360764937, "learning_rate": 1.8975011319097264e-05, "loss": 0.9566, "step": 3634 }, { "epoch": 0.5132730866986727, "grad_norm": 4.077226256946177, "learning_rate": 1.8974338912533015e-05, "loss": 1.2519, "step": 3635 }, { "epoch": 0.5134142897486585, "grad_norm": 4.044465943899905, "learning_rate": 1.8973666297409e-05, "loss": 0.989, "step": 3636 }, { "epoch": 0.5135554927986444, "grad_norm": 3.6787283735921896, "learning_rate": 1.897299347374086e-05, "loss": 1.1405, "step": 3637 }, { "epoch": 0.5136966958486303, "grad_norm": 4.450199282463527, "learning_rate": 1.8972320441544224e-05, "loss": 1.3281, "step": 3638 }, { "epoch": 0.5138378988986162, "grad_norm": 4.275791173659126, "learning_rate": 1.8971647200834736e-05, "loss": 1.2003, "step": 3639 }, { "epoch": 0.5139791019486021, "grad_norm": 4.097974109983107, "learning_rate": 1.897097375162804e-05, "loss": 1.2619, "step": 3640 }, { "epoch": 0.514120304998588, "grad_norm": 3.527892740027429, "learning_rate": 1.897030009393979e-05, "loss": 0.8972, "step": 3641 }, { "epoch": 0.5142615080485738, "grad_norm": 3.633855302089347, "learning_rate": 1.896962622778564e-05, "loss": 1.0984, "step": 3642 }, { "epoch": 0.5144027110985597, "grad_norm": 3.824745569351625, "learning_rate": 1.896895215318125e-05, "loss": 1.0004, "step": 3643 }, { "epoch": 0.5145439141485456, "grad_norm": 4.445059663507518, "learning_rate": 1.8968277870142283e-05, "loss": 1.3118, "step": 3644 }, { "epoch": 0.5146851171985315, "grad_norm": 3.411235476935329, "learning_rate": 1.8967603378684415e-05, "loss": 0.9442, "step": 3645 }, { "epoch": 0.5148263202485174, "grad_norm": 3.9768302899986874, "learning_rate": 1.8966928678823317e-05, "loss": 1.035, "step": 3646 }, { "epoch": 0.5149675232985033, "grad_norm": 3.7281461238433495, "learning_rate": 1.8966253770574668e-05, "loss": 1.1572, "step": 3647 }, { "epoch": 0.5151087263484891, "grad_norm": 3.6161528196762234, "learning_rate": 1.8965578653954152e-05, "loss": 0.9144, "step": 3648 }, { "epoch": 0.515249929398475, "grad_norm": 3.071135831040302, "learning_rate": 1.8964903328977463e-05, "loss": 0.6992, "step": 3649 }, { "epoch": 0.5153911324484609, "grad_norm": 4.124959480505632, "learning_rate": 1.896422779566029e-05, "loss": 1.0834, "step": 3650 }, { "epoch": 0.5155323354984468, "grad_norm": 4.08747495440362, "learning_rate": 1.8963552054018335e-05, "loss": 1.1953, "step": 3651 }, { "epoch": 0.5156735385484327, "grad_norm": 3.5177213996057417, "learning_rate": 1.8962876104067303e-05, "loss": 1.0634, "step": 3652 }, { "epoch": 0.5158147415984186, "grad_norm": 3.840847992051843, "learning_rate": 1.8962199945822898e-05, "loss": 1.01, "step": 3653 }, { "epoch": 0.5159559446484044, "grad_norm": 4.726897451854293, "learning_rate": 1.8961523579300836e-05, "loss": 1.1062, "step": 3654 }, { "epoch": 0.5160971476983903, "grad_norm": 5.377347012260199, "learning_rate": 1.896084700451684e-05, "loss": 1.3303, "step": 3655 }, { "epoch": 0.5162383507483762, "grad_norm": 3.8526661156892885, "learning_rate": 1.896017022148663e-05, "loss": 1.1655, "step": 3656 }, { "epoch": 0.5163795537983621, "grad_norm": 4.000957704211775, "learning_rate": 1.895949323022593e-05, "loss": 0.9976, "step": 3657 }, { "epoch": 0.516520756848348, "grad_norm": 3.945864479731758, "learning_rate": 1.895881603075048e-05, "loss": 1.0242, "step": 3658 }, { "epoch": 0.5166619598983339, "grad_norm": 4.451788436215483, "learning_rate": 1.895813862307601e-05, "loss": 1.2664, "step": 3659 }, { "epoch": 0.5168031629483197, "grad_norm": 3.6535719166643714, "learning_rate": 1.8957461007218272e-05, "loss": 1.1365, "step": 3660 }, { "epoch": 0.5169443659983055, "grad_norm": 4.302465856894494, "learning_rate": 1.8956783183193007e-05, "loss": 1.0524, "step": 3661 }, { "epoch": 0.5170855690482914, "grad_norm": 3.907881880938853, "learning_rate": 1.8956105151015966e-05, "loss": 1.1253, "step": 3662 }, { "epoch": 0.5172267720982773, "grad_norm": 3.8202265077123196, "learning_rate": 1.8955426910702912e-05, "loss": 1.0908, "step": 3663 }, { "epoch": 0.5173679751482632, "grad_norm": 4.753180861108541, "learning_rate": 1.8954748462269604e-05, "loss": 1.1679, "step": 3664 }, { "epoch": 0.517509178198249, "grad_norm": 4.748993879229028, "learning_rate": 1.895406980573181e-05, "loss": 1.2624, "step": 3665 }, { "epoch": 0.5176503812482349, "grad_norm": 3.2670633510673674, "learning_rate": 1.89533909411053e-05, "loss": 1.2222, "step": 3666 }, { "epoch": 0.5177915842982208, "grad_norm": 3.7355853854399856, "learning_rate": 1.895271186840585e-05, "loss": 1.0391, "step": 3667 }, { "epoch": 0.5179327873482067, "grad_norm": 3.827090780939897, "learning_rate": 1.895203258764924e-05, "loss": 0.9972, "step": 3668 }, { "epoch": 0.5180739903981926, "grad_norm": 4.02470609056982, "learning_rate": 1.8951353098851267e-05, "loss": 1.0301, "step": 3669 }, { "epoch": 0.5182151934481785, "grad_norm": 4.841136148060212, "learning_rate": 1.8950673402027706e-05, "loss": 1.204, "step": 3670 }, { "epoch": 0.5183563964981643, "grad_norm": 3.6355305985753796, "learning_rate": 1.8949993497194365e-05, "loss": 0.8103, "step": 3671 }, { "epoch": 0.5184975995481502, "grad_norm": 3.4794458251083507, "learning_rate": 1.8949313384367038e-05, "loss": 1.0257, "step": 3672 }, { "epoch": 0.5186388025981361, "grad_norm": 4.0146358586845015, "learning_rate": 1.8948633063561536e-05, "loss": 1.1554, "step": 3673 }, { "epoch": 0.518780005648122, "grad_norm": 3.474779021786041, "learning_rate": 1.8947952534793663e-05, "loss": 0.8154, "step": 3674 }, { "epoch": 0.5189212086981079, "grad_norm": 4.125988408948386, "learning_rate": 1.894727179807924e-05, "loss": 1.1373, "step": 3675 }, { "epoch": 0.5190624117480938, "grad_norm": 3.534989023382573, "learning_rate": 1.894659085343408e-05, "loss": 0.9619, "step": 3676 }, { "epoch": 0.5192036147980796, "grad_norm": 4.243365209012949, "learning_rate": 1.8945909700874013e-05, "loss": 1.0293, "step": 3677 }, { "epoch": 0.5193448178480655, "grad_norm": 3.9015538129315464, "learning_rate": 1.894522834041487e-05, "loss": 1.2448, "step": 3678 }, { "epoch": 0.5194860208980514, "grad_norm": 4.077228732168723, "learning_rate": 1.8944546772072487e-05, "loss": 0.9919, "step": 3679 }, { "epoch": 0.5196272239480373, "grad_norm": 3.925950921261376, "learning_rate": 1.8943864995862692e-05, "loss": 1.1329, "step": 3680 }, { "epoch": 0.5197684269980232, "grad_norm": 4.148283827747259, "learning_rate": 1.8943183011801346e-05, "loss": 1.1611, "step": 3681 }, { "epoch": 0.519909630048009, "grad_norm": 3.540684038895232, "learning_rate": 1.8942500819904285e-05, "loss": 0.919, "step": 3682 }, { "epoch": 0.5200508330979949, "grad_norm": 6.981249547133298, "learning_rate": 1.8941818420187364e-05, "loss": 1.0309, "step": 3683 }, { "epoch": 0.5201920361479808, "grad_norm": 3.2245458914995524, "learning_rate": 1.8941135812666445e-05, "loss": 0.8724, "step": 3684 }, { "epoch": 0.5203332391979667, "grad_norm": 3.745572757756558, "learning_rate": 1.8940452997357394e-05, "loss": 0.9531, "step": 3685 }, { "epoch": 0.5204744422479526, "grad_norm": 3.777472974555665, "learning_rate": 1.8939769974276076e-05, "loss": 0.8905, "step": 3686 }, { "epoch": 0.5206156452979385, "grad_norm": 4.108352997250015, "learning_rate": 1.8939086743438363e-05, "loss": 1.1512, "step": 3687 }, { "epoch": 0.5207568483479244, "grad_norm": 3.9989724187218565, "learning_rate": 1.8938403304860137e-05, "loss": 1.0386, "step": 3688 }, { "epoch": 0.5208980513979102, "grad_norm": 3.5035108379191104, "learning_rate": 1.8937719658557276e-05, "loss": 1.1209, "step": 3689 }, { "epoch": 0.5210392544478961, "grad_norm": 3.29570684624625, "learning_rate": 1.893703580454567e-05, "loss": 0.841, "step": 3690 }, { "epoch": 0.521180457497882, "grad_norm": 2.9166760533203875, "learning_rate": 1.8936351742841213e-05, "loss": 0.8326, "step": 3691 }, { "epoch": 0.5213216605478679, "grad_norm": 3.593051297734018, "learning_rate": 1.89356674734598e-05, "loss": 1.0042, "step": 3692 }, { "epoch": 0.5214628635978538, "grad_norm": 5.735259005849613, "learning_rate": 1.8934982996417336e-05, "loss": 1.1838, "step": 3693 }, { "epoch": 0.5216040666478396, "grad_norm": 3.0227126054085107, "learning_rate": 1.8934298311729728e-05, "loss": 0.8901, "step": 3694 }, { "epoch": 0.5217452696978254, "grad_norm": 3.543966301628358, "learning_rate": 1.893361341941288e-05, "loss": 0.9893, "step": 3695 }, { "epoch": 0.5218864727478113, "grad_norm": 3.271505482749413, "learning_rate": 1.8932928319482714e-05, "loss": 0.7027, "step": 3696 }, { "epoch": 0.5220276757977972, "grad_norm": 4.9295632675804075, "learning_rate": 1.8932243011955154e-05, "loss": 1.1269, "step": 3697 }, { "epoch": 0.5221688788477831, "grad_norm": 4.914879796632518, "learning_rate": 1.8931557496846124e-05, "loss": 1.2371, "step": 3698 }, { "epoch": 0.522310081897769, "grad_norm": 3.8073822826407855, "learning_rate": 1.8930871774171555e-05, "loss": 0.9767, "step": 3699 }, { "epoch": 0.5224512849477548, "grad_norm": 3.4674787161621303, "learning_rate": 1.8930185843947382e-05, "loss": 0.9508, "step": 3700 }, { "epoch": 0.5225924879977407, "grad_norm": 3.5433596983422033, "learning_rate": 1.892949970618955e-05, "loss": 0.9219, "step": 3701 }, { "epoch": 0.5227336910477266, "grad_norm": 3.534829723048114, "learning_rate": 1.8928813360914e-05, "loss": 0.9747, "step": 3702 }, { "epoch": 0.5228748940977125, "grad_norm": 4.220976375918456, "learning_rate": 1.892812680813668e-05, "loss": 1.0705, "step": 3703 }, { "epoch": 0.5230160971476984, "grad_norm": 3.218765265829235, "learning_rate": 1.892744004787355e-05, "loss": 0.8724, "step": 3704 }, { "epoch": 0.5231573001976842, "grad_norm": 3.9482994667748335, "learning_rate": 1.892675308014057e-05, "loss": 1.0468, "step": 3705 }, { "epoch": 0.5232985032476701, "grad_norm": 4.398453109970903, "learning_rate": 1.8926065904953703e-05, "loss": 1.1235, "step": 3706 }, { "epoch": 0.523439706297656, "grad_norm": 3.7671666690079078, "learning_rate": 1.8925378522328918e-05, "loss": 0.9358, "step": 3707 }, { "epoch": 0.5235809093476419, "grad_norm": 3.692662359522412, "learning_rate": 1.8924690932282193e-05, "loss": 0.9935, "step": 3708 }, { "epoch": 0.5237221123976278, "grad_norm": 4.149845068508517, "learning_rate": 1.8924003134829504e-05, "loss": 1.1628, "step": 3709 }, { "epoch": 0.5238633154476137, "grad_norm": 3.477017695137049, "learning_rate": 1.8923315129986838e-05, "loss": 1.0132, "step": 3710 }, { "epoch": 0.5240045184975995, "grad_norm": 3.8890350611954827, "learning_rate": 1.8922626917770178e-05, "loss": 1.1258, "step": 3711 }, { "epoch": 0.5241457215475854, "grad_norm": 3.405094231212233, "learning_rate": 1.8921938498195523e-05, "loss": 1.0686, "step": 3712 }, { "epoch": 0.5242869245975713, "grad_norm": 3.883819331479461, "learning_rate": 1.8921249871278874e-05, "loss": 1.1591, "step": 3713 }, { "epoch": 0.5244281276475572, "grad_norm": 3.334312005637295, "learning_rate": 1.892056103703623e-05, "loss": 0.9147, "step": 3714 }, { "epoch": 0.5245693306975431, "grad_norm": 4.277839352820532, "learning_rate": 1.8919871995483595e-05, "loss": 1.0862, "step": 3715 }, { "epoch": 0.524710533747529, "grad_norm": 3.7651505289899068, "learning_rate": 1.891918274663699e-05, "loss": 1.0548, "step": 3716 }, { "epoch": 0.5248517367975148, "grad_norm": 4.06183280578874, "learning_rate": 1.8918493290512432e-05, "loss": 1.2302, "step": 3717 }, { "epoch": 0.5249929398475007, "grad_norm": 4.7193096967857775, "learning_rate": 1.891780362712594e-05, "loss": 1.3575, "step": 3718 }, { "epoch": 0.5251341428974866, "grad_norm": 4.056500474461471, "learning_rate": 1.8917113756493542e-05, "loss": 1.2785, "step": 3719 }, { "epoch": 0.5252753459474725, "grad_norm": 3.6656774541120503, "learning_rate": 1.891642367863127e-05, "loss": 1.0496, "step": 3720 }, { "epoch": 0.5254165489974584, "grad_norm": 3.427047669738966, "learning_rate": 1.8915733393555166e-05, "loss": 1.0009, "step": 3721 }, { "epoch": 0.5255577520474443, "grad_norm": 3.2383427100618047, "learning_rate": 1.891504290128127e-05, "loss": 0.9051, "step": 3722 }, { "epoch": 0.5256989550974301, "grad_norm": 2.821517886805279, "learning_rate": 1.8914352201825622e-05, "loss": 0.7275, "step": 3723 }, { "epoch": 0.525840158147416, "grad_norm": 3.9920522754610603, "learning_rate": 1.891366129520428e-05, "loss": 1.1282, "step": 3724 }, { "epoch": 0.5259813611974019, "grad_norm": 3.8416653054336467, "learning_rate": 1.89129701814333e-05, "loss": 1.3407, "step": 3725 }, { "epoch": 0.5261225642473878, "grad_norm": 3.5217875546557735, "learning_rate": 1.8912278860528742e-05, "loss": 1.048, "step": 3726 }, { "epoch": 0.5262637672973737, "grad_norm": 3.6915822280847763, "learning_rate": 1.8911587332506674e-05, "loss": 1.0335, "step": 3727 }, { "epoch": 0.5264049703473596, "grad_norm": 4.565950824037857, "learning_rate": 1.891089559738316e-05, "loss": 1.0954, "step": 3728 }, { "epoch": 0.5265461733973453, "grad_norm": 5.255165768442272, "learning_rate": 1.8910203655174285e-05, "loss": 1.3427, "step": 3729 }, { "epoch": 0.5266873764473312, "grad_norm": 3.5944402451511572, "learning_rate": 1.8909511505896122e-05, "loss": 0.9757, "step": 3730 }, { "epoch": 0.5268285794973171, "grad_norm": 3.621901786560854, "learning_rate": 1.8908819149564764e-05, "loss": 1.0628, "step": 3731 }, { "epoch": 0.526969782547303, "grad_norm": 3.2584361620380067, "learning_rate": 1.890812658619629e-05, "loss": 0.8937, "step": 3732 }, { "epoch": 0.5271109855972889, "grad_norm": 3.387890035674744, "learning_rate": 1.8907433815806805e-05, "loss": 0.9446, "step": 3733 }, { "epoch": 0.5272521886472747, "grad_norm": 3.6879236404237243, "learning_rate": 1.8906740838412404e-05, "loss": 0.8987, "step": 3734 }, { "epoch": 0.5273933916972606, "grad_norm": 4.474668386061455, "learning_rate": 1.8906047654029196e-05, "loss": 1.0467, "step": 3735 }, { "epoch": 0.5275345947472465, "grad_norm": 3.5608070420740296, "learning_rate": 1.890535426267328e-05, "loss": 1.1148, "step": 3736 }, { "epoch": 0.5276757977972324, "grad_norm": 3.929317790760646, "learning_rate": 1.8904660664360784e-05, "loss": 1.1848, "step": 3737 }, { "epoch": 0.5278170008472183, "grad_norm": 4.145990524836006, "learning_rate": 1.8903966859107816e-05, "loss": 0.9434, "step": 3738 }, { "epoch": 0.5279582038972042, "grad_norm": 3.3652632248775367, "learning_rate": 1.8903272846930503e-05, "loss": 1.0405, "step": 3739 }, { "epoch": 0.52809940694719, "grad_norm": 3.700148921884483, "learning_rate": 1.8902578627844975e-05, "loss": 1.057, "step": 3740 }, { "epoch": 0.5282406099971759, "grad_norm": 3.325584290683457, "learning_rate": 1.8901884201867364e-05, "loss": 0.924, "step": 3741 }, { "epoch": 0.5283818130471618, "grad_norm": 3.4258733127405856, "learning_rate": 1.890118956901381e-05, "loss": 0.9585, "step": 3742 }, { "epoch": 0.5285230160971477, "grad_norm": 3.5223727081235516, "learning_rate": 1.8900494729300453e-05, "loss": 1.0187, "step": 3743 }, { "epoch": 0.5286642191471336, "grad_norm": 4.102496728105654, "learning_rate": 1.8899799682743442e-05, "loss": 1.234, "step": 3744 }, { "epoch": 0.5288054221971195, "grad_norm": 3.501410272194689, "learning_rate": 1.8899104429358932e-05, "loss": 1.091, "step": 3745 }, { "epoch": 0.5289466252471053, "grad_norm": 3.9053115568756556, "learning_rate": 1.8898408969163078e-05, "loss": 0.9744, "step": 3746 }, { "epoch": 0.5290878282970912, "grad_norm": 3.5696788932511008, "learning_rate": 1.889771330217204e-05, "loss": 1.0454, "step": 3747 }, { "epoch": 0.5292290313470771, "grad_norm": 3.5202707605735375, "learning_rate": 1.889701742840199e-05, "loss": 1.185, "step": 3748 }, { "epoch": 0.529370234397063, "grad_norm": 4.311411512106789, "learning_rate": 1.8896321347869094e-05, "loss": 1.2281, "step": 3749 }, { "epoch": 0.5295114374470489, "grad_norm": 3.7448473654752594, "learning_rate": 1.8895625060589538e-05, "loss": 1.0566, "step": 3750 }, { "epoch": 0.5296526404970348, "grad_norm": 3.9009726567209104, "learning_rate": 1.8894928566579492e-05, "loss": 1.0038, "step": 3751 }, { "epoch": 0.5297938435470206, "grad_norm": 4.612467865358213, "learning_rate": 1.8894231865855152e-05, "loss": 1.3822, "step": 3752 }, { "epoch": 0.5299350465970065, "grad_norm": 3.9766762670366074, "learning_rate": 1.88935349584327e-05, "loss": 1.2327, "step": 3753 }, { "epoch": 0.5300762496469924, "grad_norm": 4.33365599534029, "learning_rate": 1.8892837844328338e-05, "loss": 1.1905, "step": 3754 }, { "epoch": 0.5302174526969783, "grad_norm": 4.475437724934253, "learning_rate": 1.8892140523558266e-05, "loss": 1.1966, "step": 3755 }, { "epoch": 0.5303586557469642, "grad_norm": 3.6951482705848915, "learning_rate": 1.8891442996138686e-05, "loss": 0.9878, "step": 3756 }, { "epoch": 0.53049985879695, "grad_norm": 5.005988774696322, "learning_rate": 1.8890745262085812e-05, "loss": 1.1872, "step": 3757 }, { "epoch": 0.5306410618469359, "grad_norm": 3.533546684299308, "learning_rate": 1.8890047321415856e-05, "loss": 0.8394, "step": 3758 }, { "epoch": 0.5307822648969218, "grad_norm": 3.0198817360690224, "learning_rate": 1.8889349174145044e-05, "loss": 0.7916, "step": 3759 }, { "epoch": 0.5309234679469077, "grad_norm": 4.366564004778834, "learning_rate": 1.8888650820289594e-05, "loss": 1.2799, "step": 3760 }, { "epoch": 0.5310646709968936, "grad_norm": 4.459936006808013, "learning_rate": 1.8887952259865735e-05, "loss": 1.1025, "step": 3761 }, { "epoch": 0.5312058740468795, "grad_norm": 3.9761443966282473, "learning_rate": 1.8887253492889708e-05, "loss": 1.0603, "step": 3762 }, { "epoch": 0.5313470770968652, "grad_norm": 3.8201912012924124, "learning_rate": 1.8886554519377744e-05, "loss": 1.0783, "step": 3763 }, { "epoch": 0.5314882801468511, "grad_norm": 3.990610110117607, "learning_rate": 1.8885855339346097e-05, "loss": 1.3363, "step": 3764 }, { "epoch": 0.531629483196837, "grad_norm": 3.7372920390115505, "learning_rate": 1.8885155952811e-05, "loss": 0.9556, "step": 3765 }, { "epoch": 0.5317706862468229, "grad_norm": 5.665162005196365, "learning_rate": 1.8884456359788725e-05, "loss": 0.8942, "step": 3766 }, { "epoch": 0.5319118892968088, "grad_norm": 3.572809254306325, "learning_rate": 1.8883756560295517e-05, "loss": 0.9111, "step": 3767 }, { "epoch": 0.5320530923467947, "grad_norm": 3.6545047638360697, "learning_rate": 1.8883056554347643e-05, "loss": 1.0559, "step": 3768 }, { "epoch": 0.5321942953967805, "grad_norm": 4.081169121234149, "learning_rate": 1.8882356341961374e-05, "loss": 1.102, "step": 3769 }, { "epoch": 0.5323354984467664, "grad_norm": 3.543406279372913, "learning_rate": 1.8881655923152975e-05, "loss": 0.906, "step": 3770 }, { "epoch": 0.5324767014967523, "grad_norm": 3.9266551932895135, "learning_rate": 1.888095529793873e-05, "loss": 0.9235, "step": 3771 }, { "epoch": 0.5326179045467382, "grad_norm": 3.265047355417172, "learning_rate": 1.888025446633492e-05, "loss": 0.8209, "step": 3772 }, { "epoch": 0.5327591075967241, "grad_norm": 3.963005410213608, "learning_rate": 1.8879553428357832e-05, "loss": 1.1074, "step": 3773 }, { "epoch": 0.53290031064671, "grad_norm": 3.4930562886484897, "learning_rate": 1.8878852184023754e-05, "loss": 0.9966, "step": 3774 }, { "epoch": 0.5330415136966958, "grad_norm": 3.6004359110176174, "learning_rate": 1.8878150733348988e-05, "loss": 1.0806, "step": 3775 }, { "epoch": 0.5331827167466817, "grad_norm": 3.3870535403913506, "learning_rate": 1.8877449076349833e-05, "loss": 0.9993, "step": 3776 }, { "epoch": 0.5333239197966676, "grad_norm": 4.6838007969239905, "learning_rate": 1.8876747213042593e-05, "loss": 1.3216, "step": 3777 }, { "epoch": 0.5334651228466535, "grad_norm": 3.514406427630022, "learning_rate": 1.8876045143443583e-05, "loss": 0.9919, "step": 3778 }, { "epoch": 0.5336063258966394, "grad_norm": 3.735516494113757, "learning_rate": 1.887534286756912e-05, "loss": 0.9415, "step": 3779 }, { "epoch": 0.5337475289466252, "grad_norm": 3.544848413726788, "learning_rate": 1.8874640385435515e-05, "loss": 1.0803, "step": 3780 }, { "epoch": 0.5338887319966111, "grad_norm": 4.4592700704814785, "learning_rate": 1.8873937697059106e-05, "loss": 1.1805, "step": 3781 }, { "epoch": 0.534029935046597, "grad_norm": 4.485667861004511, "learning_rate": 1.8873234802456216e-05, "loss": 1.1603, "step": 3782 }, { "epoch": 0.5341711380965829, "grad_norm": 3.140248543462782, "learning_rate": 1.887253170164318e-05, "loss": 0.8463, "step": 3783 }, { "epoch": 0.5343123411465688, "grad_norm": 3.4827840208249814, "learning_rate": 1.887182839463634e-05, "loss": 0.9549, "step": 3784 }, { "epoch": 0.5344535441965547, "grad_norm": 3.810842862002668, "learning_rate": 1.887112488145204e-05, "loss": 0.9456, "step": 3785 }, { "epoch": 0.5345947472465405, "grad_norm": 3.185679454074993, "learning_rate": 1.8870421162106628e-05, "loss": 0.7847, "step": 3786 }, { "epoch": 0.5347359502965264, "grad_norm": 3.685949497467339, "learning_rate": 1.886971723661646e-05, "loss": 1.0316, "step": 3787 }, { "epoch": 0.5348771533465123, "grad_norm": 4.831736445542316, "learning_rate": 1.8869013104997896e-05, "loss": 1.4054, "step": 3788 }, { "epoch": 0.5350183563964982, "grad_norm": 4.617643201035467, "learning_rate": 1.8868308767267294e-05, "loss": 1.2344, "step": 3789 }, { "epoch": 0.5351595594464841, "grad_norm": 3.55219316513033, "learning_rate": 1.8867604223441027e-05, "loss": 1.0954, "step": 3790 }, { "epoch": 0.53530076249647, "grad_norm": 3.4017549632117157, "learning_rate": 1.8866899473535464e-05, "loss": 0.8748, "step": 3791 }, { "epoch": 0.5354419655464558, "grad_norm": 3.6911470744908303, "learning_rate": 1.8866194517566993e-05, "loss": 0.9162, "step": 3792 }, { "epoch": 0.5355831685964417, "grad_norm": 4.577046268320204, "learning_rate": 1.8865489355551987e-05, "loss": 1.1754, "step": 3793 }, { "epoch": 0.5357243716464276, "grad_norm": 4.098129047252893, "learning_rate": 1.886478398750684e-05, "loss": 1.3211, "step": 3794 }, { "epoch": 0.5358655746964135, "grad_norm": 3.8419376047470495, "learning_rate": 1.8864078413447936e-05, "loss": 1.1913, "step": 3795 }, { "epoch": 0.5360067777463994, "grad_norm": 4.16435459236593, "learning_rate": 1.886337263339168e-05, "loss": 1.1394, "step": 3796 }, { "epoch": 0.5361479807963851, "grad_norm": 3.474837498994005, "learning_rate": 1.8862666647354476e-05, "loss": 0.9907, "step": 3797 }, { "epoch": 0.536289183846371, "grad_norm": 3.827259108160972, "learning_rate": 1.8861960455352723e-05, "loss": 1.112, "step": 3798 }, { "epoch": 0.5364303868963569, "grad_norm": 3.802314808632533, "learning_rate": 1.8861254057402836e-05, "loss": 1.1161, "step": 3799 }, { "epoch": 0.5365715899463428, "grad_norm": 4.0700284502483495, "learning_rate": 1.8860547453521232e-05, "loss": 1.1049, "step": 3800 }, { "epoch": 0.5367127929963287, "grad_norm": 4.19886670423054, "learning_rate": 1.8859840643724333e-05, "loss": 0.9153, "step": 3801 }, { "epoch": 0.5368539960463146, "grad_norm": 3.8601149461189697, "learning_rate": 1.8859133628028564e-05, "loss": 0.8878, "step": 3802 }, { "epoch": 0.5369951990963004, "grad_norm": 3.6688535772665247, "learning_rate": 1.8858426406450352e-05, "loss": 0.9219, "step": 3803 }, { "epoch": 0.5371364021462863, "grad_norm": 4.157277899248749, "learning_rate": 1.8857718979006135e-05, "loss": 1.0764, "step": 3804 }, { "epoch": 0.5372776051962722, "grad_norm": 3.7108899977284655, "learning_rate": 1.8857011345712363e-05, "loss": 0.9823, "step": 3805 }, { "epoch": 0.5374188082462581, "grad_norm": 3.9004438884537516, "learning_rate": 1.885630350658546e-05, "loss": 1.2722, "step": 3806 }, { "epoch": 0.537560011296244, "grad_norm": 3.728219225623841, "learning_rate": 1.8855595461641897e-05, "loss": 1.0473, "step": 3807 }, { "epoch": 0.5377012143462299, "grad_norm": 3.881517723387876, "learning_rate": 1.885488721089812e-05, "loss": 1.2276, "step": 3808 }, { "epoch": 0.5378424173962157, "grad_norm": 3.673150938176506, "learning_rate": 1.8854178754370585e-05, "loss": 0.9423, "step": 3809 }, { "epoch": 0.5379836204462016, "grad_norm": 3.761399395682253, "learning_rate": 1.885347009207576e-05, "loss": 1.1541, "step": 3810 }, { "epoch": 0.5381248234961875, "grad_norm": 3.5988274423545663, "learning_rate": 1.8852761224030115e-05, "loss": 0.9223, "step": 3811 }, { "epoch": 0.5382660265461734, "grad_norm": 5.293686764886406, "learning_rate": 1.8852052150250123e-05, "loss": 1.3332, "step": 3812 }, { "epoch": 0.5384072295961593, "grad_norm": 3.3795443651037074, "learning_rate": 1.885134287075226e-05, "loss": 1.0237, "step": 3813 }, { "epoch": 0.5385484326461452, "grad_norm": 3.454811603393523, "learning_rate": 1.885063338555301e-05, "loss": 0.8723, "step": 3814 }, { "epoch": 0.538689635696131, "grad_norm": 4.496403200579583, "learning_rate": 1.8849923694668864e-05, "loss": 1.1199, "step": 3815 }, { "epoch": 0.5388308387461169, "grad_norm": 4.651912270899583, "learning_rate": 1.8849213798116318e-05, "loss": 1.1843, "step": 3816 }, { "epoch": 0.5389720417961028, "grad_norm": 3.6081589923719237, "learning_rate": 1.884850369591186e-05, "loss": 0.936, "step": 3817 }, { "epoch": 0.5391132448460887, "grad_norm": 3.4993076219248125, "learning_rate": 1.8847793388071997e-05, "loss": 1.0451, "step": 3818 }, { "epoch": 0.5392544478960746, "grad_norm": 4.2185681753978805, "learning_rate": 1.884708287461324e-05, "loss": 1.1652, "step": 3819 }, { "epoch": 0.5393956509460605, "grad_norm": 4.156323819689551, "learning_rate": 1.8846372155552095e-05, "loss": 1.1968, "step": 3820 }, { "epoch": 0.5395368539960463, "grad_norm": 4.441259517968104, "learning_rate": 1.8845661230905083e-05, "loss": 1.1527, "step": 3821 }, { "epoch": 0.5396780570460322, "grad_norm": 3.5676195338518486, "learning_rate": 1.884495010068872e-05, "loss": 0.9997, "step": 3822 }, { "epoch": 0.5398192600960181, "grad_norm": 4.184453637252857, "learning_rate": 1.8844238764919543e-05, "loss": 1.0955, "step": 3823 }, { "epoch": 0.539960463146004, "grad_norm": 4.353390440911992, "learning_rate": 1.884352722361407e-05, "loss": 1.2126, "step": 3824 }, { "epoch": 0.5401016661959899, "grad_norm": 3.630989189050421, "learning_rate": 1.884281547678885e-05, "loss": 1.1242, "step": 3825 }, { "epoch": 0.5402428692459758, "grad_norm": 3.737872738951251, "learning_rate": 1.8842103524460414e-05, "loss": 1.0109, "step": 3826 }, { "epoch": 0.5403840722959616, "grad_norm": 3.6222416928471453, "learning_rate": 1.884139136664531e-05, "loss": 0.8416, "step": 3827 }, { "epoch": 0.5405252753459475, "grad_norm": 4.324389445276633, "learning_rate": 1.8840679003360088e-05, "loss": 0.9096, "step": 3828 }, { "epoch": 0.5406664783959334, "grad_norm": 3.7562904897280163, "learning_rate": 1.883996643462131e-05, "loss": 0.9866, "step": 3829 }, { "epoch": 0.5408076814459193, "grad_norm": 3.358237835679867, "learning_rate": 1.8839253660445523e-05, "loss": 0.8707, "step": 3830 }, { "epoch": 0.5409488844959051, "grad_norm": 3.269886142190921, "learning_rate": 1.8838540680849303e-05, "loss": 0.9054, "step": 3831 }, { "epoch": 0.5410900875458909, "grad_norm": 4.0656969369704905, "learning_rate": 1.883782749584921e-05, "loss": 1.2079, "step": 3832 }, { "epoch": 0.5412312905958768, "grad_norm": 3.5265498799610757, "learning_rate": 1.8837114105461827e-05, "loss": 1.0484, "step": 3833 }, { "epoch": 0.5413724936458627, "grad_norm": 4.024059228559577, "learning_rate": 1.8836400509703727e-05, "loss": 1.2135, "step": 3834 }, { "epoch": 0.5415136966958486, "grad_norm": 3.8157557191789553, "learning_rate": 1.8835686708591495e-05, "loss": 1.0417, "step": 3835 }, { "epoch": 0.5416548997458345, "grad_norm": 4.270475969552564, "learning_rate": 1.883497270214172e-05, "loss": 1.1325, "step": 3836 }, { "epoch": 0.5417961027958204, "grad_norm": 4.216334971731389, "learning_rate": 1.8834258490370997e-05, "loss": 1.1135, "step": 3837 }, { "epoch": 0.5419373058458062, "grad_norm": 3.2816332243081123, "learning_rate": 1.8833544073295918e-05, "loss": 1.0178, "step": 3838 }, { "epoch": 0.5420785088957921, "grad_norm": 4.519750750139794, "learning_rate": 1.8832829450933093e-05, "loss": 1.3017, "step": 3839 }, { "epoch": 0.542219711945778, "grad_norm": 3.796527579322978, "learning_rate": 1.8832114623299125e-05, "loss": 0.9906, "step": 3840 }, { "epoch": 0.5423609149957639, "grad_norm": 3.892357886016174, "learning_rate": 1.8831399590410626e-05, "loss": 1.0114, "step": 3841 }, { "epoch": 0.5425021180457498, "grad_norm": 4.878033631309455, "learning_rate": 1.8830684352284217e-05, "loss": 1.2065, "step": 3842 }, { "epoch": 0.5426433210957357, "grad_norm": 3.661274411992499, "learning_rate": 1.8829968908936514e-05, "loss": 0.9734, "step": 3843 }, { "epoch": 0.5427845241457215, "grad_norm": 3.9193353729226517, "learning_rate": 1.882925326038415e-05, "loss": 1.0587, "step": 3844 }, { "epoch": 0.5429257271957074, "grad_norm": 3.8774094487699835, "learning_rate": 1.8828537406643752e-05, "loss": 1.1293, "step": 3845 }, { "epoch": 0.5430669302456933, "grad_norm": 3.7422890666177935, "learning_rate": 1.8827821347731955e-05, "loss": 1.147, "step": 3846 }, { "epoch": 0.5432081332956792, "grad_norm": 4.159755691141257, "learning_rate": 1.882710508366541e-05, "loss": 1.1265, "step": 3847 }, { "epoch": 0.5433493363456651, "grad_norm": 4.008194922680433, "learning_rate": 1.8826388614460746e-05, "loss": 1.2734, "step": 3848 }, { "epoch": 0.543490539395651, "grad_norm": 3.253626219016584, "learning_rate": 1.8825671940134627e-05, "loss": 0.9434, "step": 3849 }, { "epoch": 0.5436317424456368, "grad_norm": 3.7675352513955818, "learning_rate": 1.88249550607037e-05, "loss": 0.9716, "step": 3850 }, { "epoch": 0.5437729454956227, "grad_norm": 3.6538046080886857, "learning_rate": 1.8824237976184638e-05, "loss": 0.9889, "step": 3851 }, { "epoch": 0.5439141485456086, "grad_norm": 3.9386585392304916, "learning_rate": 1.8823520686594087e-05, "loss": 1.1763, "step": 3852 }, { "epoch": 0.5440553515955945, "grad_norm": 4.15527059119074, "learning_rate": 1.8822803191948732e-05, "loss": 1.1513, "step": 3853 }, { "epoch": 0.5441965546455804, "grad_norm": 4.475196729599218, "learning_rate": 1.8822085492265235e-05, "loss": 1.1943, "step": 3854 }, { "epoch": 0.5443377576955662, "grad_norm": 3.615064963858525, "learning_rate": 1.8821367587560283e-05, "loss": 1.0521, "step": 3855 }, { "epoch": 0.5444789607455521, "grad_norm": 3.670768028409117, "learning_rate": 1.8820649477850562e-05, "loss": 0.9234, "step": 3856 }, { "epoch": 0.544620163795538, "grad_norm": 4.233917133415625, "learning_rate": 1.8819931163152753e-05, "loss": 1.0692, "step": 3857 }, { "epoch": 0.5447613668455239, "grad_norm": 3.931715131880214, "learning_rate": 1.881921264348355e-05, "loss": 0.9156, "step": 3858 }, { "epoch": 0.5449025698955098, "grad_norm": 3.927969452779757, "learning_rate": 1.881849391885966e-05, "loss": 1.1097, "step": 3859 }, { "epoch": 0.5450437729454957, "grad_norm": 3.492253830567953, "learning_rate": 1.8817774989297776e-05, "loss": 1.1507, "step": 3860 }, { "epoch": 0.5451849759954815, "grad_norm": 3.7321397409341097, "learning_rate": 1.881705585481461e-05, "loss": 1.012, "step": 3861 }, { "epoch": 0.5453261790454674, "grad_norm": 3.207884745262269, "learning_rate": 1.8816336515426873e-05, "loss": 0.8441, "step": 3862 }, { "epoch": 0.5454673820954533, "grad_norm": 4.531047815098707, "learning_rate": 1.8815616971151284e-05, "loss": 1.026, "step": 3863 }, { "epoch": 0.5456085851454392, "grad_norm": 4.255232529223473, "learning_rate": 1.8814897222004564e-05, "loss": 1.2007, "step": 3864 }, { "epoch": 0.545749788195425, "grad_norm": 4.066086229986954, "learning_rate": 1.881417726800344e-05, "loss": 1.0899, "step": 3865 }, { "epoch": 0.5458909912454109, "grad_norm": 3.304782280935318, "learning_rate": 1.8813457109164642e-05, "loss": 0.8937, "step": 3866 }, { "epoch": 0.5460321942953967, "grad_norm": 3.684151191189467, "learning_rate": 1.8812736745504904e-05, "loss": 1.0937, "step": 3867 }, { "epoch": 0.5461733973453826, "grad_norm": 4.3058057040360085, "learning_rate": 1.8812016177040975e-05, "loss": 1.2465, "step": 3868 }, { "epoch": 0.5463146003953685, "grad_norm": 3.7763622890534245, "learning_rate": 1.8811295403789595e-05, "loss": 1.1207, "step": 3869 }, { "epoch": 0.5464558034453544, "grad_norm": 3.64307849650698, "learning_rate": 1.8810574425767512e-05, "loss": 1.0671, "step": 3870 }, { "epoch": 0.5465970064953403, "grad_norm": 4.020394813710588, "learning_rate": 1.8809853242991485e-05, "loss": 1.2006, "step": 3871 }, { "epoch": 0.5467382095453261, "grad_norm": 3.2932329095896913, "learning_rate": 1.8809131855478276e-05, "loss": 0.9257, "step": 3872 }, { "epoch": 0.546879412595312, "grad_norm": 4.231867467963071, "learning_rate": 1.880841026324464e-05, "loss": 1.1722, "step": 3873 }, { "epoch": 0.5470206156452979, "grad_norm": 4.547104655318466, "learning_rate": 1.8807688466307362e-05, "loss": 1.3541, "step": 3874 }, { "epoch": 0.5471618186952838, "grad_norm": 3.281925917636599, "learning_rate": 1.8806966464683208e-05, "loss": 0.893, "step": 3875 }, { "epoch": 0.5473030217452697, "grad_norm": 4.044671521612103, "learning_rate": 1.880624425838895e-05, "loss": 1.1164, "step": 3876 }, { "epoch": 0.5474442247952556, "grad_norm": 4.525863483666993, "learning_rate": 1.8805521847441382e-05, "loss": 1.0555, "step": 3877 }, { "epoch": 0.5475854278452414, "grad_norm": 3.4182288264259126, "learning_rate": 1.8804799231857292e-05, "loss": 0.9431, "step": 3878 }, { "epoch": 0.5477266308952273, "grad_norm": 4.165673740847113, "learning_rate": 1.880407641165347e-05, "loss": 1.0617, "step": 3879 }, { "epoch": 0.5478678339452132, "grad_norm": 4.090336347631208, "learning_rate": 1.8803353386846708e-05, "loss": 1.0255, "step": 3880 }, { "epoch": 0.5480090369951991, "grad_norm": 3.638200032283269, "learning_rate": 1.8802630157453817e-05, "loss": 1.1112, "step": 3881 }, { "epoch": 0.548150240045185, "grad_norm": 3.4565866003698784, "learning_rate": 1.8801906723491606e-05, "loss": 1.0069, "step": 3882 }, { "epoch": 0.5482914430951709, "grad_norm": 3.9923389185351548, "learning_rate": 1.8801183084976885e-05, "loss": 0.9384, "step": 3883 }, { "epoch": 0.5484326461451567, "grad_norm": 4.8535901937060135, "learning_rate": 1.8800459241926466e-05, "loss": 1.2769, "step": 3884 }, { "epoch": 0.5485738491951426, "grad_norm": 3.5057984356674146, "learning_rate": 1.8799735194357176e-05, "loss": 0.9463, "step": 3885 }, { "epoch": 0.5487150522451285, "grad_norm": 3.9237456823091743, "learning_rate": 1.879901094228584e-05, "loss": 1.0291, "step": 3886 }, { "epoch": 0.5488562552951144, "grad_norm": 3.3254477230159667, "learning_rate": 1.8798286485729293e-05, "loss": 0.9555, "step": 3887 }, { "epoch": 0.5489974583451003, "grad_norm": 3.804346182232666, "learning_rate": 1.8797561824704364e-05, "loss": 1.1534, "step": 3888 }, { "epoch": 0.5491386613950862, "grad_norm": 4.098970539544811, "learning_rate": 1.8796836959227897e-05, "loss": 1.281, "step": 3889 }, { "epoch": 0.549279864445072, "grad_norm": 3.624041841256215, "learning_rate": 1.8796111889316742e-05, "loss": 0.9628, "step": 3890 }, { "epoch": 0.5494210674950579, "grad_norm": 3.881386219187497, "learning_rate": 1.8795386614987744e-05, "loss": 0.9089, "step": 3891 }, { "epoch": 0.5495622705450438, "grad_norm": 4.481357436138758, "learning_rate": 1.879466113625776e-05, "loss": 1.4944, "step": 3892 }, { "epoch": 0.5497034735950297, "grad_norm": 4.684090944139845, "learning_rate": 1.879393545314365e-05, "loss": 1.247, "step": 3893 }, { "epoch": 0.5498446766450156, "grad_norm": 3.399428854535425, "learning_rate": 1.8793209565662273e-05, "loss": 0.9096, "step": 3894 }, { "epoch": 0.5499858796950015, "grad_norm": 3.5351889011058413, "learning_rate": 1.8792483473830505e-05, "loss": 0.9215, "step": 3895 }, { "epoch": 0.5501270827449873, "grad_norm": 3.7516605124384075, "learning_rate": 1.8791757177665223e-05, "loss": 0.9511, "step": 3896 }, { "epoch": 0.5502682857949732, "grad_norm": 3.6299965782933143, "learning_rate": 1.8791030677183294e-05, "loss": 1.1281, "step": 3897 }, { "epoch": 0.5504094888449591, "grad_norm": 3.1349215061997713, "learning_rate": 1.8790303972401616e-05, "loss": 0.9632, "step": 3898 }, { "epoch": 0.5505506918949449, "grad_norm": 4.716373423201452, "learning_rate": 1.8789577063337066e-05, "loss": 1.4103, "step": 3899 }, { "epoch": 0.5506918949449308, "grad_norm": 3.919946827664476, "learning_rate": 1.878884995000654e-05, "loss": 0.9555, "step": 3900 }, { "epoch": 0.5508330979949166, "grad_norm": 4.3998338915242, "learning_rate": 1.878812263242694e-05, "loss": 1.3407, "step": 3901 }, { "epoch": 0.5509743010449025, "grad_norm": 4.713140550161659, "learning_rate": 1.8787395110615163e-05, "loss": 1.1962, "step": 3902 }, { "epoch": 0.5511155040948884, "grad_norm": 4.145138340696947, "learning_rate": 1.8786667384588117e-05, "loss": 1.0392, "step": 3903 }, { "epoch": 0.5512567071448743, "grad_norm": 4.80046731898512, "learning_rate": 1.878593945436272e-05, "loss": 1.094, "step": 3904 }, { "epoch": 0.5513979101948602, "grad_norm": 4.141521166719825, "learning_rate": 1.8785211319955882e-05, "loss": 1.2661, "step": 3905 }, { "epoch": 0.5515391132448461, "grad_norm": 3.625176001315281, "learning_rate": 1.8784482981384523e-05, "loss": 1.0683, "step": 3906 }, { "epoch": 0.5516803162948319, "grad_norm": 4.0191478155283455, "learning_rate": 1.878375443866558e-05, "loss": 1.1285, "step": 3907 }, { "epoch": 0.5518215193448178, "grad_norm": 4.209473167398816, "learning_rate": 1.8783025691815974e-05, "loss": 1.066, "step": 3908 }, { "epoch": 0.5519627223948037, "grad_norm": 3.9650233077924715, "learning_rate": 1.8782296740852645e-05, "loss": 1.0758, "step": 3909 }, { "epoch": 0.5521039254447896, "grad_norm": 6.0009516870332735, "learning_rate": 1.878156758579253e-05, "loss": 1.4329, "step": 3910 }, { "epoch": 0.5522451284947755, "grad_norm": 4.281558186087087, "learning_rate": 1.878083822665258e-05, "loss": 1.2022, "step": 3911 }, { "epoch": 0.5523863315447614, "grad_norm": 3.7524113877951164, "learning_rate": 1.8780108663449742e-05, "loss": 1.0042, "step": 3912 }, { "epoch": 0.5525275345947472, "grad_norm": 4.092354738939234, "learning_rate": 1.877937889620097e-05, "loss": 1.2553, "step": 3913 }, { "epoch": 0.5526687376447331, "grad_norm": 3.8064296048539634, "learning_rate": 1.8778648924923222e-05, "loss": 1.1935, "step": 3914 }, { "epoch": 0.552809940694719, "grad_norm": 3.793372793625187, "learning_rate": 1.8777918749633467e-05, "loss": 0.9848, "step": 3915 }, { "epoch": 0.5529511437447049, "grad_norm": 3.9324152935609007, "learning_rate": 1.8777188370348667e-05, "loss": 1.2574, "step": 3916 }, { "epoch": 0.5530923467946908, "grad_norm": 4.219963467373792, "learning_rate": 1.87764577870858e-05, "loss": 1.0743, "step": 3917 }, { "epoch": 0.5532335498446767, "grad_norm": 3.3366265132319826, "learning_rate": 1.877572699986185e-05, "loss": 0.9479, "step": 3918 }, { "epoch": 0.5533747528946625, "grad_norm": 3.409431986578069, "learning_rate": 1.8774996008693792e-05, "loss": 0.971, "step": 3919 }, { "epoch": 0.5535159559446484, "grad_norm": 3.7631322921156185, "learning_rate": 1.8774264813598614e-05, "loss": 1.1178, "step": 3920 }, { "epoch": 0.5536571589946343, "grad_norm": 4.065354728946702, "learning_rate": 1.8773533414593313e-05, "loss": 1.1869, "step": 3921 }, { "epoch": 0.5537983620446202, "grad_norm": 3.9253843613061417, "learning_rate": 1.8772801811694882e-05, "loss": 1.1406, "step": 3922 }, { "epoch": 0.5539395650946061, "grad_norm": 4.096879151591271, "learning_rate": 1.8772070004920327e-05, "loss": 1.0701, "step": 3923 }, { "epoch": 0.554080768144592, "grad_norm": 3.34851567164423, "learning_rate": 1.8771337994286656e-05, "loss": 1.0256, "step": 3924 }, { "epoch": 0.5542219711945778, "grad_norm": 3.646029895615022, "learning_rate": 1.8770605779810874e-05, "loss": 0.8369, "step": 3925 }, { "epoch": 0.5543631742445637, "grad_norm": 4.035941884605273, "learning_rate": 1.8769873361510004e-05, "loss": 1.2231, "step": 3926 }, { "epoch": 0.5545043772945496, "grad_norm": 4.230555942223311, "learning_rate": 1.8769140739401063e-05, "loss": 1.2381, "step": 3927 }, { "epoch": 0.5546455803445355, "grad_norm": 3.3963990347131623, "learning_rate": 1.876840791350108e-05, "loss": 0.9069, "step": 3928 }, { "epoch": 0.5547867833945214, "grad_norm": 3.8069282629586128, "learning_rate": 1.876767488382708e-05, "loss": 1.208, "step": 3929 }, { "epoch": 0.5549279864445072, "grad_norm": 3.8852130747312956, "learning_rate": 1.8766941650396112e-05, "loss": 0.9978, "step": 3930 }, { "epoch": 0.5550691894944931, "grad_norm": 3.798109228490835, "learning_rate": 1.8766208213225198e-05, "loss": 1.0595, "step": 3931 }, { "epoch": 0.555210392544479, "grad_norm": 3.9053044625587114, "learning_rate": 1.876547457233139e-05, "loss": 1.1099, "step": 3932 }, { "epoch": 0.5553515955944648, "grad_norm": 4.108566319220951, "learning_rate": 1.8764740727731744e-05, "loss": 1.143, "step": 3933 }, { "epoch": 0.5554927986444507, "grad_norm": 3.2962138723502017, "learning_rate": 1.8764006679443306e-05, "loss": 0.865, "step": 3934 }, { "epoch": 0.5556340016944366, "grad_norm": 3.512707024354957, "learning_rate": 1.8763272427483136e-05, "loss": 1.0043, "step": 3935 }, { "epoch": 0.5557752047444224, "grad_norm": 3.6645254605777655, "learning_rate": 1.87625379718683e-05, "loss": 1.0616, "step": 3936 }, { "epoch": 0.5559164077944083, "grad_norm": 3.579682619406661, "learning_rate": 1.8761803312615865e-05, "loss": 1.0111, "step": 3937 }, { "epoch": 0.5560576108443942, "grad_norm": 3.209388120596212, "learning_rate": 1.876106844974291e-05, "loss": 0.9496, "step": 3938 }, { "epoch": 0.5561988138943801, "grad_norm": 4.442654653991964, "learning_rate": 1.87603333832665e-05, "loss": 1.194, "step": 3939 }, { "epoch": 0.556340016944366, "grad_norm": 3.6068753790999653, "learning_rate": 1.875959811320373e-05, "loss": 1.0901, "step": 3940 }, { "epoch": 0.5564812199943519, "grad_norm": 3.699216213138372, "learning_rate": 1.8758862639571682e-05, "loss": 1.0667, "step": 3941 }, { "epoch": 0.5566224230443377, "grad_norm": 3.542524684387085, "learning_rate": 1.875812696238745e-05, "loss": 1.0257, "step": 3942 }, { "epoch": 0.5567636260943236, "grad_norm": 3.5789279840972443, "learning_rate": 1.875739108166813e-05, "loss": 0.98, "step": 3943 }, { "epoch": 0.5569048291443095, "grad_norm": 3.8004158515462616, "learning_rate": 1.8756654997430823e-05, "loss": 1.0158, "step": 3944 }, { "epoch": 0.5570460321942954, "grad_norm": 5.118654208789759, "learning_rate": 1.8755918709692637e-05, "loss": 1.2221, "step": 3945 }, { "epoch": 0.5571872352442813, "grad_norm": 3.7194326501607793, "learning_rate": 1.8755182218470675e-05, "loss": 0.9165, "step": 3946 }, { "epoch": 0.5573284382942671, "grad_norm": 4.043919639028234, "learning_rate": 1.8754445523782065e-05, "loss": 1.2577, "step": 3947 }, { "epoch": 0.557469641344253, "grad_norm": 4.373364474134514, "learning_rate": 1.8753708625643924e-05, "loss": 1.2497, "step": 3948 }, { "epoch": 0.5576108443942389, "grad_norm": 3.79483258680227, "learning_rate": 1.8752971524073368e-05, "loss": 1.0862, "step": 3949 }, { "epoch": 0.5577520474442248, "grad_norm": 3.553702322867798, "learning_rate": 1.8752234219087538e-05, "loss": 0.9789, "step": 3950 }, { "epoch": 0.5578932504942107, "grad_norm": 3.694344385272981, "learning_rate": 1.8751496710703564e-05, "loss": 0.946, "step": 3951 }, { "epoch": 0.5580344535441966, "grad_norm": 3.2854711849838565, "learning_rate": 1.8750758998938584e-05, "loss": 0.9047, "step": 3952 }, { "epoch": 0.5581756565941824, "grad_norm": 4.176018630550332, "learning_rate": 1.875002108380975e-05, "loss": 1.2, "step": 3953 }, { "epoch": 0.5583168596441683, "grad_norm": 4.115778279687423, "learning_rate": 1.8749282965334198e-05, "loss": 0.9926, "step": 3954 }, { "epoch": 0.5584580626941542, "grad_norm": 3.999272383878516, "learning_rate": 1.8748544643529093e-05, "loss": 1.1628, "step": 3955 }, { "epoch": 0.5585992657441401, "grad_norm": 4.1043874435203, "learning_rate": 1.8747806118411588e-05, "loss": 1.2388, "step": 3956 }, { "epoch": 0.558740468794126, "grad_norm": 3.941341065769649, "learning_rate": 1.8747067389998846e-05, "loss": 1.0345, "step": 3957 }, { "epoch": 0.5588816718441119, "grad_norm": 3.5947468141047993, "learning_rate": 1.8746328458308034e-05, "loss": 0.9336, "step": 3958 }, { "epoch": 0.5590228748940977, "grad_norm": 3.8959774682520094, "learning_rate": 1.8745589323356327e-05, "loss": 1.1158, "step": 3959 }, { "epoch": 0.5591640779440836, "grad_norm": 3.7044297122585794, "learning_rate": 1.87448499851609e-05, "loss": 1.1249, "step": 3960 }, { "epoch": 0.5593052809940695, "grad_norm": 4.002966209248357, "learning_rate": 1.8744110443738938e-05, "loss": 1.0362, "step": 3961 }, { "epoch": 0.5594464840440554, "grad_norm": 3.5635095782220207, "learning_rate": 1.8743370699107624e-05, "loss": 0.932, "step": 3962 }, { "epoch": 0.5595876870940413, "grad_norm": 3.929249530925411, "learning_rate": 1.874263075128415e-05, "loss": 1.1557, "step": 3963 }, { "epoch": 0.5597288901440272, "grad_norm": 3.601580126326468, "learning_rate": 1.8741890600285714e-05, "loss": 0.9278, "step": 3964 }, { "epoch": 0.559870093194013, "grad_norm": 3.1732489785219107, "learning_rate": 1.8741150246129522e-05, "loss": 0.9764, "step": 3965 }, { "epoch": 0.5600112962439989, "grad_norm": 3.934834744934047, "learning_rate": 1.8740409688832762e-05, "loss": 1.2504, "step": 3966 }, { "epoch": 0.5601524992939847, "grad_norm": 3.6517759031048835, "learning_rate": 1.8739668928412663e-05, "loss": 1.0386, "step": 3967 }, { "epoch": 0.5602937023439706, "grad_norm": 4.985809750805547, "learning_rate": 1.873892796488643e-05, "loss": 1.2311, "step": 3968 }, { "epoch": 0.5604349053939565, "grad_norm": 4.0889405466025615, "learning_rate": 1.8738186798271285e-05, "loss": 0.9641, "step": 3969 }, { "epoch": 0.5605761084439423, "grad_norm": 3.892564459366846, "learning_rate": 1.8737445428584456e-05, "loss": 1.3846, "step": 3970 }, { "epoch": 0.5607173114939282, "grad_norm": 4.134486488328932, "learning_rate": 1.8736703855843165e-05, "loss": 1.1632, "step": 3971 }, { "epoch": 0.5608585145439141, "grad_norm": 3.7667869071410816, "learning_rate": 1.8735962080064652e-05, "loss": 1.1535, "step": 3972 }, { "epoch": 0.5609997175939, "grad_norm": 4.821437270054569, "learning_rate": 1.873522010126615e-05, "loss": 1.2474, "step": 3973 }, { "epoch": 0.5611409206438859, "grad_norm": 3.78807913902716, "learning_rate": 1.8734477919464905e-05, "loss": 0.9248, "step": 3974 }, { "epoch": 0.5612821236938718, "grad_norm": 3.6390268765215956, "learning_rate": 1.873373553467817e-05, "loss": 0.995, "step": 3975 }, { "epoch": 0.5614233267438576, "grad_norm": 3.9631729049554787, "learning_rate": 1.8732992946923187e-05, "loss": 1.267, "step": 3976 }, { "epoch": 0.5615645297938435, "grad_norm": 3.8652569339753042, "learning_rate": 1.8732250156217223e-05, "loss": 1.1233, "step": 3977 }, { "epoch": 0.5617057328438294, "grad_norm": 3.8008455326318056, "learning_rate": 1.8731507162577536e-05, "loss": 1.1189, "step": 3978 }, { "epoch": 0.5618469358938153, "grad_norm": 3.889054749495909, "learning_rate": 1.8730763966021394e-05, "loss": 1.2347, "step": 3979 }, { "epoch": 0.5619881389438012, "grad_norm": 3.4570162683422203, "learning_rate": 1.8730020566566068e-05, "loss": 1.1929, "step": 3980 }, { "epoch": 0.5621293419937871, "grad_norm": 3.4686261039670065, "learning_rate": 1.8729276964228834e-05, "loss": 0.9427, "step": 3981 }, { "epoch": 0.5622705450437729, "grad_norm": 4.401191123030958, "learning_rate": 1.8728533159026972e-05, "loss": 1.2275, "step": 3982 }, { "epoch": 0.5624117480937588, "grad_norm": 3.5849903282434137, "learning_rate": 1.872778915097777e-05, "loss": 0.9179, "step": 3983 }, { "epoch": 0.5625529511437447, "grad_norm": 3.4736852223894794, "learning_rate": 1.8727044940098516e-05, "loss": 1.0392, "step": 3984 }, { "epoch": 0.5626941541937306, "grad_norm": 5.224027531039868, "learning_rate": 1.8726300526406508e-05, "loss": 1.0344, "step": 3985 }, { "epoch": 0.5628353572437165, "grad_norm": 3.4500965779584507, "learning_rate": 1.872555590991904e-05, "loss": 1.0101, "step": 3986 }, { "epoch": 0.5629765602937024, "grad_norm": 3.436432995624353, "learning_rate": 1.8724811090653428e-05, "loss": 0.9618, "step": 3987 }, { "epoch": 0.5631177633436882, "grad_norm": 4.233761733011712, "learning_rate": 1.872406606862697e-05, "loss": 1.1924, "step": 3988 }, { "epoch": 0.5632589663936741, "grad_norm": 3.5373979964986515, "learning_rate": 1.8723320843856986e-05, "loss": 1.0613, "step": 3989 }, { "epoch": 0.56340016944366, "grad_norm": 3.823543056753875, "learning_rate": 1.8722575416360794e-05, "loss": 1.1477, "step": 3990 }, { "epoch": 0.5635413724936459, "grad_norm": 4.141554449085671, "learning_rate": 1.8721829786155714e-05, "loss": 1.0549, "step": 3991 }, { "epoch": 0.5636825755436318, "grad_norm": 3.5182094935613293, "learning_rate": 1.8721083953259078e-05, "loss": 0.8924, "step": 3992 }, { "epoch": 0.5638237785936177, "grad_norm": 3.967193723416123, "learning_rate": 1.8720337917688213e-05, "loss": 1.2069, "step": 3993 }, { "epoch": 0.5639649816436035, "grad_norm": 3.6353769080875624, "learning_rate": 1.8719591679460464e-05, "loss": 1.1306, "step": 3994 }, { "epoch": 0.5641061846935894, "grad_norm": 4.465815263820734, "learning_rate": 1.871884523859317e-05, "loss": 0.9229, "step": 3995 }, { "epoch": 0.5642473877435753, "grad_norm": 3.619868883940579, "learning_rate": 1.871809859510368e-05, "loss": 1.0567, "step": 3996 }, { "epoch": 0.5643885907935612, "grad_norm": 3.8182239832595153, "learning_rate": 1.8717351749009342e-05, "loss": 0.9807, "step": 3997 }, { "epoch": 0.5645297938435471, "grad_norm": 3.6452521855172417, "learning_rate": 1.8716604700327516e-05, "loss": 1.1571, "step": 3998 }, { "epoch": 0.564670996893533, "grad_norm": 3.6015695462751625, "learning_rate": 1.8715857449075558e-05, "loss": 0.945, "step": 3999 }, { "epoch": 0.5648121999435188, "grad_norm": 3.6773534798326635, "learning_rate": 1.8715109995270836e-05, "loss": 1.1845, "step": 4000 }, { "epoch": 0.5649534029935046, "grad_norm": 4.15392292033527, "learning_rate": 1.8714362338930724e-05, "loss": 1.2354, "step": 4001 }, { "epoch": 0.5650946060434905, "grad_norm": 3.6891643024126273, "learning_rate": 1.8713614480072594e-05, "loss": 1.1217, "step": 4002 }, { "epoch": 0.5652358090934764, "grad_norm": 4.196458051514103, "learning_rate": 1.871286641871383e-05, "loss": 1.0469, "step": 4003 }, { "epoch": 0.5653770121434623, "grad_norm": 3.3708880991040098, "learning_rate": 1.8712118154871808e-05, "loss": 1.0082, "step": 4004 }, { "epoch": 0.5655182151934481, "grad_norm": 3.5647183047676654, "learning_rate": 1.8711369688563925e-05, "loss": 1.0219, "step": 4005 }, { "epoch": 0.565659418243434, "grad_norm": 3.658344839046683, "learning_rate": 1.871062101980757e-05, "loss": 1.0328, "step": 4006 }, { "epoch": 0.5658006212934199, "grad_norm": 3.7183317548085864, "learning_rate": 1.870987214862015e-05, "loss": 1.0559, "step": 4007 }, { "epoch": 0.5659418243434058, "grad_norm": 3.6854313763121973, "learning_rate": 1.870912307501906e-05, "loss": 1.1209, "step": 4008 }, { "epoch": 0.5660830273933917, "grad_norm": 4.1700625993183715, "learning_rate": 1.8708373799021705e-05, "loss": 1.2296, "step": 4009 }, { "epoch": 0.5662242304433776, "grad_norm": 3.569878134877533, "learning_rate": 1.870762432064551e-05, "loss": 1.0739, "step": 4010 }, { "epoch": 0.5663654334933634, "grad_norm": 4.714528583814792, "learning_rate": 1.8706874639907887e-05, "loss": 1.3346, "step": 4011 }, { "epoch": 0.5665066365433493, "grad_norm": 4.456733139042029, "learning_rate": 1.8706124756826255e-05, "loss": 1.2945, "step": 4012 }, { "epoch": 0.5666478395933352, "grad_norm": 2.8165700077283486, "learning_rate": 1.8705374671418048e-05, "loss": 0.7626, "step": 4013 }, { "epoch": 0.5667890426433211, "grad_norm": 4.4521736785944706, "learning_rate": 1.8704624383700686e-05, "loss": 1.1991, "step": 4014 }, { "epoch": 0.566930245693307, "grad_norm": 4.3107219518221465, "learning_rate": 1.8703873893691617e-05, "loss": 1.0934, "step": 4015 }, { "epoch": 0.5670714487432928, "grad_norm": 3.3212234512839416, "learning_rate": 1.8703123201408277e-05, "loss": 0.8906, "step": 4016 }, { "epoch": 0.5672126517932787, "grad_norm": 3.486644299402663, "learning_rate": 1.8702372306868113e-05, "loss": 0.9148, "step": 4017 }, { "epoch": 0.5673538548432646, "grad_norm": 4.497917731164971, "learning_rate": 1.8701621210088574e-05, "loss": 1.3779, "step": 4018 }, { "epoch": 0.5674950578932505, "grad_norm": 4.267107338949943, "learning_rate": 1.8700869911087115e-05, "loss": 1.0385, "step": 4019 }, { "epoch": 0.5676362609432364, "grad_norm": 4.0654836341133285, "learning_rate": 1.8700118409881198e-05, "loss": 1.1222, "step": 4020 }, { "epoch": 0.5677774639932223, "grad_norm": 3.657433983718111, "learning_rate": 1.8699366706488287e-05, "loss": 0.8517, "step": 4021 }, { "epoch": 0.5679186670432081, "grad_norm": 3.221267266718776, "learning_rate": 1.8698614800925853e-05, "loss": 1.1548, "step": 4022 }, { "epoch": 0.568059870093194, "grad_norm": 3.5436983626221457, "learning_rate": 1.8697862693211363e-05, "loss": 0.9938, "step": 4023 }, { "epoch": 0.5682010731431799, "grad_norm": 3.7820638060943956, "learning_rate": 1.86971103833623e-05, "loss": 1.0047, "step": 4024 }, { "epoch": 0.5683422761931658, "grad_norm": 3.210789657144361, "learning_rate": 1.869635787139615e-05, "loss": 0.9722, "step": 4025 }, { "epoch": 0.5684834792431517, "grad_norm": 4.125414082165563, "learning_rate": 1.8695605157330398e-05, "loss": 1.1898, "step": 4026 }, { "epoch": 0.5686246822931376, "grad_norm": 3.2006671030063605, "learning_rate": 1.869485224118254e-05, "loss": 1.067, "step": 4027 }, { "epoch": 0.5687658853431234, "grad_norm": 4.089174053233287, "learning_rate": 1.869409912297007e-05, "loss": 1.0943, "step": 4028 }, { "epoch": 0.5689070883931093, "grad_norm": 3.4092930254541267, "learning_rate": 1.869334580271049e-05, "loss": 0.9824, "step": 4029 }, { "epoch": 0.5690482914430952, "grad_norm": 4.35356074344431, "learning_rate": 1.8692592280421305e-05, "loss": 1.212, "step": 4030 }, { "epoch": 0.5691894944930811, "grad_norm": 3.489860911727647, "learning_rate": 1.8691838556120034e-05, "loss": 1.1868, "step": 4031 }, { "epoch": 0.569330697543067, "grad_norm": 3.361720575529464, "learning_rate": 1.8691084629824186e-05, "loss": 0.9423, "step": 4032 }, { "epoch": 0.5694719005930529, "grad_norm": 3.2236889991097093, "learning_rate": 1.8690330501551286e-05, "loss": 0.8294, "step": 4033 }, { "epoch": 0.5696131036430387, "grad_norm": 3.556290011979793, "learning_rate": 1.868957617131886e-05, "loss": 0.8222, "step": 4034 }, { "epoch": 0.5697543066930245, "grad_norm": 3.7321653029750155, "learning_rate": 1.8688821639144432e-05, "loss": 1.0997, "step": 4035 }, { "epoch": 0.5698955097430104, "grad_norm": 3.8130805488870547, "learning_rate": 1.8688066905045545e-05, "loss": 1.015, "step": 4036 }, { "epoch": 0.5700367127929963, "grad_norm": 3.8600953399708007, "learning_rate": 1.8687311969039735e-05, "loss": 1.0199, "step": 4037 }, { "epoch": 0.5701779158429822, "grad_norm": 4.134047799246088, "learning_rate": 1.8686556831144545e-05, "loss": 1.1477, "step": 4038 }, { "epoch": 0.570319118892968, "grad_norm": 3.9964222690492197, "learning_rate": 1.8685801491377527e-05, "loss": 1.1441, "step": 4039 }, { "epoch": 0.5704603219429539, "grad_norm": 3.407155980585171, "learning_rate": 1.8685045949756232e-05, "loss": 1.1131, "step": 4040 }, { "epoch": 0.5706015249929398, "grad_norm": 3.6004508519496996, "learning_rate": 1.868429020629822e-05, "loss": 1.1093, "step": 4041 }, { "epoch": 0.5707427280429257, "grad_norm": 3.294933598654903, "learning_rate": 1.8683534261021058e-05, "loss": 0.9021, "step": 4042 }, { "epoch": 0.5708839310929116, "grad_norm": 3.607231252704722, "learning_rate": 1.8682778113942306e-05, "loss": 0.9316, "step": 4043 }, { "epoch": 0.5710251341428975, "grad_norm": 3.310538084518935, "learning_rate": 1.8682021765079537e-05, "loss": 0.9706, "step": 4044 }, { "epoch": 0.5711663371928833, "grad_norm": 4.548667763013974, "learning_rate": 1.868126521445034e-05, "loss": 1.3445, "step": 4045 }, { "epoch": 0.5713075402428692, "grad_norm": 4.09011477774083, "learning_rate": 1.8680508462072282e-05, "loss": 1.047, "step": 4046 }, { "epoch": 0.5714487432928551, "grad_norm": 3.661998994232539, "learning_rate": 1.867975150796296e-05, "loss": 1.0011, "step": 4047 }, { "epoch": 0.571589946342841, "grad_norm": 3.3706059459380215, "learning_rate": 1.867899435213996e-05, "loss": 1.0994, "step": 4048 }, { "epoch": 0.5717311493928269, "grad_norm": 3.7157762270720895, "learning_rate": 1.8678236994620878e-05, "loss": 0.9293, "step": 4049 }, { "epoch": 0.5718723524428128, "grad_norm": 4.124502626145871, "learning_rate": 1.867747943542332e-05, "loss": 1.0396, "step": 4050 }, { "epoch": 0.5720135554927986, "grad_norm": 3.6139623445028577, "learning_rate": 1.8676721674564884e-05, "loss": 0.9247, "step": 4051 }, { "epoch": 0.5721547585427845, "grad_norm": 4.076364476169935, "learning_rate": 1.8675963712063184e-05, "loss": 1.2324, "step": 4052 }, { "epoch": 0.5722959615927704, "grad_norm": 3.4124404132729564, "learning_rate": 1.8675205547935836e-05, "loss": 0.991, "step": 4053 }, { "epoch": 0.5724371646427563, "grad_norm": 3.1382017917374117, "learning_rate": 1.8674447182200457e-05, "loss": 0.7637, "step": 4054 }, { "epoch": 0.5725783676927422, "grad_norm": 4.066103826686601, "learning_rate": 1.8673688614874668e-05, "loss": 1.1621, "step": 4055 }, { "epoch": 0.5727195707427281, "grad_norm": 3.5593029798153073, "learning_rate": 1.8672929845976106e-05, "loss": 1.0452, "step": 4056 }, { "epoch": 0.5728607737927139, "grad_norm": 3.483338870226903, "learning_rate": 1.86721708755224e-05, "loss": 0.9948, "step": 4057 }, { "epoch": 0.5730019768426998, "grad_norm": 4.148003201983862, "learning_rate": 1.8671411703531186e-05, "loss": 1.1782, "step": 4058 }, { "epoch": 0.5731431798926857, "grad_norm": 6.3901148085079935, "learning_rate": 1.867065233002011e-05, "loss": 1.1194, "step": 4059 }, { "epoch": 0.5732843829426716, "grad_norm": 4.240732582520416, "learning_rate": 1.8669892755006816e-05, "loss": 1.1402, "step": 4060 }, { "epoch": 0.5734255859926575, "grad_norm": 4.671011318299797, "learning_rate": 1.866913297850896e-05, "loss": 1.1747, "step": 4061 }, { "epoch": 0.5735667890426434, "grad_norm": 3.433875078800676, "learning_rate": 1.8668373000544197e-05, "loss": 0.955, "step": 4062 }, { "epoch": 0.5737079920926292, "grad_norm": 3.9755119151099567, "learning_rate": 1.8667612821130192e-05, "loss": 0.855, "step": 4063 }, { "epoch": 0.5738491951426151, "grad_norm": 3.66702021349931, "learning_rate": 1.8666852440284607e-05, "loss": 1.1501, "step": 4064 }, { "epoch": 0.573990398192601, "grad_norm": 3.833889190963788, "learning_rate": 1.8666091858025113e-05, "loss": 0.9233, "step": 4065 }, { "epoch": 0.5741316012425869, "grad_norm": 3.6927183979369844, "learning_rate": 1.8665331074369385e-05, "loss": 0.9356, "step": 4066 }, { "epoch": 0.5742728042925728, "grad_norm": 3.563584328804171, "learning_rate": 1.8664570089335107e-05, "loss": 1.0223, "step": 4067 }, { "epoch": 0.5744140073425587, "grad_norm": 3.827552698585337, "learning_rate": 1.8663808902939965e-05, "loss": 0.9718, "step": 4068 }, { "epoch": 0.5745552103925444, "grad_norm": 3.0354040275615426, "learning_rate": 1.866304751520164e-05, "loss": 0.757, "step": 4069 }, { "epoch": 0.5746964134425303, "grad_norm": 4.124308887238903, "learning_rate": 1.8662285926137837e-05, "loss": 0.9576, "step": 4070 }, { "epoch": 0.5748376164925162, "grad_norm": 3.748082007878318, "learning_rate": 1.8661524135766245e-05, "loss": 1.0634, "step": 4071 }, { "epoch": 0.5749788195425021, "grad_norm": 3.9242736955462334, "learning_rate": 1.866076214410458e-05, "loss": 1.1096, "step": 4072 }, { "epoch": 0.575120022592488, "grad_norm": 3.5970199025641625, "learning_rate": 1.8659999951170537e-05, "loss": 0.9973, "step": 4073 }, { "epoch": 0.5752612256424738, "grad_norm": 3.351985571872935, "learning_rate": 1.8659237556981836e-05, "loss": 0.9583, "step": 4074 }, { "epoch": 0.5754024286924597, "grad_norm": 3.7299514533190616, "learning_rate": 1.8658474961556196e-05, "loss": 1.0378, "step": 4075 }, { "epoch": 0.5755436317424456, "grad_norm": 3.3691051032372332, "learning_rate": 1.8657712164911337e-05, "loss": 0.9757, "step": 4076 }, { "epoch": 0.5756848347924315, "grad_norm": 4.269007724904411, "learning_rate": 1.8656949167064985e-05, "loss": 1.2039, "step": 4077 }, { "epoch": 0.5758260378424174, "grad_norm": 3.9688942324797383, "learning_rate": 1.865618596803487e-05, "loss": 1.0074, "step": 4078 }, { "epoch": 0.5759672408924033, "grad_norm": 3.8733222221692354, "learning_rate": 1.865542256783874e-05, "loss": 1.0798, "step": 4079 }, { "epoch": 0.5761084439423891, "grad_norm": 3.5535190444316527, "learning_rate": 1.865465896649432e-05, "loss": 1.0735, "step": 4080 }, { "epoch": 0.576249646992375, "grad_norm": 3.986588276414098, "learning_rate": 1.8653895164019366e-05, "loss": 1.0561, "step": 4081 }, { "epoch": 0.5763908500423609, "grad_norm": 3.7592889064791946, "learning_rate": 1.8653131160431622e-05, "loss": 1.1176, "step": 4082 }, { "epoch": 0.5765320530923468, "grad_norm": 3.925079357411468, "learning_rate": 1.865236695574885e-05, "loss": 1.1441, "step": 4083 }, { "epoch": 0.5766732561423327, "grad_norm": 3.6549853293507364, "learning_rate": 1.865160254998881e-05, "loss": 1.0528, "step": 4084 }, { "epoch": 0.5768144591923186, "grad_norm": 4.72273554505084, "learning_rate": 1.865083794316926e-05, "loss": 1.3406, "step": 4085 }, { "epoch": 0.5769556622423044, "grad_norm": 3.758135342394704, "learning_rate": 1.8650073135307973e-05, "loss": 1.1384, "step": 4086 }, { "epoch": 0.5770968652922903, "grad_norm": 3.9927339524153296, "learning_rate": 1.864930812642272e-05, "loss": 1.1253, "step": 4087 }, { "epoch": 0.5772380683422762, "grad_norm": 3.9828418644034222, "learning_rate": 1.8648542916531283e-05, "loss": 0.9463, "step": 4088 }, { "epoch": 0.5773792713922621, "grad_norm": 4.477233367179409, "learning_rate": 1.864777750565144e-05, "loss": 1.0366, "step": 4089 }, { "epoch": 0.577520474442248, "grad_norm": 4.6330626747561485, "learning_rate": 1.8647011893800987e-05, "loss": 1.3283, "step": 4090 }, { "epoch": 0.5776616774922338, "grad_norm": 4.523402042030172, "learning_rate": 1.8646246080997712e-05, "loss": 1.3327, "step": 4091 }, { "epoch": 0.5778028805422197, "grad_norm": 3.1236032032378027, "learning_rate": 1.864548006725941e-05, "loss": 0.8357, "step": 4092 }, { "epoch": 0.5779440835922056, "grad_norm": 3.721403427530213, "learning_rate": 1.8644713852603886e-05, "loss": 1.0216, "step": 4093 }, { "epoch": 0.5780852866421915, "grad_norm": 3.7577475616156506, "learning_rate": 1.8643947437048944e-05, "loss": 0.9567, "step": 4094 }, { "epoch": 0.5782264896921774, "grad_norm": 4.339273507854912, "learning_rate": 1.8643180820612397e-05, "loss": 0.8988, "step": 4095 }, { "epoch": 0.5783676927421633, "grad_norm": 4.325405401469997, "learning_rate": 1.8642414003312063e-05, "loss": 1.0157, "step": 4096 }, { "epoch": 0.5785088957921491, "grad_norm": 3.284602683304248, "learning_rate": 1.864164698516576e-05, "loss": 0.7211, "step": 4097 }, { "epoch": 0.578650098842135, "grad_norm": 3.638482015522909, "learning_rate": 1.8640879766191305e-05, "loss": 1.0067, "step": 4098 }, { "epoch": 0.5787913018921209, "grad_norm": 4.296071967987287, "learning_rate": 1.8640112346406545e-05, "loss": 1.1891, "step": 4099 }, { "epoch": 0.5789325049421068, "grad_norm": 4.901869242405439, "learning_rate": 1.8639344725829302e-05, "loss": 1.4346, "step": 4100 }, { "epoch": 0.5790737079920927, "grad_norm": 4.1041779440755315, "learning_rate": 1.863857690447742e-05, "loss": 1.1322, "step": 4101 }, { "epoch": 0.5792149110420786, "grad_norm": 3.320669993704349, "learning_rate": 1.863780888236874e-05, "loss": 1.0015, "step": 4102 }, { "epoch": 0.5793561140920643, "grad_norm": 3.481158138883747, "learning_rate": 1.8637040659521114e-05, "loss": 0.9114, "step": 4103 }, { "epoch": 0.5794973171420502, "grad_norm": 3.2296475287976234, "learning_rate": 1.863627223595239e-05, "loss": 0.949, "step": 4104 }, { "epoch": 0.5796385201920361, "grad_norm": 3.544018410815906, "learning_rate": 1.863550361168043e-05, "loss": 1.1104, "step": 4105 }, { "epoch": 0.579779723242022, "grad_norm": 3.70059130708354, "learning_rate": 1.8634734786723096e-05, "loss": 1.036, "step": 4106 }, { "epoch": 0.5799209262920079, "grad_norm": 3.9310511314850864, "learning_rate": 1.8633965761098255e-05, "loss": 1.1319, "step": 4107 }, { "epoch": 0.5800621293419937, "grad_norm": 4.1680156749198085, "learning_rate": 1.8633196534823778e-05, "loss": 1.3732, "step": 4108 }, { "epoch": 0.5802033323919796, "grad_norm": 3.547009320086699, "learning_rate": 1.8632427107917542e-05, "loss": 1.0682, "step": 4109 }, { "epoch": 0.5803445354419655, "grad_norm": 3.2804669783562694, "learning_rate": 1.863165748039743e-05, "loss": 0.8838, "step": 4110 }, { "epoch": 0.5804857384919514, "grad_norm": 3.7086539230506337, "learning_rate": 1.8630887652281325e-05, "loss": 1.0536, "step": 4111 }, { "epoch": 0.5806269415419373, "grad_norm": 3.9473233903276848, "learning_rate": 1.8630117623587116e-05, "loss": 1.2209, "step": 4112 }, { "epoch": 0.5807681445919232, "grad_norm": 3.1857044452577297, "learning_rate": 1.86293473943327e-05, "loss": 1.0196, "step": 4113 }, { "epoch": 0.580909347641909, "grad_norm": 3.0909825332315592, "learning_rate": 1.862857696453598e-05, "loss": 0.9297, "step": 4114 }, { "epoch": 0.5810505506918949, "grad_norm": 3.9863603334242033, "learning_rate": 1.8627806334214855e-05, "loss": 1.0411, "step": 4115 }, { "epoch": 0.5811917537418808, "grad_norm": 4.262376141816896, "learning_rate": 1.862703550338724e-05, "loss": 1.0171, "step": 4116 }, { "epoch": 0.5813329567918667, "grad_norm": 3.9985089115793975, "learning_rate": 1.8626264472071045e-05, "loss": 1.0034, "step": 4117 }, { "epoch": 0.5814741598418526, "grad_norm": 4.583967078420294, "learning_rate": 1.862549324028419e-05, "loss": 1.0899, "step": 4118 }, { "epoch": 0.5816153628918385, "grad_norm": 3.8435110755123376, "learning_rate": 1.8624721808044596e-05, "loss": 1.0668, "step": 4119 }, { "epoch": 0.5817565659418243, "grad_norm": 4.50179586566256, "learning_rate": 1.862395017537019e-05, "loss": 1.1168, "step": 4120 }, { "epoch": 0.5818977689918102, "grad_norm": 3.711670697973765, "learning_rate": 1.8623178342278907e-05, "loss": 0.86, "step": 4121 }, { "epoch": 0.5820389720417961, "grad_norm": 3.3557356490790973, "learning_rate": 1.8622406308788683e-05, "loss": 0.9903, "step": 4122 }, { "epoch": 0.582180175091782, "grad_norm": 3.5910738672981632, "learning_rate": 1.862163407491746e-05, "loss": 1.0231, "step": 4123 }, { "epoch": 0.5823213781417679, "grad_norm": 3.268798707228626, "learning_rate": 1.862086164068319e-05, "loss": 1.0015, "step": 4124 }, { "epoch": 0.5824625811917538, "grad_norm": 3.860641764477037, "learning_rate": 1.8620089006103812e-05, "loss": 1.0093, "step": 4125 }, { "epoch": 0.5826037842417396, "grad_norm": 4.298604029423862, "learning_rate": 1.8619316171197292e-05, "loss": 1.417, "step": 4126 }, { "epoch": 0.5827449872917255, "grad_norm": 3.471171173962099, "learning_rate": 1.8618543135981584e-05, "loss": 0.9174, "step": 4127 }, { "epoch": 0.5828861903417114, "grad_norm": 4.841094139622842, "learning_rate": 1.861776990047466e-05, "loss": 0.896, "step": 4128 }, { "epoch": 0.5830273933916973, "grad_norm": 3.5639196508723607, "learning_rate": 1.861699646469448e-05, "loss": 1.1188, "step": 4129 }, { "epoch": 0.5831685964416832, "grad_norm": 3.62673367153281, "learning_rate": 1.8616222828659024e-05, "loss": 1.0508, "step": 4130 }, { "epoch": 0.5833097994916691, "grad_norm": 4.39419136431228, "learning_rate": 1.861544899238627e-05, "loss": 1.4008, "step": 4131 }, { "epoch": 0.5834510025416549, "grad_norm": 3.496167935892804, "learning_rate": 1.8614674955894206e-05, "loss": 0.8996, "step": 4132 }, { "epoch": 0.5835922055916408, "grad_norm": 3.2883180045074263, "learning_rate": 1.8613900719200813e-05, "loss": 1.1551, "step": 4133 }, { "epoch": 0.5837334086416267, "grad_norm": 3.6695319780858475, "learning_rate": 1.8613126282324092e-05, "loss": 0.9432, "step": 4134 }, { "epoch": 0.5838746116916126, "grad_norm": 4.0264931087430265, "learning_rate": 1.8612351645282033e-05, "loss": 1.0691, "step": 4135 }, { "epoch": 0.5840158147415985, "grad_norm": 3.69807199678644, "learning_rate": 1.861157680809264e-05, "loss": 0.988, "step": 4136 }, { "epoch": 0.5841570177915842, "grad_norm": 3.8498733605858204, "learning_rate": 1.861080177077392e-05, "loss": 1.044, "step": 4137 }, { "epoch": 0.5842982208415701, "grad_norm": 4.068201377829482, "learning_rate": 1.861002653334389e-05, "loss": 0.8888, "step": 4138 }, { "epoch": 0.584439423891556, "grad_norm": 3.772110771026714, "learning_rate": 1.860925109582056e-05, "loss": 0.8924, "step": 4139 }, { "epoch": 0.5845806269415419, "grad_norm": 3.2289979354102494, "learning_rate": 1.860847545822195e-05, "loss": 0.7755, "step": 4140 }, { "epoch": 0.5847218299915278, "grad_norm": 3.775070156987605, "learning_rate": 1.860769962056609e-05, "loss": 1.0759, "step": 4141 }, { "epoch": 0.5848630330415137, "grad_norm": 3.407625556966518, "learning_rate": 1.8606923582871007e-05, "loss": 0.8833, "step": 4142 }, { "epoch": 0.5850042360914995, "grad_norm": 3.9787719510572295, "learning_rate": 1.8606147345154738e-05, "loss": 1.0986, "step": 4143 }, { "epoch": 0.5851454391414854, "grad_norm": 3.996893294690718, "learning_rate": 1.8605370907435322e-05, "loss": 1.0206, "step": 4144 }, { "epoch": 0.5852866421914713, "grad_norm": 4.154648656553519, "learning_rate": 1.8604594269730803e-05, "loss": 0.9879, "step": 4145 }, { "epoch": 0.5854278452414572, "grad_norm": 3.2572562237746685, "learning_rate": 1.860381743205923e-05, "loss": 0.9381, "step": 4146 }, { "epoch": 0.5855690482914431, "grad_norm": 4.43992863158585, "learning_rate": 1.8603040394438653e-05, "loss": 1.2913, "step": 4147 }, { "epoch": 0.585710251341429, "grad_norm": 3.5999277703032075, "learning_rate": 1.8602263156887136e-05, "loss": 1.0901, "step": 4148 }, { "epoch": 0.5858514543914148, "grad_norm": 3.794153402856877, "learning_rate": 1.8601485719422735e-05, "loss": 0.9834, "step": 4149 }, { "epoch": 0.5859926574414007, "grad_norm": 5.436880412803433, "learning_rate": 1.8600708082063518e-05, "loss": 1.06, "step": 4150 }, { "epoch": 0.5861338604913866, "grad_norm": 3.608668652080738, "learning_rate": 1.8599930244827565e-05, "loss": 1.0059, "step": 4151 }, { "epoch": 0.5862750635413725, "grad_norm": 3.952991424870413, "learning_rate": 1.8599152207732945e-05, "loss": 0.9753, "step": 4152 }, { "epoch": 0.5864162665913584, "grad_norm": 3.561323589778944, "learning_rate": 1.859837397079774e-05, "loss": 1.1503, "step": 4153 }, { "epoch": 0.5865574696413443, "grad_norm": 3.8801330679186115, "learning_rate": 1.859759553404004e-05, "loss": 0.9689, "step": 4154 }, { "epoch": 0.5866986726913301, "grad_norm": 3.9749590759581923, "learning_rate": 1.859681689747793e-05, "loss": 1.1224, "step": 4155 }, { "epoch": 0.586839875741316, "grad_norm": 3.897480694732494, "learning_rate": 1.859603806112951e-05, "loss": 1.2696, "step": 4156 }, { "epoch": 0.5869810787913019, "grad_norm": 3.364759248885293, "learning_rate": 1.859525902501288e-05, "loss": 0.9327, "step": 4157 }, { "epoch": 0.5871222818412878, "grad_norm": 3.8458697977703795, "learning_rate": 1.859447978914614e-05, "loss": 1.2428, "step": 4158 }, { "epoch": 0.5872634848912737, "grad_norm": 4.4123686120616235, "learning_rate": 1.85937003535474e-05, "loss": 1.1181, "step": 4159 }, { "epoch": 0.5874046879412596, "grad_norm": 4.076397130983413, "learning_rate": 1.8592920718234775e-05, "loss": 1.0772, "step": 4160 }, { "epoch": 0.5875458909912454, "grad_norm": 3.8956444640232397, "learning_rate": 1.8592140883226384e-05, "loss": 1.1652, "step": 4161 }, { "epoch": 0.5876870940412313, "grad_norm": 3.4993230281651857, "learning_rate": 1.859136084854035e-05, "loss": 1.1086, "step": 4162 }, { "epoch": 0.5878282970912172, "grad_norm": 3.2460513650642127, "learning_rate": 1.85905806141948e-05, "loss": 0.8666, "step": 4163 }, { "epoch": 0.5879695001412031, "grad_norm": 3.59038934578245, "learning_rate": 1.8589800180207865e-05, "loss": 1.0769, "step": 4164 }, { "epoch": 0.588110703191189, "grad_norm": 4.346430640078812, "learning_rate": 1.8589019546597686e-05, "loss": 1.2488, "step": 4165 }, { "epoch": 0.5882519062411748, "grad_norm": 4.067375155288011, "learning_rate": 1.85882387133824e-05, "loss": 1.1368, "step": 4166 }, { "epoch": 0.5883931092911607, "grad_norm": 3.2877716655822526, "learning_rate": 1.858745768058016e-05, "loss": 0.804, "step": 4167 }, { "epoch": 0.5885343123411466, "grad_norm": 3.4526663372858306, "learning_rate": 1.8586676448209107e-05, "loss": 1.2041, "step": 4168 }, { "epoch": 0.5886755153911325, "grad_norm": 3.579941098848215, "learning_rate": 1.8585895016287404e-05, "loss": 1.0065, "step": 4169 }, { "epoch": 0.5888167184411184, "grad_norm": 4.2726062913530045, "learning_rate": 1.8585113384833206e-05, "loss": 1.0268, "step": 4170 }, { "epoch": 0.5889579214911042, "grad_norm": 3.1946690958879755, "learning_rate": 1.8584331553864686e-05, "loss": 0.8639, "step": 4171 }, { "epoch": 0.58909912454109, "grad_norm": 3.7377133572636487, "learning_rate": 1.85835495234e-05, "loss": 1.0931, "step": 4172 }, { "epoch": 0.5892403275910759, "grad_norm": 4.032285210083998, "learning_rate": 1.8582767293457337e-05, "loss": 1.2589, "step": 4173 }, { "epoch": 0.5893815306410618, "grad_norm": 3.740966082866538, "learning_rate": 1.8581984864054866e-05, "loss": 0.9042, "step": 4174 }, { "epoch": 0.5895227336910477, "grad_norm": 3.295802433210191, "learning_rate": 1.8581202235210773e-05, "loss": 1.041, "step": 4175 }, { "epoch": 0.5896639367410336, "grad_norm": 3.414815003774926, "learning_rate": 1.858041940694325e-05, "loss": 1.025, "step": 4176 }, { "epoch": 0.5898051397910195, "grad_norm": 4.1443396562225, "learning_rate": 1.857963637927048e-05, "loss": 1.2908, "step": 4177 }, { "epoch": 0.5899463428410053, "grad_norm": 3.7999804497848286, "learning_rate": 1.8578853152210666e-05, "loss": 1.1903, "step": 4178 }, { "epoch": 0.5900875458909912, "grad_norm": 3.363521081523304, "learning_rate": 1.8578069725782013e-05, "loss": 0.8813, "step": 4179 }, { "epoch": 0.5902287489409771, "grad_norm": 3.7934736403695335, "learning_rate": 1.8577286100002723e-05, "loss": 1.2552, "step": 4180 }, { "epoch": 0.590369951990963, "grad_norm": 3.4828122939862243, "learning_rate": 1.8576502274891007e-05, "loss": 1.0573, "step": 4181 }, { "epoch": 0.5905111550409489, "grad_norm": 4.175525563806178, "learning_rate": 1.8575718250465083e-05, "loss": 1.2271, "step": 4182 }, { "epoch": 0.5906523580909347, "grad_norm": 3.5786096602089206, "learning_rate": 1.8574934026743168e-05, "loss": 0.8729, "step": 4183 }, { "epoch": 0.5907935611409206, "grad_norm": 4.089116699523889, "learning_rate": 1.8574149603743495e-05, "loss": 1.176, "step": 4184 }, { "epoch": 0.5909347641909065, "grad_norm": 3.8663664284317125, "learning_rate": 1.8573364981484285e-05, "loss": 1.0262, "step": 4185 }, { "epoch": 0.5910759672408924, "grad_norm": 3.741639408603783, "learning_rate": 1.8572580159983772e-05, "loss": 1.1781, "step": 4186 }, { "epoch": 0.5912171702908783, "grad_norm": 4.074612844350278, "learning_rate": 1.8571795139260206e-05, "loss": 1.2962, "step": 4187 }, { "epoch": 0.5913583733408642, "grad_norm": 3.385148177459225, "learning_rate": 1.8571009919331817e-05, "loss": 1.1209, "step": 4188 }, { "epoch": 0.59149957639085, "grad_norm": 3.699640781667295, "learning_rate": 1.857022450021686e-05, "loss": 1.1956, "step": 4189 }, { "epoch": 0.5916407794408359, "grad_norm": 3.4254288626768834, "learning_rate": 1.8569438881933587e-05, "loss": 0.8202, "step": 4190 }, { "epoch": 0.5917819824908218, "grad_norm": 3.1517625023060694, "learning_rate": 1.8568653064500257e-05, "loss": 0.8292, "step": 4191 }, { "epoch": 0.5919231855408077, "grad_norm": 3.5690309824726576, "learning_rate": 1.8567867047935128e-05, "loss": 0.978, "step": 4192 }, { "epoch": 0.5920643885907936, "grad_norm": 4.598491553393195, "learning_rate": 1.8567080832256467e-05, "loss": 1.1733, "step": 4193 }, { "epoch": 0.5922055916407795, "grad_norm": 3.5887122414354784, "learning_rate": 1.8566294417482552e-05, "loss": 1.0617, "step": 4194 }, { "epoch": 0.5923467946907653, "grad_norm": 3.419623106137663, "learning_rate": 1.8565507803631653e-05, "loss": 0.9393, "step": 4195 }, { "epoch": 0.5924879977407512, "grad_norm": 3.8740404633552097, "learning_rate": 1.8564720990722048e-05, "loss": 0.9876, "step": 4196 }, { "epoch": 0.5926292007907371, "grad_norm": 3.5426007129037513, "learning_rate": 1.856393397877203e-05, "loss": 1.1348, "step": 4197 }, { "epoch": 0.592770403840723, "grad_norm": 3.807543033676919, "learning_rate": 1.8563146767799884e-05, "loss": 0.9695, "step": 4198 }, { "epoch": 0.5929116068907089, "grad_norm": 4.016723350460209, "learning_rate": 1.8562359357823907e-05, "loss": 0.9035, "step": 4199 }, { "epoch": 0.5930528099406948, "grad_norm": 3.562927018738239, "learning_rate": 1.8561571748862394e-05, "loss": 1.1548, "step": 4200 }, { "epoch": 0.5931940129906806, "grad_norm": 3.3446241065301168, "learning_rate": 1.8560783940933655e-05, "loss": 0.8988, "step": 4201 }, { "epoch": 0.5933352160406665, "grad_norm": 3.8292064786297253, "learning_rate": 1.855999593405599e-05, "loss": 1.1284, "step": 4202 }, { "epoch": 0.5934764190906524, "grad_norm": 3.561859513307444, "learning_rate": 1.8559207728247716e-05, "loss": 0.9537, "step": 4203 }, { "epoch": 0.5936176221406383, "grad_norm": 3.518081777293403, "learning_rate": 1.8558419323527152e-05, "loss": 1.1501, "step": 4204 }, { "epoch": 0.5937588251906241, "grad_norm": 3.501337791651096, "learning_rate": 1.855763071991262e-05, "loss": 0.8717, "step": 4205 }, { "epoch": 0.59390002824061, "grad_norm": 4.265705061661282, "learning_rate": 1.8556841917422443e-05, "loss": 1.2586, "step": 4206 }, { "epoch": 0.5940412312905958, "grad_norm": 3.9160703937076953, "learning_rate": 1.8556052916074957e-05, "loss": 1.0242, "step": 4207 }, { "epoch": 0.5941824343405817, "grad_norm": 4.049207023650699, "learning_rate": 1.8555263715888493e-05, "loss": 1.1262, "step": 4208 }, { "epoch": 0.5943236373905676, "grad_norm": 3.1461353978224307, "learning_rate": 1.85544743168814e-05, "loss": 0.8149, "step": 4209 }, { "epoch": 0.5944648404405535, "grad_norm": 4.392253975893292, "learning_rate": 1.8553684719072017e-05, "loss": 1.2961, "step": 4210 }, { "epoch": 0.5946060434905394, "grad_norm": 4.2499285472583495, "learning_rate": 1.8552894922478697e-05, "loss": 0.9916, "step": 4211 }, { "epoch": 0.5947472465405252, "grad_norm": 4.094266899473215, "learning_rate": 1.855210492711979e-05, "loss": 1.0899, "step": 4212 }, { "epoch": 0.5948884495905111, "grad_norm": 3.8097712050021024, "learning_rate": 1.8551314733013657e-05, "loss": 1.096, "step": 4213 }, { "epoch": 0.595029652640497, "grad_norm": 3.7426982491451244, "learning_rate": 1.8550524340178664e-05, "loss": 1.0881, "step": 4214 }, { "epoch": 0.5951708556904829, "grad_norm": 3.7159524848947045, "learning_rate": 1.8549733748633177e-05, "loss": 1.1563, "step": 4215 }, { "epoch": 0.5953120587404688, "grad_norm": 3.628120590810224, "learning_rate": 1.8548942958395573e-05, "loss": 0.9457, "step": 4216 }, { "epoch": 0.5954532617904547, "grad_norm": 3.9646476483645956, "learning_rate": 1.8548151969484224e-05, "loss": 1.0168, "step": 4217 }, { "epoch": 0.5955944648404405, "grad_norm": 3.976566449011886, "learning_rate": 1.8547360781917515e-05, "loss": 1.2408, "step": 4218 }, { "epoch": 0.5957356678904264, "grad_norm": 3.558609644510249, "learning_rate": 1.8546569395713835e-05, "loss": 0.9833, "step": 4219 }, { "epoch": 0.5958768709404123, "grad_norm": 3.395662811036489, "learning_rate": 1.854577781089157e-05, "loss": 1.0397, "step": 4220 }, { "epoch": 0.5960180739903982, "grad_norm": 3.6616115074976676, "learning_rate": 1.854498602746912e-05, "loss": 0.9314, "step": 4221 }, { "epoch": 0.5961592770403841, "grad_norm": 3.769337826716327, "learning_rate": 1.8544194045464888e-05, "loss": 1.1068, "step": 4222 }, { "epoch": 0.59630048009037, "grad_norm": 3.9955321243283195, "learning_rate": 1.8543401864897274e-05, "loss": 1.0406, "step": 4223 }, { "epoch": 0.5964416831403558, "grad_norm": 4.216055356561233, "learning_rate": 1.854260948578469e-05, "loss": 1.275, "step": 4224 }, { "epoch": 0.5965828861903417, "grad_norm": 4.231304418914608, "learning_rate": 1.854181690814555e-05, "loss": 1.182, "step": 4225 }, { "epoch": 0.5967240892403276, "grad_norm": 3.7619344944707747, "learning_rate": 1.8541024131998277e-05, "loss": 1.1379, "step": 4226 }, { "epoch": 0.5968652922903135, "grad_norm": 3.5626609559296023, "learning_rate": 1.854023115736129e-05, "loss": 1.0559, "step": 4227 }, { "epoch": 0.5970064953402994, "grad_norm": 3.2219965946883273, "learning_rate": 1.853943798425302e-05, "loss": 0.8866, "step": 4228 }, { "epoch": 0.5971476983902853, "grad_norm": 3.996811555249157, "learning_rate": 1.85386446126919e-05, "loss": 1.1395, "step": 4229 }, { "epoch": 0.5972889014402711, "grad_norm": 3.5233814852189247, "learning_rate": 1.8537851042696363e-05, "loss": 1.0329, "step": 4230 }, { "epoch": 0.597430104490257, "grad_norm": 3.443132049818685, "learning_rate": 1.853705727428486e-05, "loss": 1.2396, "step": 4231 }, { "epoch": 0.5975713075402429, "grad_norm": 3.5988634046430628, "learning_rate": 1.8536263307475826e-05, "loss": 1.088, "step": 4232 }, { "epoch": 0.5977125105902288, "grad_norm": 3.765297979851038, "learning_rate": 1.8535469142287723e-05, "loss": 0.9556, "step": 4233 }, { "epoch": 0.5978537136402147, "grad_norm": 3.3080528605880883, "learning_rate": 1.8534674778739004e-05, "loss": 0.9631, "step": 4234 }, { "epoch": 0.5979949166902006, "grad_norm": 4.460455095885312, "learning_rate": 1.8533880216848126e-05, "loss": 1.3145, "step": 4235 }, { "epoch": 0.5981361197401864, "grad_norm": 3.9652244607295044, "learning_rate": 1.853308545663356e-05, "loss": 1.2007, "step": 4236 }, { "epoch": 0.5982773227901723, "grad_norm": 4.05360503758188, "learning_rate": 1.8532290498113776e-05, "loss": 1.1863, "step": 4237 }, { "epoch": 0.5984185258401582, "grad_norm": 4.4161962522304945, "learning_rate": 1.853149534130724e-05, "loss": 1.271, "step": 4238 }, { "epoch": 0.598559728890144, "grad_norm": 3.683654874400558, "learning_rate": 1.853069998623244e-05, "loss": 1.1423, "step": 4239 }, { "epoch": 0.5987009319401299, "grad_norm": 4.193599015810658, "learning_rate": 1.8529904432907858e-05, "loss": 1.0244, "step": 4240 }, { "epoch": 0.5988421349901157, "grad_norm": 3.6488400158111562, "learning_rate": 1.852910868135198e-05, "loss": 1.1112, "step": 4241 }, { "epoch": 0.5989833380401016, "grad_norm": 3.5124736225468487, "learning_rate": 1.8528312731583295e-05, "loss": 0.838, "step": 4242 }, { "epoch": 0.5991245410900875, "grad_norm": 3.5154409822658867, "learning_rate": 1.852751658362031e-05, "loss": 0.8923, "step": 4243 }, { "epoch": 0.5992657441400734, "grad_norm": 3.6414373337053596, "learning_rate": 1.8526720237481518e-05, "loss": 0.8413, "step": 4244 }, { "epoch": 0.5994069471900593, "grad_norm": 3.7847290395824844, "learning_rate": 1.8525923693185436e-05, "loss": 1.1494, "step": 4245 }, { "epoch": 0.5995481502400452, "grad_norm": 3.190683007903166, "learning_rate": 1.8525126950750564e-05, "loss": 0.8344, "step": 4246 }, { "epoch": 0.599689353290031, "grad_norm": 3.556293311070704, "learning_rate": 1.8524330010195427e-05, "loss": 1.1617, "step": 4247 }, { "epoch": 0.5998305563400169, "grad_norm": 3.177177563934956, "learning_rate": 1.852353287153854e-05, "loss": 0.8884, "step": 4248 }, { "epoch": 0.5999717593900028, "grad_norm": 4.4153313057268555, "learning_rate": 1.852273553479843e-05, "loss": 1.1623, "step": 4249 }, { "epoch": 0.6001129624399887, "grad_norm": 5.493923319127232, "learning_rate": 1.8521937999993627e-05, "loss": 1.2422, "step": 4250 }, { "epoch": 0.6002541654899746, "grad_norm": 3.7266683297269165, "learning_rate": 1.852114026714267e-05, "loss": 1.1787, "step": 4251 }, { "epoch": 0.6003953685399605, "grad_norm": 4.218267163153965, "learning_rate": 1.8520342336264082e-05, "loss": 1.1242, "step": 4252 }, { "epoch": 0.6005365715899463, "grad_norm": 3.6243595320272783, "learning_rate": 1.851954420737643e-05, "loss": 1.0289, "step": 4253 }, { "epoch": 0.6006777746399322, "grad_norm": 4.289339920288096, "learning_rate": 1.8518745880498242e-05, "loss": 1.1592, "step": 4254 }, { "epoch": 0.6008189776899181, "grad_norm": 3.7300587142374866, "learning_rate": 1.8517947355648084e-05, "loss": 0.9113, "step": 4255 }, { "epoch": 0.600960180739904, "grad_norm": 3.8710212539386974, "learning_rate": 1.8517148632844502e-05, "loss": 1.0612, "step": 4256 }, { "epoch": 0.6011013837898899, "grad_norm": 3.3473262963679593, "learning_rate": 1.851634971210607e-05, "loss": 1.0003, "step": 4257 }, { "epoch": 0.6012425868398757, "grad_norm": 3.5941393630773066, "learning_rate": 1.8515550593451347e-05, "loss": 0.9572, "step": 4258 }, { "epoch": 0.6013837898898616, "grad_norm": 3.7935664549859673, "learning_rate": 1.8514751276898903e-05, "loss": 1.0241, "step": 4259 }, { "epoch": 0.6015249929398475, "grad_norm": 4.366609812852916, "learning_rate": 1.8513951762467318e-05, "loss": 0.9878, "step": 4260 }, { "epoch": 0.6016661959898334, "grad_norm": 3.5354635410452815, "learning_rate": 1.851315205017517e-05, "loss": 1.0355, "step": 4261 }, { "epoch": 0.6018073990398193, "grad_norm": 3.200996948803274, "learning_rate": 1.851235214004105e-05, "loss": 1.1244, "step": 4262 }, { "epoch": 0.6019486020898052, "grad_norm": 2.9081502393929854, "learning_rate": 1.8511552032083534e-05, "loss": 0.8222, "step": 4263 }, { "epoch": 0.602089805139791, "grad_norm": 3.4199189479816603, "learning_rate": 1.8510751726321233e-05, "loss": 1.1355, "step": 4264 }, { "epoch": 0.6022310081897769, "grad_norm": 4.767391961866176, "learning_rate": 1.850995122277273e-05, "loss": 1.2989, "step": 4265 }, { "epoch": 0.6023722112397628, "grad_norm": 3.929014871784442, "learning_rate": 1.850915052145664e-05, "loss": 1.1773, "step": 4266 }, { "epoch": 0.6025134142897487, "grad_norm": 3.6256071754915244, "learning_rate": 1.8508349622391567e-05, "loss": 1.0806, "step": 4267 }, { "epoch": 0.6026546173397346, "grad_norm": 4.151512387625766, "learning_rate": 1.850754852559612e-05, "loss": 1.298, "step": 4268 }, { "epoch": 0.6027958203897205, "grad_norm": 3.9615881296394737, "learning_rate": 1.8506747231088927e-05, "loss": 1.0244, "step": 4269 }, { "epoch": 0.6029370234397063, "grad_norm": 3.095863356698826, "learning_rate": 1.8505945738888593e-05, "loss": 0.9552, "step": 4270 }, { "epoch": 0.6030782264896922, "grad_norm": 4.167119209725287, "learning_rate": 1.850514404901376e-05, "loss": 1.2741, "step": 4271 }, { "epoch": 0.6032194295396781, "grad_norm": 3.845709141474949, "learning_rate": 1.850434216148305e-05, "loss": 0.9657, "step": 4272 }, { "epoch": 0.603360632589664, "grad_norm": 3.233359135675125, "learning_rate": 1.85035400763151e-05, "loss": 0.8752, "step": 4273 }, { "epoch": 0.6035018356396498, "grad_norm": 3.677522602664705, "learning_rate": 1.8502737793528552e-05, "loss": 1.1389, "step": 4274 }, { "epoch": 0.6036430386896356, "grad_norm": 2.8577663112988447, "learning_rate": 1.850193531314205e-05, "loss": 0.9664, "step": 4275 }, { "epoch": 0.6037842417396215, "grad_norm": 3.8460644676733753, "learning_rate": 1.850113263517424e-05, "loss": 1.2241, "step": 4276 }, { "epoch": 0.6039254447896074, "grad_norm": 3.779842225333164, "learning_rate": 1.8500329759643785e-05, "loss": 1.0771, "step": 4277 }, { "epoch": 0.6040666478395933, "grad_norm": 3.2686051674757364, "learning_rate": 1.849952668656933e-05, "loss": 0.8744, "step": 4278 }, { "epoch": 0.6042078508895792, "grad_norm": 3.7801485055639312, "learning_rate": 1.849872341596955e-05, "loss": 1.1913, "step": 4279 }, { "epoch": 0.6043490539395651, "grad_norm": 3.2090358461520716, "learning_rate": 1.849791994786311e-05, "loss": 0.9344, "step": 4280 }, { "epoch": 0.604490256989551, "grad_norm": 4.241832249359513, "learning_rate": 1.8497116282268678e-05, "loss": 1.1513, "step": 4281 }, { "epoch": 0.6046314600395368, "grad_norm": 4.255957350041253, "learning_rate": 1.849631241920493e-05, "loss": 1.441, "step": 4282 }, { "epoch": 0.6047726630895227, "grad_norm": 4.289389383411926, "learning_rate": 1.8495508358690553e-05, "loss": 1.2956, "step": 4283 }, { "epoch": 0.6049138661395086, "grad_norm": 4.742569549176212, "learning_rate": 1.849470410074423e-05, "loss": 1.1518, "step": 4284 }, { "epoch": 0.6050550691894945, "grad_norm": 4.05783168612351, "learning_rate": 1.8493899645384655e-05, "loss": 1.0151, "step": 4285 }, { "epoch": 0.6051962722394804, "grad_norm": 3.7282836227336755, "learning_rate": 1.849309499263052e-05, "loss": 0.8935, "step": 4286 }, { "epoch": 0.6053374752894662, "grad_norm": 3.9518335745039996, "learning_rate": 1.8492290142500525e-05, "loss": 1.4142, "step": 4287 }, { "epoch": 0.6054786783394521, "grad_norm": 4.660699773767614, "learning_rate": 1.8491485095013373e-05, "loss": 1.3165, "step": 4288 }, { "epoch": 0.605619881389438, "grad_norm": 3.6905204640720464, "learning_rate": 1.8490679850187777e-05, "loss": 0.8193, "step": 4289 }, { "epoch": 0.6057610844394239, "grad_norm": 3.3390000469663157, "learning_rate": 1.8489874408042445e-05, "loss": 0.9842, "step": 4290 }, { "epoch": 0.6059022874894098, "grad_norm": 4.271465888664024, "learning_rate": 1.8489068768596102e-05, "loss": 1.2088, "step": 4291 }, { "epoch": 0.6060434905393957, "grad_norm": 3.9510854025319784, "learning_rate": 1.8488262931867464e-05, "loss": 0.9423, "step": 4292 }, { "epoch": 0.6061846935893815, "grad_norm": 3.5102377307477304, "learning_rate": 1.848745689787526e-05, "loss": 0.9111, "step": 4293 }, { "epoch": 0.6063258966393674, "grad_norm": 3.828347108219323, "learning_rate": 1.848665066663823e-05, "loss": 1.1261, "step": 4294 }, { "epoch": 0.6064670996893533, "grad_norm": 3.1182586821432716, "learning_rate": 1.8485844238175096e-05, "loss": 0.8194, "step": 4295 }, { "epoch": 0.6066083027393392, "grad_norm": 3.268083492131184, "learning_rate": 1.848503761250461e-05, "loss": 0.9082, "step": 4296 }, { "epoch": 0.6067495057893251, "grad_norm": 3.805365360832415, "learning_rate": 1.8484230789645513e-05, "loss": 0.979, "step": 4297 }, { "epoch": 0.606890708839311, "grad_norm": 3.704003677401258, "learning_rate": 1.848342376961656e-05, "loss": 0.9663, "step": 4298 }, { "epoch": 0.6070319118892968, "grad_norm": 3.5517112409648597, "learning_rate": 1.8482616552436496e-05, "loss": 1.0243, "step": 4299 }, { "epoch": 0.6071731149392827, "grad_norm": 3.2416394725908866, "learning_rate": 1.8481809138124092e-05, "loss": 0.9677, "step": 4300 }, { "epoch": 0.6073143179892686, "grad_norm": 3.46163890062641, "learning_rate": 1.8481001526698105e-05, "loss": 1.0115, "step": 4301 }, { "epoch": 0.6074555210392545, "grad_norm": 4.221948263292534, "learning_rate": 1.8480193718177305e-05, "loss": 1.2259, "step": 4302 }, { "epoch": 0.6075967240892404, "grad_norm": 3.8160697871476645, "learning_rate": 1.8479385712580464e-05, "loss": 0.8947, "step": 4303 }, { "epoch": 0.6077379271392263, "grad_norm": 3.520242089070327, "learning_rate": 1.8478577509926367e-05, "loss": 0.8917, "step": 4304 }, { "epoch": 0.6078791301892121, "grad_norm": 2.960125950329372, "learning_rate": 1.8477769110233782e-05, "loss": 0.7706, "step": 4305 }, { "epoch": 0.608020333239198, "grad_norm": 3.948698253197586, "learning_rate": 1.847696051352151e-05, "loss": 1.1429, "step": 4306 }, { "epoch": 0.6081615362891839, "grad_norm": 4.900719500156943, "learning_rate": 1.8476151719808335e-05, "loss": 0.8102, "step": 4307 }, { "epoch": 0.6083027393391697, "grad_norm": 3.857864701407322, "learning_rate": 1.8475342729113053e-05, "loss": 1.0524, "step": 4308 }, { "epoch": 0.6084439423891556, "grad_norm": 3.642662758904823, "learning_rate": 1.8474533541454466e-05, "loss": 1.2191, "step": 4309 }, { "epoch": 0.6085851454391414, "grad_norm": 3.5845742752865517, "learning_rate": 1.8473724156851384e-05, "loss": 0.9252, "step": 4310 }, { "epoch": 0.6087263484891273, "grad_norm": 4.05289204848755, "learning_rate": 1.847291457532261e-05, "loss": 1.1514, "step": 4311 }, { "epoch": 0.6088675515391132, "grad_norm": 3.6204961252813543, "learning_rate": 1.8472104796886957e-05, "loss": 1.1462, "step": 4312 }, { "epoch": 0.6090087545890991, "grad_norm": 3.6751279773185295, "learning_rate": 1.8471294821563247e-05, "loss": 0.8422, "step": 4313 }, { "epoch": 0.609149957639085, "grad_norm": 3.919763420356692, "learning_rate": 1.847048464937031e-05, "loss": 1.3057, "step": 4314 }, { "epoch": 0.6092911606890709, "grad_norm": 3.3365861210899306, "learning_rate": 1.8469674280326963e-05, "loss": 0.943, "step": 4315 }, { "epoch": 0.6094323637390567, "grad_norm": 4.002395484372098, "learning_rate": 1.8468863714452044e-05, "loss": 0.7989, "step": 4316 }, { "epoch": 0.6095735667890426, "grad_norm": 3.3102026849017263, "learning_rate": 1.846805295176439e-05, "loss": 0.9228, "step": 4317 }, { "epoch": 0.6097147698390285, "grad_norm": 4.094722011425383, "learning_rate": 1.8467241992282842e-05, "loss": 1.1037, "step": 4318 }, { "epoch": 0.6098559728890144, "grad_norm": 3.5560157936762504, "learning_rate": 1.8466430836026247e-05, "loss": 0.9872, "step": 4319 }, { "epoch": 0.6099971759390003, "grad_norm": 3.4272592639305084, "learning_rate": 1.846561948301346e-05, "loss": 0.9431, "step": 4320 }, { "epoch": 0.6101383789889862, "grad_norm": 3.213289111056129, "learning_rate": 1.846480793326333e-05, "loss": 0.9638, "step": 4321 }, { "epoch": 0.610279582038972, "grad_norm": 3.418920951031553, "learning_rate": 1.8463996186794716e-05, "loss": 0.9983, "step": 4322 }, { "epoch": 0.6104207850889579, "grad_norm": 3.432638954570329, "learning_rate": 1.846318424362649e-05, "loss": 1.0573, "step": 4323 }, { "epoch": 0.6105619881389438, "grad_norm": 4.473833099919569, "learning_rate": 1.8462372103777512e-05, "loss": 1.1035, "step": 4324 }, { "epoch": 0.6107031911889297, "grad_norm": 4.274874915998192, "learning_rate": 1.8461559767266663e-05, "loss": 1.1037, "step": 4325 }, { "epoch": 0.6108443942389156, "grad_norm": 3.7647601499030636, "learning_rate": 1.846074723411282e-05, "loss": 1.1401, "step": 4326 }, { "epoch": 0.6109855972889014, "grad_norm": 3.6657051160591974, "learning_rate": 1.8459934504334868e-05, "loss": 1.1786, "step": 4327 }, { "epoch": 0.6111268003388873, "grad_norm": 3.4727467595433477, "learning_rate": 1.845912157795169e-05, "loss": 0.971, "step": 4328 }, { "epoch": 0.6112680033888732, "grad_norm": 3.2968915284355638, "learning_rate": 1.8458308454982177e-05, "loss": 0.8461, "step": 4329 }, { "epoch": 0.6114092064388591, "grad_norm": 2.9382177570735037, "learning_rate": 1.845749513544523e-05, "loss": 0.845, "step": 4330 }, { "epoch": 0.611550409488845, "grad_norm": 3.8970802737992822, "learning_rate": 1.8456681619359748e-05, "loss": 1.0907, "step": 4331 }, { "epoch": 0.6116916125388309, "grad_norm": 3.3898756107880956, "learning_rate": 1.845586790674464e-05, "loss": 1.1002, "step": 4332 }, { "epoch": 0.6118328155888167, "grad_norm": 3.950958939267451, "learning_rate": 1.8455053997618814e-05, "loss": 0.97, "step": 4333 }, { "epoch": 0.6119740186388026, "grad_norm": 3.6938178612248684, "learning_rate": 1.845423989200118e-05, "loss": 1.1848, "step": 4334 }, { "epoch": 0.6121152216887885, "grad_norm": 3.97209680519993, "learning_rate": 1.8453425589910665e-05, "loss": 1.0352, "step": 4335 }, { "epoch": 0.6122564247387744, "grad_norm": 5.18941579057397, "learning_rate": 1.8452611091366188e-05, "loss": 1.2571, "step": 4336 }, { "epoch": 0.6123976277887603, "grad_norm": 3.1217568876186594, "learning_rate": 1.8451796396386683e-05, "loss": 0.8443, "step": 4337 }, { "epoch": 0.6125388308387462, "grad_norm": 3.7954005089073015, "learning_rate": 1.8450981504991077e-05, "loss": 0.9554, "step": 4338 }, { "epoch": 0.612680033888732, "grad_norm": 3.6412879588767524, "learning_rate": 1.845016641719831e-05, "loss": 1.0309, "step": 4339 }, { "epoch": 0.6128212369387179, "grad_norm": 3.7442795923431413, "learning_rate": 1.8449351133027327e-05, "loss": 1.1098, "step": 4340 }, { "epoch": 0.6129624399887038, "grad_norm": 3.5945135423653176, "learning_rate": 1.8448535652497073e-05, "loss": 1.0877, "step": 4341 }, { "epoch": 0.6131036430386896, "grad_norm": 3.124375688020515, "learning_rate": 1.8447719975626496e-05, "loss": 0.9594, "step": 4342 }, { "epoch": 0.6132448460886755, "grad_norm": 4.1813427416423545, "learning_rate": 1.8446904102434556e-05, "loss": 1.0325, "step": 4343 }, { "epoch": 0.6133860491386613, "grad_norm": 3.226743169077687, "learning_rate": 1.8446088032940217e-05, "loss": 0.9507, "step": 4344 }, { "epoch": 0.6135272521886472, "grad_norm": 3.2471833522178963, "learning_rate": 1.8445271767162435e-05, "loss": 0.9772, "step": 4345 }, { "epoch": 0.6136684552386331, "grad_norm": 12.002604965145798, "learning_rate": 1.8444455305120187e-05, "loss": 1.1078, "step": 4346 }, { "epoch": 0.613809658288619, "grad_norm": 3.7535403236409275, "learning_rate": 1.8443638646832442e-05, "loss": 0.9741, "step": 4347 }, { "epoch": 0.6139508613386049, "grad_norm": 4.062758731877399, "learning_rate": 1.8442821792318183e-05, "loss": 1.1391, "step": 4348 }, { "epoch": 0.6140920643885908, "grad_norm": 3.230238976145585, "learning_rate": 1.844200474159639e-05, "loss": 0.9042, "step": 4349 }, { "epoch": 0.6142332674385766, "grad_norm": 4.216110181318829, "learning_rate": 1.8441187494686055e-05, "loss": 1.1695, "step": 4350 }, { "epoch": 0.6143744704885625, "grad_norm": 3.203289150902373, "learning_rate": 1.8440370051606165e-05, "loss": 0.8446, "step": 4351 }, { "epoch": 0.6145156735385484, "grad_norm": 4.225409869809968, "learning_rate": 1.8439552412375725e-05, "loss": 0.9955, "step": 4352 }, { "epoch": 0.6146568765885343, "grad_norm": 3.6316339010142236, "learning_rate": 1.8438734577013728e-05, "loss": 0.9645, "step": 4353 }, { "epoch": 0.6147980796385202, "grad_norm": 3.913365408444451, "learning_rate": 1.8437916545539185e-05, "loss": 0.9768, "step": 4354 }, { "epoch": 0.6149392826885061, "grad_norm": 4.2515260010201965, "learning_rate": 1.8437098317971104e-05, "loss": 1.0734, "step": 4355 }, { "epoch": 0.6150804857384919, "grad_norm": 4.041407631860882, "learning_rate": 1.8436279894328507e-05, "loss": 1.1296, "step": 4356 }, { "epoch": 0.6152216887884778, "grad_norm": 4.083777152102148, "learning_rate": 1.8435461274630402e-05, "loss": 1.0844, "step": 4357 }, { "epoch": 0.6153628918384637, "grad_norm": 4.561251812429453, "learning_rate": 1.8434642458895823e-05, "loss": 1.2106, "step": 4358 }, { "epoch": 0.6155040948884496, "grad_norm": 3.2818716878503937, "learning_rate": 1.8433823447143796e-05, "loss": 0.9058, "step": 4359 }, { "epoch": 0.6156452979384355, "grad_norm": 3.5011028544096723, "learning_rate": 1.8433004239393353e-05, "loss": 1.02, "step": 4360 }, { "epoch": 0.6157865009884214, "grad_norm": 3.6274211625064607, "learning_rate": 1.8432184835663535e-05, "loss": 1.0762, "step": 4361 }, { "epoch": 0.6159277040384072, "grad_norm": 3.7649414902412053, "learning_rate": 1.8431365235973383e-05, "loss": 1.1366, "step": 4362 }, { "epoch": 0.6160689070883931, "grad_norm": 4.352915838656869, "learning_rate": 1.8430545440341942e-05, "loss": 1.1059, "step": 4363 }, { "epoch": 0.616210110138379, "grad_norm": 3.9600291454201906, "learning_rate": 1.8429725448788267e-05, "loss": 1.0093, "step": 4364 }, { "epoch": 0.6163513131883649, "grad_norm": 3.7443006441089977, "learning_rate": 1.8428905261331412e-05, "loss": 0.8644, "step": 4365 }, { "epoch": 0.6164925162383508, "grad_norm": 4.144582192396024, "learning_rate": 1.8428084877990443e-05, "loss": 0.9091, "step": 4366 }, { "epoch": 0.6166337192883367, "grad_norm": 3.4799150978699314, "learning_rate": 1.8427264298784418e-05, "loss": 0.9291, "step": 4367 }, { "epoch": 0.6167749223383225, "grad_norm": 3.6674010679912827, "learning_rate": 1.8426443523732412e-05, "loss": 1.0794, "step": 4368 }, { "epoch": 0.6169161253883084, "grad_norm": 4.548581679043314, "learning_rate": 1.8425622552853494e-05, "loss": 1.0694, "step": 4369 }, { "epoch": 0.6170573284382943, "grad_norm": 4.033874777887552, "learning_rate": 1.8424801386166752e-05, "loss": 0.9868, "step": 4370 }, { "epoch": 0.6171985314882802, "grad_norm": 4.402225524661612, "learning_rate": 1.8423980023691258e-05, "loss": 1.1061, "step": 4371 }, { "epoch": 0.6173397345382661, "grad_norm": 4.600419846281784, "learning_rate": 1.842315846544611e-05, "loss": 1.3261, "step": 4372 }, { "epoch": 0.617480937588252, "grad_norm": 3.6130993731055905, "learning_rate": 1.8422336711450396e-05, "loss": 0.8885, "step": 4373 }, { "epoch": 0.6176221406382378, "grad_norm": 3.762301154754903, "learning_rate": 1.8421514761723216e-05, "loss": 0.9497, "step": 4374 }, { "epoch": 0.6177633436882237, "grad_norm": 4.411073240054198, "learning_rate": 1.842069261628367e-05, "loss": 1.2685, "step": 4375 }, { "epoch": 0.6179045467382095, "grad_norm": 5.0687021611726975, "learning_rate": 1.841987027515086e-05, "loss": 1.3425, "step": 4376 }, { "epoch": 0.6180457497881954, "grad_norm": 3.854728007837796, "learning_rate": 1.8419047738343905e-05, "loss": 1.1232, "step": 4377 }, { "epoch": 0.6181869528381813, "grad_norm": 4.524296759978584, "learning_rate": 1.8418225005881916e-05, "loss": 1.281, "step": 4378 }, { "epoch": 0.6183281558881671, "grad_norm": 3.406193395504181, "learning_rate": 1.841740207778401e-05, "loss": 0.8997, "step": 4379 }, { "epoch": 0.618469358938153, "grad_norm": 4.063073434545334, "learning_rate": 1.8416578954069318e-05, "loss": 1.0026, "step": 4380 }, { "epoch": 0.6186105619881389, "grad_norm": 3.835876532332979, "learning_rate": 1.8415755634756968e-05, "loss": 1.1256, "step": 4381 }, { "epoch": 0.6187517650381248, "grad_norm": 4.256828421338441, "learning_rate": 1.841493211986609e-05, "loss": 0.9758, "step": 4382 }, { "epoch": 0.6188929680881107, "grad_norm": 3.7133289117110286, "learning_rate": 1.841410840941582e-05, "loss": 1.2061, "step": 4383 }, { "epoch": 0.6190341711380966, "grad_norm": 3.38850620404612, "learning_rate": 1.841328450342531e-05, "loss": 0.9146, "step": 4384 }, { "epoch": 0.6191753741880824, "grad_norm": 3.70629461396281, "learning_rate": 1.8412460401913695e-05, "loss": 1.098, "step": 4385 }, { "epoch": 0.6193165772380683, "grad_norm": 3.042261920644096, "learning_rate": 1.8411636104900138e-05, "loss": 0.8615, "step": 4386 }, { "epoch": 0.6194577802880542, "grad_norm": 3.2142485653704163, "learning_rate": 1.841081161240379e-05, "loss": 0.867, "step": 4387 }, { "epoch": 0.6195989833380401, "grad_norm": 3.5280286072296625, "learning_rate": 1.840998692444381e-05, "loss": 0.8558, "step": 4388 }, { "epoch": 0.619740186388026, "grad_norm": 3.582594442166063, "learning_rate": 1.840916204103937e-05, "loss": 0.9091, "step": 4389 }, { "epoch": 0.6198813894380119, "grad_norm": 8.016979710903913, "learning_rate": 1.840833696220963e-05, "loss": 1.015, "step": 4390 }, { "epoch": 0.6200225924879977, "grad_norm": 3.39726898594206, "learning_rate": 1.8407511687973776e-05, "loss": 0.9433, "step": 4391 }, { "epoch": 0.6201637955379836, "grad_norm": 3.345535187457047, "learning_rate": 1.840668621835098e-05, "loss": 1.0486, "step": 4392 }, { "epoch": 0.6203049985879695, "grad_norm": 3.4042547687665112, "learning_rate": 1.8405860553360428e-05, "loss": 0.9434, "step": 4393 }, { "epoch": 0.6204462016379554, "grad_norm": 3.9051811726060186, "learning_rate": 1.84050346930213e-05, "loss": 1.0895, "step": 4394 }, { "epoch": 0.6205874046879413, "grad_norm": 3.6391237022508496, "learning_rate": 1.8404208637352802e-05, "loss": 1.1197, "step": 4395 }, { "epoch": 0.6207286077379272, "grad_norm": 4.229181776013685, "learning_rate": 1.8403382386374124e-05, "loss": 1.11, "step": 4396 }, { "epoch": 0.620869810787913, "grad_norm": 3.250385696788391, "learning_rate": 1.8402555940104466e-05, "loss": 0.9462, "step": 4397 }, { "epoch": 0.6210110138378989, "grad_norm": 4.422670888092435, "learning_rate": 1.840172929856304e-05, "loss": 1.167, "step": 4398 }, { "epoch": 0.6211522168878848, "grad_norm": 3.729328508456831, "learning_rate": 1.840090246176905e-05, "loss": 1.1157, "step": 4399 }, { "epoch": 0.6212934199378707, "grad_norm": 3.5917787914221164, "learning_rate": 1.840007542974172e-05, "loss": 1.0857, "step": 4400 }, { "epoch": 0.6214346229878566, "grad_norm": 3.765418931913234, "learning_rate": 1.839924820250026e-05, "loss": 1.1719, "step": 4401 }, { "epoch": 0.6215758260378424, "grad_norm": 3.5311564364710315, "learning_rate": 1.8398420780063905e-05, "loss": 0.9262, "step": 4402 }, { "epoch": 0.6217170290878283, "grad_norm": 3.371886888159024, "learning_rate": 1.839759316245187e-05, "loss": 0.9837, "step": 4403 }, { "epoch": 0.6218582321378142, "grad_norm": 4.939380592632123, "learning_rate": 1.8396765349683404e-05, "loss": 1.2113, "step": 4404 }, { "epoch": 0.6219994351878001, "grad_norm": 4.262762004402425, "learning_rate": 1.8395937341777732e-05, "loss": 1.2345, "step": 4405 }, { "epoch": 0.622140638237786, "grad_norm": 3.405183986160233, "learning_rate": 1.8395109138754104e-05, "loss": 0.8409, "step": 4406 }, { "epoch": 0.6222818412877719, "grad_norm": 3.842974881606262, "learning_rate": 1.8394280740631765e-05, "loss": 1.1841, "step": 4407 }, { "epoch": 0.6224230443377577, "grad_norm": 3.293031420713474, "learning_rate": 1.8393452147429965e-05, "loss": 0.8353, "step": 4408 }, { "epoch": 0.6225642473877436, "grad_norm": 6.341168204060529, "learning_rate": 1.8392623359167964e-05, "loss": 1.3582, "step": 4409 }, { "epoch": 0.6227054504377294, "grad_norm": 3.4525857501510275, "learning_rate": 1.8391794375865025e-05, "loss": 0.9091, "step": 4410 }, { "epoch": 0.6228466534877153, "grad_norm": 3.3273481256198507, "learning_rate": 1.8390965197540404e-05, "loss": 1.1268, "step": 4411 }, { "epoch": 0.6229878565377012, "grad_norm": 3.906910916070192, "learning_rate": 1.8390135824213376e-05, "loss": 0.9928, "step": 4412 }, { "epoch": 0.623129059587687, "grad_norm": 3.1274694188244045, "learning_rate": 1.8389306255903216e-05, "loss": 0.9818, "step": 4413 }, { "epoch": 0.6232702626376729, "grad_norm": 3.634100659794148, "learning_rate": 1.8388476492629198e-05, "loss": 1.0588, "step": 4414 }, { "epoch": 0.6234114656876588, "grad_norm": 4.412308863264425, "learning_rate": 1.8387646534410612e-05, "loss": 1.1713, "step": 4415 }, { "epoch": 0.6235526687376447, "grad_norm": 3.9035968303001995, "learning_rate": 1.838681638126674e-05, "loss": 0.9356, "step": 4416 }, { "epoch": 0.6236938717876306, "grad_norm": 3.93375096694715, "learning_rate": 1.8385986033216883e-05, "loss": 1.0768, "step": 4417 }, { "epoch": 0.6238350748376165, "grad_norm": 3.050506509267695, "learning_rate": 1.8385155490280327e-05, "loss": 0.7046, "step": 4418 }, { "epoch": 0.6239762778876023, "grad_norm": 4.065606605231039, "learning_rate": 1.8384324752476384e-05, "loss": 1.3575, "step": 4419 }, { "epoch": 0.6241174809375882, "grad_norm": 3.767112275529218, "learning_rate": 1.838349381982435e-05, "loss": 0.9412, "step": 4420 }, { "epoch": 0.6242586839875741, "grad_norm": 3.940423550970751, "learning_rate": 1.838266269234354e-05, "loss": 1.2461, "step": 4421 }, { "epoch": 0.62439988703756, "grad_norm": 3.8664067894759766, "learning_rate": 1.838183137005327e-05, "loss": 1.0906, "step": 4422 }, { "epoch": 0.6245410900875459, "grad_norm": 4.511575420446997, "learning_rate": 1.8380999852972864e-05, "loss": 1.1844, "step": 4423 }, { "epoch": 0.6246822931375318, "grad_norm": 3.773325226169781, "learning_rate": 1.8380168141121635e-05, "loss": 0.9064, "step": 4424 }, { "epoch": 0.6248234961875176, "grad_norm": 6.5215619801270135, "learning_rate": 1.8379336234518917e-05, "loss": 1.1895, "step": 4425 }, { "epoch": 0.6249646992375035, "grad_norm": 3.827810219302066, "learning_rate": 1.8378504133184047e-05, "loss": 0.9361, "step": 4426 }, { "epoch": 0.6251059022874894, "grad_norm": 4.05771314673562, "learning_rate": 1.837767183713636e-05, "loss": 0.9726, "step": 4427 }, { "epoch": 0.6252471053374753, "grad_norm": 3.749656983279884, "learning_rate": 1.8376839346395194e-05, "loss": 1.0087, "step": 4428 }, { "epoch": 0.6253883083874612, "grad_norm": 3.7481365913493954, "learning_rate": 1.83760066609799e-05, "loss": 1.0726, "step": 4429 }, { "epoch": 0.6255295114374471, "grad_norm": 3.621531124764263, "learning_rate": 1.837517378090983e-05, "loss": 1.0061, "step": 4430 }, { "epoch": 0.6256707144874329, "grad_norm": 3.7742884626722653, "learning_rate": 1.8374340706204335e-05, "loss": 1.085, "step": 4431 }, { "epoch": 0.6258119175374188, "grad_norm": 3.2934977661687923, "learning_rate": 1.8373507436882784e-05, "loss": 0.8606, "step": 4432 }, { "epoch": 0.6259531205874047, "grad_norm": 3.6011481022996708, "learning_rate": 1.8372673972964535e-05, "loss": 0.8677, "step": 4433 }, { "epoch": 0.6260943236373906, "grad_norm": 3.5833876856541007, "learning_rate": 1.8371840314468956e-05, "loss": 0.971, "step": 4434 }, { "epoch": 0.6262355266873765, "grad_norm": 3.962042712675888, "learning_rate": 1.8371006461415425e-05, "loss": 1.3831, "step": 4435 }, { "epoch": 0.6263767297373624, "grad_norm": 4.032990100519072, "learning_rate": 1.837017241382332e-05, "loss": 1.1031, "step": 4436 }, { "epoch": 0.6265179327873482, "grad_norm": 3.5215285586942793, "learning_rate": 1.836933817171202e-05, "loss": 0.9561, "step": 4437 }, { "epoch": 0.6266591358373341, "grad_norm": 4.019527275141383, "learning_rate": 1.836850373510092e-05, "loss": 1.1747, "step": 4438 }, { "epoch": 0.62680033888732, "grad_norm": 4.611558473214138, "learning_rate": 1.8367669104009404e-05, "loss": 1.1272, "step": 4439 }, { "epoch": 0.6269415419373059, "grad_norm": 3.8696414448772556, "learning_rate": 1.8366834278456872e-05, "loss": 1.0262, "step": 4440 }, { "epoch": 0.6270827449872918, "grad_norm": 3.6289432295911843, "learning_rate": 1.8365999258462723e-05, "loss": 0.9288, "step": 4441 }, { "epoch": 0.6272239480372777, "grad_norm": 3.5102175085059617, "learning_rate": 1.8365164044046367e-05, "loss": 1.0798, "step": 4442 }, { "epoch": 0.6273651510872635, "grad_norm": 4.158317837204322, "learning_rate": 1.836432863522721e-05, "loss": 1.146, "step": 4443 }, { "epoch": 0.6275063541372493, "grad_norm": 3.2960379221288565, "learning_rate": 1.836349303202467e-05, "loss": 0.8905, "step": 4444 }, { "epoch": 0.6276475571872352, "grad_norm": 4.611075690617447, "learning_rate": 1.8362657234458163e-05, "loss": 1.3082, "step": 4445 }, { "epoch": 0.6277887602372211, "grad_norm": 3.4786485043090507, "learning_rate": 1.836182124254711e-05, "loss": 0.9914, "step": 4446 }, { "epoch": 0.627929963287207, "grad_norm": 3.387774997106242, "learning_rate": 1.836098505631095e-05, "loss": 1.0317, "step": 4447 }, { "epoch": 0.6280711663371928, "grad_norm": 3.3823558115944894, "learning_rate": 1.83601486757691e-05, "loss": 0.889, "step": 4448 }, { "epoch": 0.6282123693871787, "grad_norm": 3.9062011448442835, "learning_rate": 1.8359312100941006e-05, "loss": 1.0739, "step": 4449 }, { "epoch": 0.6283535724371646, "grad_norm": 3.786089113585838, "learning_rate": 1.835847533184611e-05, "loss": 1.154, "step": 4450 }, { "epoch": 0.6284947754871505, "grad_norm": 3.815584600994966, "learning_rate": 1.835763836850386e-05, "loss": 0.8192, "step": 4451 }, { "epoch": 0.6286359785371364, "grad_norm": 3.729234535841036, "learning_rate": 1.83568012109337e-05, "loss": 1.2437, "step": 4452 }, { "epoch": 0.6287771815871223, "grad_norm": 4.992764756871374, "learning_rate": 1.8355963859155094e-05, "loss": 1.3867, "step": 4453 }, { "epoch": 0.6289183846371081, "grad_norm": 3.9709959593868565, "learning_rate": 1.8355126313187496e-05, "loss": 1.0025, "step": 4454 }, { "epoch": 0.629059587687094, "grad_norm": 3.704040045931481, "learning_rate": 1.8354288573050364e-05, "loss": 0.9998, "step": 4455 }, { "epoch": 0.6292007907370799, "grad_norm": 3.2631988806846355, "learning_rate": 1.8353450638763178e-05, "loss": 1.0733, "step": 4456 }, { "epoch": 0.6293419937870658, "grad_norm": 3.5694840123375706, "learning_rate": 1.8352612510345408e-05, "loss": 0.94, "step": 4457 }, { "epoch": 0.6294831968370517, "grad_norm": 3.9160449820073158, "learning_rate": 1.835177418781653e-05, "loss": 1.2328, "step": 4458 }, { "epoch": 0.6296243998870376, "grad_norm": 3.1263720788105855, "learning_rate": 1.8350935671196026e-05, "loss": 0.8892, "step": 4459 }, { "epoch": 0.6297656029370234, "grad_norm": 3.475668549940923, "learning_rate": 1.8350096960503383e-05, "loss": 1.0001, "step": 4460 }, { "epoch": 0.6299068059870093, "grad_norm": 4.243678612462539, "learning_rate": 1.834925805575809e-05, "loss": 1.4809, "step": 4461 }, { "epoch": 0.6300480090369952, "grad_norm": 3.7825593357338567, "learning_rate": 1.834841895697965e-05, "loss": 0.9324, "step": 4462 }, { "epoch": 0.6301892120869811, "grad_norm": 3.8191435834360137, "learning_rate": 1.834757966418756e-05, "loss": 1.0293, "step": 4463 }, { "epoch": 0.630330415136967, "grad_norm": 4.590071906850395, "learning_rate": 1.8346740177401318e-05, "loss": 1.1511, "step": 4464 }, { "epoch": 0.6304716181869529, "grad_norm": 3.542681945452479, "learning_rate": 1.834590049664044e-05, "loss": 0.9322, "step": 4465 }, { "epoch": 0.6306128212369387, "grad_norm": 3.395470354662677, "learning_rate": 1.8345060621924443e-05, "loss": 0.742, "step": 4466 }, { "epoch": 0.6307540242869246, "grad_norm": 3.6866421696617837, "learning_rate": 1.834422055327284e-05, "loss": 1.0092, "step": 4467 }, { "epoch": 0.6308952273369105, "grad_norm": 3.143910194067715, "learning_rate": 1.8343380290705153e-05, "loss": 0.8565, "step": 4468 }, { "epoch": 0.6310364303868964, "grad_norm": 2.874535376548412, "learning_rate": 1.8342539834240913e-05, "loss": 0.7365, "step": 4469 }, { "epoch": 0.6311776334368823, "grad_norm": 3.6555748165155006, "learning_rate": 1.834169918389965e-05, "loss": 1.0443, "step": 4470 }, { "epoch": 0.6313188364868682, "grad_norm": 3.4407911794432766, "learning_rate": 1.8340858339700898e-05, "loss": 0.9479, "step": 4471 }, { "epoch": 0.631460039536854, "grad_norm": 3.2905696274577485, "learning_rate": 1.83400173016642e-05, "loss": 1.0659, "step": 4472 }, { "epoch": 0.6316012425868399, "grad_norm": 2.9714262691637883, "learning_rate": 1.8339176069809105e-05, "loss": 0.8688, "step": 4473 }, { "epoch": 0.6317424456368258, "grad_norm": 3.6993155010881624, "learning_rate": 1.833833464415516e-05, "loss": 1.0227, "step": 4474 }, { "epoch": 0.6318836486868117, "grad_norm": 3.301771523100553, "learning_rate": 1.8337493024721916e-05, "loss": 0.9338, "step": 4475 }, { "epoch": 0.6320248517367976, "grad_norm": 4.342635213912344, "learning_rate": 1.8336651211528934e-05, "loss": 1.0832, "step": 4476 }, { "epoch": 0.6321660547867834, "grad_norm": 3.217873586231962, "learning_rate": 1.8335809204595783e-05, "loss": 0.9106, "step": 4477 }, { "epoch": 0.6323072578367692, "grad_norm": 4.306907120449864, "learning_rate": 1.833496700394202e-05, "loss": 1.0176, "step": 4478 }, { "epoch": 0.6324484608867551, "grad_norm": 3.2652759061837955, "learning_rate": 1.833412460958723e-05, "loss": 0.9687, "step": 4479 }, { "epoch": 0.632589663936741, "grad_norm": 4.007952258897854, "learning_rate": 1.833328202155098e-05, "loss": 0.9891, "step": 4480 }, { "epoch": 0.6327308669867269, "grad_norm": 4.5062408290577896, "learning_rate": 1.8332439239852853e-05, "loss": 1.191, "step": 4481 }, { "epoch": 0.6328720700367128, "grad_norm": 4.087579168531247, "learning_rate": 1.833159626451244e-05, "loss": 1.1485, "step": 4482 }, { "epoch": 0.6330132730866986, "grad_norm": 3.2725089384653963, "learning_rate": 1.8330753095549327e-05, "loss": 0.9366, "step": 4483 }, { "epoch": 0.6331544761366845, "grad_norm": 3.5713121535573147, "learning_rate": 1.8329909732983105e-05, "loss": 0.8435, "step": 4484 }, { "epoch": 0.6332956791866704, "grad_norm": 3.475280902638243, "learning_rate": 1.8329066176833382e-05, "loss": 1.0355, "step": 4485 }, { "epoch": 0.6334368822366563, "grad_norm": 3.3673255309496564, "learning_rate": 1.832822242711976e-05, "loss": 0.8821, "step": 4486 }, { "epoch": 0.6335780852866422, "grad_norm": 2.879470937192115, "learning_rate": 1.8327378483861845e-05, "loss": 0.7917, "step": 4487 }, { "epoch": 0.633719288336628, "grad_norm": 3.5712047907540923, "learning_rate": 1.832653434707925e-05, "loss": 1.0643, "step": 4488 }, { "epoch": 0.6338604913866139, "grad_norm": 4.021387237877244, "learning_rate": 1.832569001679159e-05, "loss": 1.1849, "step": 4489 }, { "epoch": 0.6340016944365998, "grad_norm": 4.1168449114814205, "learning_rate": 1.832484549301849e-05, "loss": 1.0742, "step": 4490 }, { "epoch": 0.6341428974865857, "grad_norm": 4.254266241993345, "learning_rate": 1.8324000775779576e-05, "loss": 1.4046, "step": 4491 }, { "epoch": 0.6342841005365716, "grad_norm": 3.5818406374412124, "learning_rate": 1.8323155865094483e-05, "loss": 0.9332, "step": 4492 }, { "epoch": 0.6344253035865575, "grad_norm": 3.390283448361259, "learning_rate": 1.832231076098284e-05, "loss": 0.8011, "step": 4493 }, { "epoch": 0.6345665066365433, "grad_norm": 3.539876510569891, "learning_rate": 1.8321465463464287e-05, "loss": 0.9926, "step": 4494 }, { "epoch": 0.6347077096865292, "grad_norm": 3.9181161661553783, "learning_rate": 1.8320619972558474e-05, "loss": 1.0555, "step": 4495 }, { "epoch": 0.6348489127365151, "grad_norm": 3.81105112581345, "learning_rate": 1.831977428828504e-05, "loss": 1.0853, "step": 4496 }, { "epoch": 0.634990115786501, "grad_norm": 3.0529870825959327, "learning_rate": 1.831892841066365e-05, "loss": 1.0287, "step": 4497 }, { "epoch": 0.6351313188364869, "grad_norm": 4.162422965998469, "learning_rate": 1.8318082339713955e-05, "loss": 1.0866, "step": 4498 }, { "epoch": 0.6352725218864728, "grad_norm": 3.84856250614326, "learning_rate": 1.831723607545562e-05, "loss": 1.1286, "step": 4499 }, { "epoch": 0.6354137249364586, "grad_norm": 3.3861787769267546, "learning_rate": 1.831638961790831e-05, "loss": 0.9862, "step": 4500 }, { "epoch": 0.6355549279864445, "grad_norm": 4.055853400177247, "learning_rate": 1.8315542967091695e-05, "loss": 1.0254, "step": 4501 }, { "epoch": 0.6356961310364304, "grad_norm": 3.904491008047052, "learning_rate": 1.8314696123025456e-05, "loss": 1.0947, "step": 4502 }, { "epoch": 0.6358373340864163, "grad_norm": 3.6570228640694866, "learning_rate": 1.8313849085729267e-05, "loss": 0.9937, "step": 4503 }, { "epoch": 0.6359785371364022, "grad_norm": 4.152901635512655, "learning_rate": 1.8313001855222812e-05, "loss": 1.1166, "step": 4504 }, { "epoch": 0.6361197401863881, "grad_norm": 3.315713716575013, "learning_rate": 1.831215443152579e-05, "loss": 0.9015, "step": 4505 }, { "epoch": 0.6362609432363739, "grad_norm": 3.410684538342167, "learning_rate": 1.8311306814657886e-05, "loss": 1.0261, "step": 4506 }, { "epoch": 0.6364021462863598, "grad_norm": 3.840334847038781, "learning_rate": 1.8310459004638805e-05, "loss": 0.9529, "step": 4507 }, { "epoch": 0.6365433493363457, "grad_norm": 3.65332010702626, "learning_rate": 1.8309611001488242e-05, "loss": 1.0287, "step": 4508 }, { "epoch": 0.6366845523863316, "grad_norm": 3.933885186415056, "learning_rate": 1.830876280522591e-05, "loss": 1.2359, "step": 4509 }, { "epoch": 0.6368257554363175, "grad_norm": 3.5427175444104626, "learning_rate": 1.8307914415871516e-05, "loss": 0.9466, "step": 4510 }, { "epoch": 0.6369669584863034, "grad_norm": 3.1932845287353095, "learning_rate": 1.8307065833444784e-05, "loss": 0.8355, "step": 4511 }, { "epoch": 0.6371081615362891, "grad_norm": 3.1010186561725277, "learning_rate": 1.8306217057965427e-05, "loss": 0.8894, "step": 4512 }, { "epoch": 0.637249364586275, "grad_norm": 4.006587176440935, "learning_rate": 1.830536808945317e-05, "loss": 0.9065, "step": 4513 }, { "epoch": 0.6373905676362609, "grad_norm": 3.5575402574691735, "learning_rate": 1.8304518927927745e-05, "loss": 1.1287, "step": 4514 }, { "epoch": 0.6375317706862468, "grad_norm": 3.19681203157768, "learning_rate": 1.8303669573408892e-05, "loss": 1.0517, "step": 4515 }, { "epoch": 0.6376729737362327, "grad_norm": 4.1789453598548745, "learning_rate": 1.830282002591634e-05, "loss": 1.1777, "step": 4516 }, { "epoch": 0.6378141767862185, "grad_norm": 3.516854343342237, "learning_rate": 1.8301970285469836e-05, "loss": 1.0064, "step": 4517 }, { "epoch": 0.6379553798362044, "grad_norm": 4.217738613973506, "learning_rate": 1.830112035208913e-05, "loss": 1.3973, "step": 4518 }, { "epoch": 0.6380965828861903, "grad_norm": 3.3091890250436995, "learning_rate": 1.830027022579397e-05, "loss": 0.9522, "step": 4519 }, { "epoch": 0.6382377859361762, "grad_norm": 3.8105978136497964, "learning_rate": 1.8299419906604115e-05, "loss": 1.0089, "step": 4520 }, { "epoch": 0.6383789889861621, "grad_norm": 4.150422685226725, "learning_rate": 1.829856939453933e-05, "loss": 1.4429, "step": 4521 }, { "epoch": 0.638520192036148, "grad_norm": 3.519358245190021, "learning_rate": 1.8297718689619374e-05, "loss": 0.9234, "step": 4522 }, { "epoch": 0.6386613950861338, "grad_norm": 3.419689646828112, "learning_rate": 1.8296867791864015e-05, "loss": 0.9182, "step": 4523 }, { "epoch": 0.6388025981361197, "grad_norm": 3.933473215591213, "learning_rate": 1.8296016701293037e-05, "loss": 1.0111, "step": 4524 }, { "epoch": 0.6389438011861056, "grad_norm": 3.656649283461363, "learning_rate": 1.8295165417926207e-05, "loss": 0.9708, "step": 4525 }, { "epoch": 0.6390850042360915, "grad_norm": 3.2013994759170146, "learning_rate": 1.829431394178332e-05, "loss": 0.9283, "step": 4526 }, { "epoch": 0.6392262072860774, "grad_norm": 3.5633902628225376, "learning_rate": 1.829346227288416e-05, "loss": 1.0057, "step": 4527 }, { "epoch": 0.6393674103360633, "grad_norm": 4.063219815764296, "learning_rate": 1.8292610411248515e-05, "loss": 1.0876, "step": 4528 }, { "epoch": 0.6395086133860491, "grad_norm": 3.285624569423119, "learning_rate": 1.829175835689619e-05, "loss": 0.9535, "step": 4529 }, { "epoch": 0.639649816436035, "grad_norm": 3.8741562196396733, "learning_rate": 1.8290906109846974e-05, "loss": 1.0871, "step": 4530 }, { "epoch": 0.6397910194860209, "grad_norm": 3.921147166257154, "learning_rate": 1.8290053670120688e-05, "loss": 1.1746, "step": 4531 }, { "epoch": 0.6399322225360068, "grad_norm": 3.3778631453096444, "learning_rate": 1.828920103773713e-05, "loss": 0.9642, "step": 4532 }, { "epoch": 0.6400734255859927, "grad_norm": 3.157425385591998, "learning_rate": 1.8288348212716123e-05, "loss": 0.8477, "step": 4533 }, { "epoch": 0.6402146286359786, "grad_norm": 3.251755664706043, "learning_rate": 1.8287495195077482e-05, "loss": 0.9638, "step": 4534 }, { "epoch": 0.6403558316859644, "grad_norm": 3.4943158578558937, "learning_rate": 1.8286641984841034e-05, "loss": 1.0252, "step": 4535 }, { "epoch": 0.6404970347359503, "grad_norm": 4.262836446468636, "learning_rate": 1.8285788582026602e-05, "loss": 1.1046, "step": 4536 }, { "epoch": 0.6406382377859362, "grad_norm": 3.458998047442154, "learning_rate": 1.828493498665402e-05, "loss": 1.0034, "step": 4537 }, { "epoch": 0.6407794408359221, "grad_norm": 3.527031350757502, "learning_rate": 1.828408119874313e-05, "loss": 1.0655, "step": 4538 }, { "epoch": 0.640920643885908, "grad_norm": 3.866473359486658, "learning_rate": 1.828322721831377e-05, "loss": 1.1108, "step": 4539 }, { "epoch": 0.6410618469358939, "grad_norm": 4.54847754312386, "learning_rate": 1.8282373045385786e-05, "loss": 1.2367, "step": 4540 }, { "epoch": 0.6412030499858797, "grad_norm": 3.7804342098021553, "learning_rate": 1.8281518679979033e-05, "loss": 0.8964, "step": 4541 }, { "epoch": 0.6413442530358656, "grad_norm": 4.13911516392743, "learning_rate": 1.8280664122113356e-05, "loss": 1.1259, "step": 4542 }, { "epoch": 0.6414854560858515, "grad_norm": 3.3151382805248812, "learning_rate": 1.8279809371808624e-05, "loss": 0.9799, "step": 4543 }, { "epoch": 0.6416266591358374, "grad_norm": 3.5757881553227744, "learning_rate": 1.82789544290847e-05, "loss": 1.0838, "step": 4544 }, { "epoch": 0.6417678621858233, "grad_norm": 3.3304208783556883, "learning_rate": 1.8278099293961447e-05, "loss": 0.9105, "step": 4545 }, { "epoch": 0.641909065235809, "grad_norm": 4.039868994568356, "learning_rate": 1.8277243966458744e-05, "loss": 1.1773, "step": 4546 }, { "epoch": 0.6420502682857949, "grad_norm": 4.101694453999152, "learning_rate": 1.8276388446596465e-05, "loss": 1.4507, "step": 4547 }, { "epoch": 0.6421914713357808, "grad_norm": 3.683479892194015, "learning_rate": 1.827553273439449e-05, "loss": 0.9877, "step": 4548 }, { "epoch": 0.6423326743857667, "grad_norm": 3.2534788849997804, "learning_rate": 1.8274676829872714e-05, "loss": 0.9815, "step": 4549 }, { "epoch": 0.6424738774357526, "grad_norm": 3.2377073043618165, "learning_rate": 1.8273820733051016e-05, "loss": 0.9049, "step": 4550 }, { "epoch": 0.6426150804857385, "grad_norm": 3.5475742046770202, "learning_rate": 1.8272964443949305e-05, "loss": 0.9928, "step": 4551 }, { "epoch": 0.6427562835357243, "grad_norm": 3.719771490078364, "learning_rate": 1.8272107962587465e-05, "loss": 1.1384, "step": 4552 }, { "epoch": 0.6428974865857102, "grad_norm": 3.5243567234010467, "learning_rate": 1.8271251288985414e-05, "loss": 1.0709, "step": 4553 }, { "epoch": 0.6430386896356961, "grad_norm": 3.433692503565806, "learning_rate": 1.827039442316305e-05, "loss": 1.0366, "step": 4554 }, { "epoch": 0.643179892685682, "grad_norm": 3.965191933649661, "learning_rate": 1.8269537365140294e-05, "loss": 0.9199, "step": 4555 }, { "epoch": 0.6433210957356679, "grad_norm": 3.6968802292751732, "learning_rate": 1.8268680114937064e-05, "loss": 0.9139, "step": 4556 }, { "epoch": 0.6434622987856538, "grad_norm": 3.425189899571042, "learning_rate": 1.8267822672573276e-05, "loss": 0.9445, "step": 4557 }, { "epoch": 0.6436035018356396, "grad_norm": 3.326960335496322, "learning_rate": 1.8266965038068856e-05, "loss": 0.8838, "step": 4558 }, { "epoch": 0.6437447048856255, "grad_norm": 3.3413559533621897, "learning_rate": 1.8266107211443747e-05, "loss": 0.9792, "step": 4559 }, { "epoch": 0.6438859079356114, "grad_norm": 3.549229661574941, "learning_rate": 1.8265249192717868e-05, "loss": 1.0903, "step": 4560 }, { "epoch": 0.6440271109855973, "grad_norm": 3.6421008045368284, "learning_rate": 1.8264390981911174e-05, "loss": 1.1925, "step": 4561 }, { "epoch": 0.6441683140355832, "grad_norm": 3.720524097986373, "learning_rate": 1.8263532579043598e-05, "loss": 1.0794, "step": 4562 }, { "epoch": 0.644309517085569, "grad_norm": 3.436122760357629, "learning_rate": 1.8262673984135096e-05, "loss": 1.1836, "step": 4563 }, { "epoch": 0.6444507201355549, "grad_norm": 4.031870773754906, "learning_rate": 1.8261815197205617e-05, "loss": 1.0171, "step": 4564 }, { "epoch": 0.6445919231855408, "grad_norm": 3.7045512501971944, "learning_rate": 1.8260956218275125e-05, "loss": 1.1864, "step": 4565 }, { "epoch": 0.6447331262355267, "grad_norm": 3.824516092540899, "learning_rate": 1.8260097047363575e-05, "loss": 1.1279, "step": 4566 }, { "epoch": 0.6448743292855126, "grad_norm": 3.5204651403061935, "learning_rate": 1.8259237684490937e-05, "loss": 0.9931, "step": 4567 }, { "epoch": 0.6450155323354985, "grad_norm": 3.7345374798212596, "learning_rate": 1.8258378129677184e-05, "loss": 1.1322, "step": 4568 }, { "epoch": 0.6451567353854843, "grad_norm": 3.8539690488262375, "learning_rate": 1.8257518382942286e-05, "loss": 0.9222, "step": 4569 }, { "epoch": 0.6452979384354702, "grad_norm": 4.159862384564203, "learning_rate": 1.8256658444306233e-05, "loss": 1.0485, "step": 4570 }, { "epoch": 0.6454391414854561, "grad_norm": 3.85053409061351, "learning_rate": 1.8255798313789e-05, "loss": 0.8996, "step": 4571 }, { "epoch": 0.645580344535442, "grad_norm": 3.434964937458507, "learning_rate": 1.825493799141058e-05, "loss": 1.003, "step": 4572 }, { "epoch": 0.6457215475854279, "grad_norm": 3.546202297219499, "learning_rate": 1.8254077477190965e-05, "loss": 1.1473, "step": 4573 }, { "epoch": 0.6458627506354138, "grad_norm": 4.063426613953572, "learning_rate": 1.8253216771150153e-05, "loss": 1.3927, "step": 4574 }, { "epoch": 0.6460039536853996, "grad_norm": 4.387307867119283, "learning_rate": 1.825235587330815e-05, "loss": 1.3608, "step": 4575 }, { "epoch": 0.6461451567353855, "grad_norm": 4.64016898026523, "learning_rate": 1.825149478368496e-05, "loss": 1.3142, "step": 4576 }, { "epoch": 0.6462863597853714, "grad_norm": 3.884925241223521, "learning_rate": 1.8250633502300595e-05, "loss": 1.1283, "step": 4577 }, { "epoch": 0.6464275628353573, "grad_norm": 3.5173560731835427, "learning_rate": 1.8249772029175067e-05, "loss": 1.2375, "step": 4578 }, { "epoch": 0.6465687658853432, "grad_norm": 3.4559690525684017, "learning_rate": 1.82489103643284e-05, "loss": 0.8954, "step": 4579 }, { "epoch": 0.646709968935329, "grad_norm": 3.449057794178173, "learning_rate": 1.8248048507780626e-05, "loss": 1.1819, "step": 4580 }, { "epoch": 0.6468511719853148, "grad_norm": 4.628988383032907, "learning_rate": 1.824718645955176e-05, "loss": 1.3103, "step": 4581 }, { "epoch": 0.6469923750353007, "grad_norm": 4.230663153231694, "learning_rate": 1.8246324219661848e-05, "loss": 1.1269, "step": 4582 }, { "epoch": 0.6471335780852866, "grad_norm": 4.635457842218894, "learning_rate": 1.8245461788130913e-05, "loss": 1.1859, "step": 4583 }, { "epoch": 0.6472747811352725, "grad_norm": 3.8603934556932202, "learning_rate": 1.8244599164979015e-05, "loss": 0.9436, "step": 4584 }, { "epoch": 0.6474159841852584, "grad_norm": 3.239888791174741, "learning_rate": 1.824373635022619e-05, "loss": 0.8821, "step": 4585 }, { "epoch": 0.6475571872352442, "grad_norm": 3.796674885944984, "learning_rate": 1.8242873343892494e-05, "loss": 1.0962, "step": 4586 }, { "epoch": 0.6476983902852301, "grad_norm": 3.3894691424612065, "learning_rate": 1.824201014599798e-05, "loss": 0.9973, "step": 4587 }, { "epoch": 0.647839593335216, "grad_norm": 3.7404034486926916, "learning_rate": 1.824114675656271e-05, "loss": 1.2176, "step": 4588 }, { "epoch": 0.6479807963852019, "grad_norm": 3.2981377484431467, "learning_rate": 1.824028317560675e-05, "loss": 0.9491, "step": 4589 }, { "epoch": 0.6481219994351878, "grad_norm": 3.773070793854085, "learning_rate": 1.823941940315017e-05, "loss": 1.0863, "step": 4590 }, { "epoch": 0.6482632024851737, "grad_norm": 4.686594941398059, "learning_rate": 1.8238555439213033e-05, "loss": 1.1405, "step": 4591 }, { "epoch": 0.6484044055351595, "grad_norm": 4.468332682798484, "learning_rate": 1.823769128381543e-05, "loss": 1.1687, "step": 4592 }, { "epoch": 0.6485456085851454, "grad_norm": 3.748583185874479, "learning_rate": 1.823682693697744e-05, "loss": 1.0472, "step": 4593 }, { "epoch": 0.6486868116351313, "grad_norm": 4.288275618474232, "learning_rate": 1.823596239871915e-05, "loss": 1.0521, "step": 4594 }, { "epoch": 0.6488280146851172, "grad_norm": 3.990739923176175, "learning_rate": 1.823509766906065e-05, "loss": 1.111, "step": 4595 }, { "epoch": 0.6489692177351031, "grad_norm": 4.020429423251817, "learning_rate": 1.8234232748022033e-05, "loss": 1.2982, "step": 4596 }, { "epoch": 0.649110420785089, "grad_norm": 4.486556060264408, "learning_rate": 1.8233367635623407e-05, "loss": 1.2132, "step": 4597 }, { "epoch": 0.6492516238350748, "grad_norm": 3.708488563165353, "learning_rate": 1.823250233188487e-05, "loss": 0.9743, "step": 4598 }, { "epoch": 0.6493928268850607, "grad_norm": 3.331277906535079, "learning_rate": 1.8231636836826538e-05, "loss": 1.0684, "step": 4599 }, { "epoch": 0.6495340299350466, "grad_norm": 4.586559809840767, "learning_rate": 1.8230771150468517e-05, "loss": 1.186, "step": 4600 }, { "epoch": 0.6496752329850325, "grad_norm": 3.6494978936888347, "learning_rate": 1.8229905272830932e-05, "loss": 0.9938, "step": 4601 }, { "epoch": 0.6498164360350184, "grad_norm": 4.5140921628629975, "learning_rate": 1.8229039203933903e-05, "loss": 1.0305, "step": 4602 }, { "epoch": 0.6499576390850043, "grad_norm": 3.7473310258422785, "learning_rate": 1.8228172943797554e-05, "loss": 1.2156, "step": 4603 }, { "epoch": 0.6500988421349901, "grad_norm": 3.8694364782183106, "learning_rate": 1.8227306492442022e-05, "loss": 1.0766, "step": 4604 }, { "epoch": 0.650240045184976, "grad_norm": 3.6788366402542367, "learning_rate": 1.8226439849887437e-05, "loss": 0.9183, "step": 4605 }, { "epoch": 0.6503812482349619, "grad_norm": 3.50909888915248, "learning_rate": 1.8225573016153945e-05, "loss": 1.1211, "step": 4606 }, { "epoch": 0.6505224512849478, "grad_norm": 3.618161915774754, "learning_rate": 1.8224705991261688e-05, "loss": 1.001, "step": 4607 }, { "epoch": 0.6506636543349337, "grad_norm": 3.8930015488095058, "learning_rate": 1.822383877523082e-05, "loss": 1.1374, "step": 4608 }, { "epoch": 0.6508048573849196, "grad_norm": 3.6089539550890617, "learning_rate": 1.8222971368081483e-05, "loss": 1.0412, "step": 4609 }, { "epoch": 0.6509460604349054, "grad_norm": 3.3313654850020553, "learning_rate": 1.8222103769833844e-05, "loss": 0.9294, "step": 4610 }, { "epoch": 0.6510872634848913, "grad_norm": 3.778713039415247, "learning_rate": 1.8221235980508067e-05, "loss": 1.0493, "step": 4611 }, { "epoch": 0.6512284665348772, "grad_norm": 4.30633508263959, "learning_rate": 1.8220368000124316e-05, "loss": 1.1642, "step": 4612 }, { "epoch": 0.6513696695848631, "grad_norm": 3.429061173126783, "learning_rate": 1.821949982870276e-05, "loss": 0.9445, "step": 4613 }, { "epoch": 0.6515108726348489, "grad_norm": 3.3524600570242082, "learning_rate": 1.8218631466263584e-05, "loss": 0.8737, "step": 4614 }, { "epoch": 0.6516520756848347, "grad_norm": 3.3400958016346274, "learning_rate": 1.8217762912826956e-05, "loss": 1.0053, "step": 4615 }, { "epoch": 0.6517932787348206, "grad_norm": 4.250109656551829, "learning_rate": 1.821689416841307e-05, "loss": 1.1588, "step": 4616 }, { "epoch": 0.6519344817848065, "grad_norm": 3.5752164322423194, "learning_rate": 1.821602523304211e-05, "loss": 1.105, "step": 4617 }, { "epoch": 0.6520756848347924, "grad_norm": 4.682130443328893, "learning_rate": 1.8215156106734274e-05, "loss": 1.1023, "step": 4618 }, { "epoch": 0.6522168878847783, "grad_norm": 3.5833662404807067, "learning_rate": 1.8214286789509755e-05, "loss": 0.9021, "step": 4619 }, { "epoch": 0.6523580909347642, "grad_norm": 3.9118971844220756, "learning_rate": 1.821341728138876e-05, "loss": 1.0303, "step": 4620 }, { "epoch": 0.65249929398475, "grad_norm": 3.5423796957880898, "learning_rate": 1.8212547582391497e-05, "loss": 1.0113, "step": 4621 }, { "epoch": 0.6526404970347359, "grad_norm": 3.5313099655981723, "learning_rate": 1.821167769253817e-05, "loss": 1.0141, "step": 4622 }, { "epoch": 0.6527817000847218, "grad_norm": 3.431586762782378, "learning_rate": 1.8210807611849002e-05, "loss": 1.0405, "step": 4623 }, { "epoch": 0.6529229031347077, "grad_norm": 3.93252851326111, "learning_rate": 1.820993734034421e-05, "loss": 1.1038, "step": 4624 }, { "epoch": 0.6530641061846936, "grad_norm": 3.859233873508907, "learning_rate": 1.8209066878044025e-05, "loss": 1.2042, "step": 4625 }, { "epoch": 0.6532053092346795, "grad_norm": 3.8734210535665943, "learning_rate": 1.8208196224968663e-05, "loss": 1.2707, "step": 4626 }, { "epoch": 0.6533465122846653, "grad_norm": 3.1039815283465058, "learning_rate": 1.820732538113837e-05, "loss": 0.8601, "step": 4627 }, { "epoch": 0.6534877153346512, "grad_norm": 3.542434455116299, "learning_rate": 1.820645434657338e-05, "loss": 0.9444, "step": 4628 }, { "epoch": 0.6536289183846371, "grad_norm": 3.0512050082119524, "learning_rate": 1.8205583121293936e-05, "loss": 0.8367, "step": 4629 }, { "epoch": 0.653770121434623, "grad_norm": 3.7902289317954536, "learning_rate": 1.8204711705320282e-05, "loss": 1.0869, "step": 4630 }, { "epoch": 0.6539113244846089, "grad_norm": 3.6035631993689297, "learning_rate": 1.820384009867267e-05, "loss": 0.8744, "step": 4631 }, { "epoch": 0.6540525275345948, "grad_norm": 3.198058640850185, "learning_rate": 1.820296830137136e-05, "loss": 0.8624, "step": 4632 }, { "epoch": 0.6541937305845806, "grad_norm": 3.1074222694897107, "learning_rate": 1.820209631343661e-05, "loss": 0.7617, "step": 4633 }, { "epoch": 0.6543349336345665, "grad_norm": 3.1027927781208935, "learning_rate": 1.820122413488868e-05, "loss": 0.9057, "step": 4634 }, { "epoch": 0.6544761366845524, "grad_norm": 3.4057336540790657, "learning_rate": 1.8200351765747846e-05, "loss": 0.9429, "step": 4635 }, { "epoch": 0.6546173397345383, "grad_norm": 3.1630887201071007, "learning_rate": 1.8199479206034374e-05, "loss": 0.8692, "step": 4636 }, { "epoch": 0.6547585427845242, "grad_norm": 3.4564615527659965, "learning_rate": 1.819860645576855e-05, "loss": 0.9472, "step": 4637 }, { "epoch": 0.65489974583451, "grad_norm": 3.374162776184678, "learning_rate": 1.8197733514970655e-05, "loss": 0.8467, "step": 4638 }, { "epoch": 0.6550409488844959, "grad_norm": 3.9076348143316912, "learning_rate": 1.819686038366097e-05, "loss": 1.0205, "step": 4639 }, { "epoch": 0.6551821519344818, "grad_norm": 3.372368585320994, "learning_rate": 1.819598706185979e-05, "loss": 0.8872, "step": 4640 }, { "epoch": 0.6553233549844677, "grad_norm": 4.017091323041263, "learning_rate": 1.819511354958741e-05, "loss": 1.0744, "step": 4641 }, { "epoch": 0.6554645580344536, "grad_norm": 4.437701355038319, "learning_rate": 1.8194239846864133e-05, "loss": 1.2738, "step": 4642 }, { "epoch": 0.6556057610844395, "grad_norm": 4.095355965809119, "learning_rate": 1.8193365953710257e-05, "loss": 1.0416, "step": 4643 }, { "epoch": 0.6557469641344253, "grad_norm": 3.3131417486233237, "learning_rate": 1.81924918701461e-05, "loss": 1.1107, "step": 4644 }, { "epoch": 0.6558881671844112, "grad_norm": 4.420830292414085, "learning_rate": 1.8191617596191963e-05, "loss": 1.1637, "step": 4645 }, { "epoch": 0.6560293702343971, "grad_norm": 3.910580543913456, "learning_rate": 1.8190743131868176e-05, "loss": 1.0787, "step": 4646 }, { "epoch": 0.656170573284383, "grad_norm": 4.895015583266957, "learning_rate": 1.818986847719505e-05, "loss": 1.1761, "step": 4647 }, { "epoch": 0.6563117763343688, "grad_norm": 3.122385716415178, "learning_rate": 1.8188993632192926e-05, "loss": 0.8288, "step": 4648 }, { "epoch": 0.6564529793843547, "grad_norm": 3.0130979949093777, "learning_rate": 1.818811859688212e-05, "loss": 0.8368, "step": 4649 }, { "epoch": 0.6565941824343405, "grad_norm": 3.4756726689541737, "learning_rate": 1.8187243371282976e-05, "loss": 0.95, "step": 4650 }, { "epoch": 0.6567353854843264, "grad_norm": 3.090300048298037, "learning_rate": 1.8186367955415833e-05, "loss": 0.8527, "step": 4651 }, { "epoch": 0.6568765885343123, "grad_norm": 3.44735902907066, "learning_rate": 1.8185492349301035e-05, "loss": 0.9747, "step": 4652 }, { "epoch": 0.6570177915842982, "grad_norm": 3.3898378037844004, "learning_rate": 1.8184616552958926e-05, "loss": 0.9305, "step": 4653 }, { "epoch": 0.6571589946342841, "grad_norm": 3.298769520854058, "learning_rate": 1.8183740566409867e-05, "loss": 0.9132, "step": 4654 }, { "epoch": 0.65730019768427, "grad_norm": 3.8820296016745792, "learning_rate": 1.8182864389674207e-05, "loss": 1.1299, "step": 4655 }, { "epoch": 0.6574414007342558, "grad_norm": 3.588698581002136, "learning_rate": 1.8181988022772315e-05, "loss": 1.0762, "step": 4656 }, { "epoch": 0.6575826037842417, "grad_norm": 3.045969057374051, "learning_rate": 1.8181111465724554e-05, "loss": 0.7468, "step": 4657 }, { "epoch": 0.6577238068342276, "grad_norm": 3.759168538342847, "learning_rate": 1.81802347185513e-05, "loss": 1.0298, "step": 4658 }, { "epoch": 0.6578650098842135, "grad_norm": 3.3440842806922304, "learning_rate": 1.817935778127292e-05, "loss": 0.974, "step": 4659 }, { "epoch": 0.6580062129341994, "grad_norm": 4.643981061042282, "learning_rate": 1.8178480653909795e-05, "loss": 1.3181, "step": 4660 }, { "epoch": 0.6581474159841852, "grad_norm": 3.5715994510089573, "learning_rate": 1.8177603336482315e-05, "loss": 1.0588, "step": 4661 }, { "epoch": 0.6582886190341711, "grad_norm": 3.612075824056748, "learning_rate": 1.8176725829010868e-05, "loss": 1.1072, "step": 4662 }, { "epoch": 0.658429822084157, "grad_norm": 3.64374437676882, "learning_rate": 1.817584813151584e-05, "loss": 1.0264, "step": 4663 }, { "epoch": 0.6585710251341429, "grad_norm": 3.2892141835326596, "learning_rate": 1.817497024401763e-05, "loss": 0.9023, "step": 4664 }, { "epoch": 0.6587122281841288, "grad_norm": 3.2971175944963345, "learning_rate": 1.8174092166536646e-05, "loss": 0.9194, "step": 4665 }, { "epoch": 0.6588534312341147, "grad_norm": 3.2932146963095668, "learning_rate": 1.817321389909329e-05, "loss": 0.9572, "step": 4666 }, { "epoch": 0.6589946342841005, "grad_norm": 3.4346237562271855, "learning_rate": 1.817233544170797e-05, "loss": 1.0022, "step": 4667 }, { "epoch": 0.6591358373340864, "grad_norm": 3.4097256785502537, "learning_rate": 1.817145679440111e-05, "loss": 1.1754, "step": 4668 }, { "epoch": 0.6592770403840723, "grad_norm": 4.416772184062021, "learning_rate": 1.8170577957193115e-05, "loss": 1.199, "step": 4669 }, { "epoch": 0.6594182434340582, "grad_norm": 4.123528459243989, "learning_rate": 1.816969893010442e-05, "loss": 1.067, "step": 4670 }, { "epoch": 0.6595594464840441, "grad_norm": 3.841921627395763, "learning_rate": 1.8168819713155453e-05, "loss": 0.9206, "step": 4671 }, { "epoch": 0.65970064953403, "grad_norm": 3.7253958761317705, "learning_rate": 1.8167940306366642e-05, "loss": 1.0666, "step": 4672 }, { "epoch": 0.6598418525840158, "grad_norm": 4.415109649298847, "learning_rate": 1.8167060709758425e-05, "loss": 1.3117, "step": 4673 }, { "epoch": 0.6599830556340017, "grad_norm": 4.4215949538484445, "learning_rate": 1.8166180923351244e-05, "loss": 1.102, "step": 4674 }, { "epoch": 0.6601242586839876, "grad_norm": 3.3355497891831765, "learning_rate": 1.8165300947165544e-05, "loss": 0.8919, "step": 4675 }, { "epoch": 0.6602654617339735, "grad_norm": 4.068710837204282, "learning_rate": 1.8164420781221777e-05, "loss": 1.249, "step": 4676 }, { "epoch": 0.6604066647839594, "grad_norm": 3.2622439655953523, "learning_rate": 1.81635404255404e-05, "loss": 0.9239, "step": 4677 }, { "epoch": 0.6605478678339453, "grad_norm": 3.8161659707197395, "learning_rate": 1.8162659880141865e-05, "loss": 1.2931, "step": 4678 }, { "epoch": 0.6606890708839311, "grad_norm": 3.1943028110608314, "learning_rate": 1.8161779145046642e-05, "loss": 0.8951, "step": 4679 }, { "epoch": 0.660830273933917, "grad_norm": 3.6976607956718346, "learning_rate": 1.8160898220275196e-05, "loss": 1.1755, "step": 4680 }, { "epoch": 0.6609714769839029, "grad_norm": 3.236632876944451, "learning_rate": 1.8160017105848e-05, "loss": 0.8717, "step": 4681 }, { "epoch": 0.6611126800338887, "grad_norm": 3.596044533880722, "learning_rate": 1.815913580178553e-05, "loss": 0.9898, "step": 4682 }, { "epoch": 0.6612538830838746, "grad_norm": 3.385224742606756, "learning_rate": 1.8158254308108264e-05, "loss": 0.9642, "step": 4683 }, { "epoch": 0.6613950861338604, "grad_norm": 3.5255333387281045, "learning_rate": 1.81573726248367e-05, "loss": 1.0195, "step": 4684 }, { "epoch": 0.6615362891838463, "grad_norm": 3.896605729789775, "learning_rate": 1.815649075199131e-05, "loss": 0.9732, "step": 4685 }, { "epoch": 0.6616774922338322, "grad_norm": 3.675805447545512, "learning_rate": 1.8155608689592604e-05, "loss": 0.9367, "step": 4686 }, { "epoch": 0.6618186952838181, "grad_norm": 4.920856744816194, "learning_rate": 1.815472643766107e-05, "loss": 1.0818, "step": 4687 }, { "epoch": 0.661959898333804, "grad_norm": 4.356999492060913, "learning_rate": 1.815384399621722e-05, "loss": 1.0801, "step": 4688 }, { "epoch": 0.6621011013837899, "grad_norm": 3.396975648266339, "learning_rate": 1.8152961365281553e-05, "loss": 1.0822, "step": 4689 }, { "epoch": 0.6622423044337757, "grad_norm": 4.1227904246854, "learning_rate": 1.8152078544874582e-05, "loss": 1.1275, "step": 4690 }, { "epoch": 0.6623835074837616, "grad_norm": 3.23330422351804, "learning_rate": 1.815119553501683e-05, "loss": 0.8303, "step": 4691 }, { "epoch": 0.6625247105337475, "grad_norm": 4.492672864303307, "learning_rate": 1.8150312335728818e-05, "loss": 1.1039, "step": 4692 }, { "epoch": 0.6626659135837334, "grad_norm": 3.3019735844623197, "learning_rate": 1.814942894703106e-05, "loss": 1.0844, "step": 4693 }, { "epoch": 0.6628071166337193, "grad_norm": 4.0414694710092665, "learning_rate": 1.8148545368944096e-05, "loss": 1.2137, "step": 4694 }, { "epoch": 0.6629483196837052, "grad_norm": 3.8366277613489346, "learning_rate": 1.814766160148846e-05, "loss": 0.8661, "step": 4695 }, { "epoch": 0.663089522733691, "grad_norm": 3.859848802374575, "learning_rate": 1.814677764468468e-05, "loss": 1.2148, "step": 4696 }, { "epoch": 0.6632307257836769, "grad_norm": 4.284039735137657, "learning_rate": 1.8145893498553313e-05, "loss": 0.9882, "step": 4697 }, { "epoch": 0.6633719288336628, "grad_norm": 3.9422573045588254, "learning_rate": 1.8145009163114894e-05, "loss": 1.11, "step": 4698 }, { "epoch": 0.6635131318836487, "grad_norm": 3.769067108200693, "learning_rate": 1.814412463838998e-05, "loss": 0.8708, "step": 4699 }, { "epoch": 0.6636543349336346, "grad_norm": 3.7481965732626343, "learning_rate": 1.8143239924399127e-05, "loss": 1.0826, "step": 4700 }, { "epoch": 0.6637955379836205, "grad_norm": 3.763426290397732, "learning_rate": 1.8142355021162896e-05, "loss": 1.0971, "step": 4701 }, { "epoch": 0.6639367410336063, "grad_norm": 3.5463667396049354, "learning_rate": 1.8141469928701852e-05, "loss": 1.0123, "step": 4702 }, { "epoch": 0.6640779440835922, "grad_norm": 3.6445530707877674, "learning_rate": 1.814058464703656e-05, "loss": 1.1036, "step": 4703 }, { "epoch": 0.6642191471335781, "grad_norm": 3.3180508079938855, "learning_rate": 1.81396991761876e-05, "loss": 0.8006, "step": 4704 }, { "epoch": 0.664360350183564, "grad_norm": 3.4741113454864565, "learning_rate": 1.813881351617554e-05, "loss": 1.1895, "step": 4705 }, { "epoch": 0.6645015532335499, "grad_norm": 3.9110491193051224, "learning_rate": 1.8137927667020975e-05, "loss": 1.1032, "step": 4706 }, { "epoch": 0.6646427562835358, "grad_norm": 3.2135479742135233, "learning_rate": 1.8137041628744484e-05, "loss": 0.7431, "step": 4707 }, { "epoch": 0.6647839593335216, "grad_norm": 3.180459684122676, "learning_rate": 1.8136155401366658e-05, "loss": 0.785, "step": 4708 }, { "epoch": 0.6649251623835075, "grad_norm": 2.923757329219554, "learning_rate": 1.8135268984908096e-05, "loss": 0.8922, "step": 4709 }, { "epoch": 0.6650663654334934, "grad_norm": 3.920675770943473, "learning_rate": 1.8134382379389396e-05, "loss": 1.2585, "step": 4710 }, { "epoch": 0.6652075684834793, "grad_norm": 3.3123937070912604, "learning_rate": 1.8133495584831162e-05, "loss": 0.8287, "step": 4711 }, { "epoch": 0.6653487715334652, "grad_norm": 3.7887501415193308, "learning_rate": 1.8132608601254003e-05, "loss": 0.7796, "step": 4712 }, { "epoch": 0.665489974583451, "grad_norm": 3.6513554320498702, "learning_rate": 1.813172142867853e-05, "loss": 0.8681, "step": 4713 }, { "epoch": 0.6656311776334369, "grad_norm": 3.8061712385153887, "learning_rate": 1.8130834067125368e-05, "loss": 1.0904, "step": 4714 }, { "epoch": 0.6657723806834228, "grad_norm": 4.708096852592885, "learning_rate": 1.8129946516615132e-05, "loss": 1.3727, "step": 4715 }, { "epoch": 0.6659135837334086, "grad_norm": 3.844778511070779, "learning_rate": 1.8129058777168447e-05, "loss": 1.1557, "step": 4716 }, { "epoch": 0.6660547867833945, "grad_norm": 3.7033263031726245, "learning_rate": 1.8128170848805948e-05, "loss": 1.0681, "step": 4717 }, { "epoch": 0.6661959898333804, "grad_norm": 3.642178480920516, "learning_rate": 1.812728273154827e-05, "loss": 0.9452, "step": 4718 }, { "epoch": 0.6663371928833662, "grad_norm": 3.7812079580389284, "learning_rate": 1.812639442541605e-05, "loss": 0.8593, "step": 4719 }, { "epoch": 0.6664783959333521, "grad_norm": 3.3106297504726476, "learning_rate": 1.8125505930429936e-05, "loss": 0.8577, "step": 4720 }, { "epoch": 0.666619598983338, "grad_norm": 3.9747291825481637, "learning_rate": 1.812461724661057e-05, "loss": 1.086, "step": 4721 }, { "epoch": 0.6667608020333239, "grad_norm": 4.0402096432963335, "learning_rate": 1.812372837397861e-05, "loss": 1.1567, "step": 4722 }, { "epoch": 0.6669020050833098, "grad_norm": 3.856774707620448, "learning_rate": 1.812283931255471e-05, "loss": 1.1974, "step": 4723 }, { "epoch": 0.6670432081332957, "grad_norm": 3.2283315080963897, "learning_rate": 1.812195006235953e-05, "loss": 0.7623, "step": 4724 }, { "epoch": 0.6671844111832815, "grad_norm": 3.5052753435558444, "learning_rate": 1.812106062341374e-05, "loss": 0.8269, "step": 4725 }, { "epoch": 0.6673256142332674, "grad_norm": 3.8187666575054298, "learning_rate": 1.812017099573801e-05, "loss": 0.9153, "step": 4726 }, { "epoch": 0.6674668172832533, "grad_norm": 3.8672699831054844, "learning_rate": 1.8119281179353012e-05, "loss": 1.0505, "step": 4727 }, { "epoch": 0.6676080203332392, "grad_norm": 5.965859385275775, "learning_rate": 1.8118391174279426e-05, "loss": 1.3257, "step": 4728 }, { "epoch": 0.6677492233832251, "grad_norm": 3.564251006849823, "learning_rate": 1.811750098053793e-05, "loss": 0.9496, "step": 4729 }, { "epoch": 0.667890426433211, "grad_norm": 3.3011830896753755, "learning_rate": 1.8116610598149225e-05, "loss": 0.8024, "step": 4730 }, { "epoch": 0.6680316294831968, "grad_norm": 3.607815564240608, "learning_rate": 1.811572002713399e-05, "loss": 0.8944, "step": 4731 }, { "epoch": 0.6681728325331827, "grad_norm": 4.119697025101832, "learning_rate": 1.811482926751293e-05, "loss": 1.2629, "step": 4732 }, { "epoch": 0.6683140355831686, "grad_norm": 4.658072246054126, "learning_rate": 1.8113938319306742e-05, "loss": 0.9771, "step": 4733 }, { "epoch": 0.6684552386331545, "grad_norm": 3.5058724391364984, "learning_rate": 1.8113047182536128e-05, "loss": 1.1764, "step": 4734 }, { "epoch": 0.6685964416831404, "grad_norm": 3.3339197772764546, "learning_rate": 1.8112155857221804e-05, "loss": 0.9223, "step": 4735 }, { "epoch": 0.6687376447331262, "grad_norm": 2.9983833803004, "learning_rate": 1.811126434338448e-05, "loss": 0.8341, "step": 4736 }, { "epoch": 0.6688788477831121, "grad_norm": 3.1666045022221163, "learning_rate": 1.8110372641044877e-05, "loss": 0.808, "step": 4737 }, { "epoch": 0.669020050833098, "grad_norm": 2.747128193298705, "learning_rate": 1.8109480750223714e-05, "loss": 0.8157, "step": 4738 }, { "epoch": 0.6691612538830839, "grad_norm": 3.991494124470312, "learning_rate": 1.810858867094172e-05, "loss": 1.0451, "step": 4739 }, { "epoch": 0.6693024569330698, "grad_norm": 3.8759447533167375, "learning_rate": 1.810769640321963e-05, "loss": 0.9854, "step": 4740 }, { "epoch": 0.6694436599830557, "grad_norm": 3.6382780142886215, "learning_rate": 1.8106803947078176e-05, "loss": 0.9664, "step": 4741 }, { "epoch": 0.6695848630330415, "grad_norm": 3.5736061001178645, "learning_rate": 1.81059113025381e-05, "loss": 0.8898, "step": 4742 }, { "epoch": 0.6697260660830274, "grad_norm": 4.107436665773372, "learning_rate": 1.8105018469620148e-05, "loss": 1.0438, "step": 4743 }, { "epoch": 0.6698672691330133, "grad_norm": 3.9626905472942693, "learning_rate": 1.8104125448345066e-05, "loss": 0.962, "step": 4744 }, { "epoch": 0.6700084721829992, "grad_norm": 3.389731162657271, "learning_rate": 1.8103232238733604e-05, "loss": 0.9988, "step": 4745 }, { "epoch": 0.6701496752329851, "grad_norm": 3.592591468334822, "learning_rate": 1.810233884080653e-05, "loss": 0.9002, "step": 4746 }, { "epoch": 0.670290878282971, "grad_norm": 3.4309640671956956, "learning_rate": 1.8101445254584598e-05, "loss": 0.9629, "step": 4747 }, { "epoch": 0.6704320813329568, "grad_norm": 3.668855648686981, "learning_rate": 1.8100551480088574e-05, "loss": 1.0746, "step": 4748 }, { "epoch": 0.6705732843829427, "grad_norm": 4.275233030252731, "learning_rate": 1.8099657517339238e-05, "loss": 1.1161, "step": 4749 }, { "epoch": 0.6707144874329285, "grad_norm": 3.278405843435245, "learning_rate": 1.8098763366357354e-05, "loss": 0.9256, "step": 4750 }, { "epoch": 0.6708556904829144, "grad_norm": 3.7703346705322565, "learning_rate": 1.809786902716371e-05, "loss": 1.0297, "step": 4751 }, { "epoch": 0.6709968935329003, "grad_norm": 3.917623566421039, "learning_rate": 1.8096974499779084e-05, "loss": 1.1661, "step": 4752 }, { "epoch": 0.6711380965828861, "grad_norm": 3.0268611824431746, "learning_rate": 1.809607978422427e-05, "loss": 0.8715, "step": 4753 }, { "epoch": 0.671279299632872, "grad_norm": 3.0041851383344675, "learning_rate": 1.8095184880520058e-05, "loss": 1.0014, "step": 4754 }, { "epoch": 0.6714205026828579, "grad_norm": 4.355659125118408, "learning_rate": 1.8094289788687245e-05, "loss": 1.2732, "step": 4755 }, { "epoch": 0.6715617057328438, "grad_norm": 3.8891004624817835, "learning_rate": 1.8093394508746635e-05, "loss": 0.9823, "step": 4756 }, { "epoch": 0.6717029087828297, "grad_norm": 3.687737187095739, "learning_rate": 1.809249904071903e-05, "loss": 1.0306, "step": 4757 }, { "epoch": 0.6718441118328156, "grad_norm": 4.320731686852281, "learning_rate": 1.8091603384625243e-05, "loss": 1.2813, "step": 4758 }, { "epoch": 0.6719853148828014, "grad_norm": 3.4446081881025883, "learning_rate": 1.8090707540486084e-05, "loss": 1.016, "step": 4759 }, { "epoch": 0.6721265179327873, "grad_norm": 3.4882347161783613, "learning_rate": 1.8089811508322382e-05, "loss": 0.9352, "step": 4760 }, { "epoch": 0.6722677209827732, "grad_norm": 3.992682276576138, "learning_rate": 1.808891528815495e-05, "loss": 1.1728, "step": 4761 }, { "epoch": 0.6724089240327591, "grad_norm": 3.780274392650913, "learning_rate": 1.8088018880004622e-05, "loss": 0.9996, "step": 4762 }, { "epoch": 0.672550127082745, "grad_norm": 4.836477736250618, "learning_rate": 1.8087122283892225e-05, "loss": 1.2319, "step": 4763 }, { "epoch": 0.6726913301327309, "grad_norm": 4.326133942304628, "learning_rate": 1.8086225499838602e-05, "loss": 1.2238, "step": 4764 }, { "epoch": 0.6728325331827167, "grad_norm": 3.688863732220834, "learning_rate": 1.808532852786459e-05, "loss": 1.0643, "step": 4765 }, { "epoch": 0.6729737362327026, "grad_norm": 3.7866731790223276, "learning_rate": 1.8084431367991032e-05, "loss": 1.0984, "step": 4766 }, { "epoch": 0.6731149392826885, "grad_norm": 4.195853810018272, "learning_rate": 1.8083534020238785e-05, "loss": 1.38, "step": 4767 }, { "epoch": 0.6732561423326744, "grad_norm": 4.415064308465423, "learning_rate": 1.80826364846287e-05, "loss": 1.3547, "step": 4768 }, { "epoch": 0.6733973453826603, "grad_norm": 3.960475445166232, "learning_rate": 1.8081738761181625e-05, "loss": 1.1361, "step": 4769 }, { "epoch": 0.6735385484326462, "grad_norm": 3.985634674969819, "learning_rate": 1.808084084991844e-05, "loss": 1.0786, "step": 4770 }, { "epoch": 0.673679751482632, "grad_norm": 3.179007587443989, "learning_rate": 1.8079942750859997e-05, "loss": 0.8858, "step": 4771 }, { "epoch": 0.6738209545326179, "grad_norm": 4.370074151945814, "learning_rate": 1.8079044464027177e-05, "loss": 1.1869, "step": 4772 }, { "epoch": 0.6739621575826038, "grad_norm": 3.636154697813383, "learning_rate": 1.8078145989440855e-05, "loss": 1.0024, "step": 4773 }, { "epoch": 0.6741033606325897, "grad_norm": 3.572663990130696, "learning_rate": 1.8077247327121904e-05, "loss": 0.9112, "step": 4774 }, { "epoch": 0.6742445636825756, "grad_norm": 3.1465112482158464, "learning_rate": 1.8076348477091216e-05, "loss": 0.7324, "step": 4775 }, { "epoch": 0.6743857667325615, "grad_norm": 4.741794133349117, "learning_rate": 1.8075449439369678e-05, "loss": 1.1829, "step": 4776 }, { "epoch": 0.6745269697825473, "grad_norm": 3.7614585064282946, "learning_rate": 1.8074550213978183e-05, "loss": 0.9857, "step": 4777 }, { "epoch": 0.6746681728325332, "grad_norm": 3.74063270546666, "learning_rate": 1.8073650800937627e-05, "loss": 0.8813, "step": 4778 }, { "epoch": 0.6748093758825191, "grad_norm": 3.723534993405122, "learning_rate": 1.8072751200268913e-05, "loss": 0.8022, "step": 4779 }, { "epoch": 0.674950578932505, "grad_norm": 3.233225088954693, "learning_rate": 1.8071851411992948e-05, "loss": 1.0225, "step": 4780 }, { "epoch": 0.6750917819824909, "grad_norm": 3.716678899999008, "learning_rate": 1.807095143613064e-05, "loss": 1.023, "step": 4781 }, { "epoch": 0.6752329850324768, "grad_norm": 3.7037170452978416, "learning_rate": 1.8070051272702905e-05, "loss": 0.951, "step": 4782 }, { "epoch": 0.6753741880824626, "grad_norm": 3.5794030332401214, "learning_rate": 1.8069150921730668e-05, "loss": 0.9104, "step": 4783 }, { "epoch": 0.6755153911324484, "grad_norm": 3.9408589207242652, "learning_rate": 1.806825038323485e-05, "loss": 1.1243, "step": 4784 }, { "epoch": 0.6756565941824343, "grad_norm": 3.3504929115028776, "learning_rate": 1.8067349657236374e-05, "loss": 0.8142, "step": 4785 }, { "epoch": 0.6757977972324202, "grad_norm": 3.3313176633658457, "learning_rate": 1.8066448743756173e-05, "loss": 0.8381, "step": 4786 }, { "epoch": 0.675939000282406, "grad_norm": 3.7392803961403454, "learning_rate": 1.806554764281519e-05, "loss": 0.9539, "step": 4787 }, { "epoch": 0.6760802033323919, "grad_norm": 3.6274335435429, "learning_rate": 1.8064646354434363e-05, "loss": 1.0429, "step": 4788 }, { "epoch": 0.6762214063823778, "grad_norm": 3.0727636329367645, "learning_rate": 1.806374487863464e-05, "loss": 0.8502, "step": 4789 }, { "epoch": 0.6763626094323637, "grad_norm": 3.580258509023351, "learning_rate": 1.8062843215436965e-05, "loss": 1.1466, "step": 4790 }, { "epoch": 0.6765038124823496, "grad_norm": 3.904707926386714, "learning_rate": 1.8061941364862298e-05, "loss": 1.2405, "step": 4791 }, { "epoch": 0.6766450155323355, "grad_norm": 3.3796818144992864, "learning_rate": 1.8061039326931592e-05, "loss": 0.8945, "step": 4792 }, { "epoch": 0.6767862185823214, "grad_norm": 4.139585113864387, "learning_rate": 1.806013710166582e-05, "loss": 1.0117, "step": 4793 }, { "epoch": 0.6769274216323072, "grad_norm": 4.219950537132137, "learning_rate": 1.8059234689085935e-05, "loss": 1.3829, "step": 4794 }, { "epoch": 0.6770686246822931, "grad_norm": 4.118136651398588, "learning_rate": 1.805833208921292e-05, "loss": 1.1909, "step": 4795 }, { "epoch": 0.677209827732279, "grad_norm": 4.4347400580385505, "learning_rate": 1.8057429302067748e-05, "loss": 1.0705, "step": 4796 }, { "epoch": 0.6773510307822649, "grad_norm": 3.6787284403167524, "learning_rate": 1.8056526327671396e-05, "loss": 1.0412, "step": 4797 }, { "epoch": 0.6774922338322508, "grad_norm": 3.2283748757895254, "learning_rate": 1.8055623166044855e-05, "loss": 0.8246, "step": 4798 }, { "epoch": 0.6776334368822367, "grad_norm": 3.4977745205903323, "learning_rate": 1.8054719817209107e-05, "loss": 1.0895, "step": 4799 }, { "epoch": 0.6777746399322225, "grad_norm": 2.8660882965646595, "learning_rate": 1.8053816281185154e-05, "loss": 0.8597, "step": 4800 }, { "epoch": 0.6779158429822084, "grad_norm": 3.14123087654651, "learning_rate": 1.8052912557993983e-05, "loss": 1.05, "step": 4801 }, { "epoch": 0.6780570460321943, "grad_norm": 3.4273673691330493, "learning_rate": 1.8052008647656605e-05, "loss": 0.9943, "step": 4802 }, { "epoch": 0.6781982490821802, "grad_norm": 3.5600421480646767, "learning_rate": 1.8051104550194024e-05, "loss": 0.9313, "step": 4803 }, { "epoch": 0.6783394521321661, "grad_norm": 3.259273142044434, "learning_rate": 1.8050200265627247e-05, "loss": 0.8943, "step": 4804 }, { "epoch": 0.678480655182152, "grad_norm": 3.758498833701552, "learning_rate": 1.8049295793977294e-05, "loss": 1.1804, "step": 4805 }, { "epoch": 0.6786218582321378, "grad_norm": 3.8291999656663824, "learning_rate": 1.8048391135265185e-05, "loss": 1.2403, "step": 4806 }, { "epoch": 0.6787630612821237, "grad_norm": 3.3367429905978456, "learning_rate": 1.804748628951194e-05, "loss": 0.9607, "step": 4807 }, { "epoch": 0.6789042643321096, "grad_norm": 3.529891021686482, "learning_rate": 1.8046581256738592e-05, "loss": 1.0931, "step": 4808 }, { "epoch": 0.6790454673820955, "grad_norm": 3.6847509995865155, "learning_rate": 1.8045676036966168e-05, "loss": 0.9374, "step": 4809 }, { "epoch": 0.6791866704320814, "grad_norm": 3.7895070392050436, "learning_rate": 1.8044770630215706e-05, "loss": 0.953, "step": 4810 }, { "epoch": 0.6793278734820672, "grad_norm": 3.6749260737117457, "learning_rate": 1.804386503650825e-05, "loss": 1.0148, "step": 4811 }, { "epoch": 0.6794690765320531, "grad_norm": 3.461074273487788, "learning_rate": 1.8042959255864846e-05, "loss": 1.0565, "step": 4812 }, { "epoch": 0.679610279582039, "grad_norm": 3.3270221471902586, "learning_rate": 1.804205328830654e-05, "loss": 1.0843, "step": 4813 }, { "epoch": 0.6797514826320249, "grad_norm": 3.85241320846476, "learning_rate": 1.804114713385439e-05, "loss": 1.1383, "step": 4814 }, { "epoch": 0.6798926856820108, "grad_norm": 4.012760611467689, "learning_rate": 1.8040240792529457e-05, "loss": 1.027, "step": 4815 }, { "epoch": 0.6800338887319967, "grad_norm": 3.2400781618899206, "learning_rate": 1.8039334264352794e-05, "loss": 0.9966, "step": 4816 }, { "epoch": 0.6801750917819825, "grad_norm": 3.2432741102507756, "learning_rate": 1.803842754934548e-05, "loss": 1.0201, "step": 4817 }, { "epoch": 0.6803162948319683, "grad_norm": 3.688204072523597, "learning_rate": 1.8037520647528576e-05, "loss": 0.9485, "step": 4818 }, { "epoch": 0.6804574978819542, "grad_norm": 4.155285121958546, "learning_rate": 1.803661355892317e-05, "loss": 1.1593, "step": 4819 }, { "epoch": 0.6805987009319401, "grad_norm": 3.792562006503575, "learning_rate": 1.803570628355033e-05, "loss": 1.0216, "step": 4820 }, { "epoch": 0.680739903981926, "grad_norm": 3.2203572360239714, "learning_rate": 1.8034798821431146e-05, "loss": 0.9205, "step": 4821 }, { "epoch": 0.6808811070319118, "grad_norm": 3.249799980212703, "learning_rate": 1.803389117258671e-05, "loss": 1.0793, "step": 4822 }, { "epoch": 0.6810223100818977, "grad_norm": 3.566213009754105, "learning_rate": 1.803298333703811e-05, "loss": 1.0183, "step": 4823 }, { "epoch": 0.6811635131318836, "grad_norm": 3.560328288195111, "learning_rate": 1.803207531480645e-05, "loss": 1.0015, "step": 4824 }, { "epoch": 0.6813047161818695, "grad_norm": 4.145151558755745, "learning_rate": 1.8031167105912828e-05, "loss": 1.3682, "step": 4825 }, { "epoch": 0.6814459192318554, "grad_norm": 3.2218094598860354, "learning_rate": 1.8030258710378348e-05, "loss": 0.9294, "step": 4826 }, { "epoch": 0.6815871222818413, "grad_norm": 3.1631731361180213, "learning_rate": 1.8029350128224126e-05, "loss": 1.0385, "step": 4827 }, { "epoch": 0.6817283253318271, "grad_norm": 4.3486490094319645, "learning_rate": 1.8028441359471273e-05, "loss": 0.9646, "step": 4828 }, { "epoch": 0.681869528381813, "grad_norm": 3.304591147931252, "learning_rate": 1.8027532404140914e-05, "loss": 1.1562, "step": 4829 }, { "epoch": 0.6820107314317989, "grad_norm": 3.811938130683063, "learning_rate": 1.8026623262254164e-05, "loss": 0.9833, "step": 4830 }, { "epoch": 0.6821519344817848, "grad_norm": 3.590591899478524, "learning_rate": 1.802571393383216e-05, "loss": 0.9524, "step": 4831 }, { "epoch": 0.6822931375317707, "grad_norm": 3.472513492817397, "learning_rate": 1.8024804418896026e-05, "loss": 0.9623, "step": 4832 }, { "epoch": 0.6824343405817566, "grad_norm": 3.1760262048708388, "learning_rate": 1.8023894717466904e-05, "loss": 0.9801, "step": 4833 }, { "epoch": 0.6825755436317424, "grad_norm": 3.5500072584261333, "learning_rate": 1.8022984829565935e-05, "loss": 1.1531, "step": 4834 }, { "epoch": 0.6827167466817283, "grad_norm": 4.15453036117576, "learning_rate": 1.8022074755214264e-05, "loss": 0.8229, "step": 4835 }, { "epoch": 0.6828579497317142, "grad_norm": 3.579126912745341, "learning_rate": 1.8021164494433038e-05, "loss": 1.1329, "step": 4836 }, { "epoch": 0.6829991527817001, "grad_norm": 3.826680535979476, "learning_rate": 1.8020254047243413e-05, "loss": 1.0305, "step": 4837 }, { "epoch": 0.683140355831686, "grad_norm": 3.4828094514436496, "learning_rate": 1.801934341366655e-05, "loss": 1.0038, "step": 4838 }, { "epoch": 0.6832815588816719, "grad_norm": 4.320111056414389, "learning_rate": 1.801843259372361e-05, "loss": 1.0814, "step": 4839 }, { "epoch": 0.6834227619316577, "grad_norm": 3.549533724743314, "learning_rate": 1.801752158743576e-05, "loss": 1.0644, "step": 4840 }, { "epoch": 0.6835639649816436, "grad_norm": 3.3937231726407053, "learning_rate": 1.8016610394824168e-05, "loss": 1.0014, "step": 4841 }, { "epoch": 0.6837051680316295, "grad_norm": 3.846986015963456, "learning_rate": 1.801569901591001e-05, "loss": 1.0466, "step": 4842 }, { "epoch": 0.6838463710816154, "grad_norm": 3.5676368238464677, "learning_rate": 1.801478745071447e-05, "loss": 0.935, "step": 4843 }, { "epoch": 0.6839875741316013, "grad_norm": 3.739033956686776, "learning_rate": 1.8013875699258738e-05, "loss": 1.0882, "step": 4844 }, { "epoch": 0.6841287771815872, "grad_norm": 3.808747480885693, "learning_rate": 1.801296376156399e-05, "loss": 1.1735, "step": 4845 }, { "epoch": 0.684269980231573, "grad_norm": 4.424774924232497, "learning_rate": 1.8012051637651423e-05, "loss": 1.1288, "step": 4846 }, { "epoch": 0.6844111832815589, "grad_norm": 3.590957629098163, "learning_rate": 1.8011139327542238e-05, "loss": 0.9503, "step": 4847 }, { "epoch": 0.6845523863315448, "grad_norm": 3.27030680117124, "learning_rate": 1.8010226831257636e-05, "loss": 0.9983, "step": 4848 }, { "epoch": 0.6846935893815307, "grad_norm": 2.8577112140751058, "learning_rate": 1.8009314148818822e-05, "loss": 0.8771, "step": 4849 }, { "epoch": 0.6848347924315166, "grad_norm": 3.306867257306951, "learning_rate": 1.8008401280247005e-05, "loss": 1.0379, "step": 4850 }, { "epoch": 0.6849759954815025, "grad_norm": 3.8980972002258016, "learning_rate": 1.80074882255634e-05, "loss": 1.0437, "step": 4851 }, { "epoch": 0.6851171985314882, "grad_norm": 3.732492234526865, "learning_rate": 1.8006574984789226e-05, "loss": 1.1157, "step": 4852 }, { "epoch": 0.6852584015814741, "grad_norm": 3.8633955724280953, "learning_rate": 1.800566155794571e-05, "loss": 0.9841, "step": 4853 }, { "epoch": 0.68539960463146, "grad_norm": 3.7733800500700596, "learning_rate": 1.8004747945054076e-05, "loss": 1.0379, "step": 4854 }, { "epoch": 0.6855408076814459, "grad_norm": 3.325498523459098, "learning_rate": 1.8003834146135557e-05, "loss": 0.9438, "step": 4855 }, { "epoch": 0.6856820107314318, "grad_norm": 3.6169162193298456, "learning_rate": 1.8002920161211384e-05, "loss": 1.0583, "step": 4856 }, { "epoch": 0.6858232137814176, "grad_norm": 3.845935905359987, "learning_rate": 1.800200599030281e-05, "loss": 1.2527, "step": 4857 }, { "epoch": 0.6859644168314035, "grad_norm": 3.3436245075169473, "learning_rate": 1.800109163343107e-05, "loss": 0.8502, "step": 4858 }, { "epoch": 0.6861056198813894, "grad_norm": 3.796254835586533, "learning_rate": 1.8000177090617416e-05, "loss": 1.0515, "step": 4859 }, { "epoch": 0.6862468229313753, "grad_norm": 3.9392811400000984, "learning_rate": 1.7999262361883102e-05, "loss": 1.1274, "step": 4860 }, { "epoch": 0.6863880259813612, "grad_norm": 3.730663995315453, "learning_rate": 1.799834744724938e-05, "loss": 0.9405, "step": 4861 }, { "epoch": 0.686529229031347, "grad_norm": 4.319885924397974, "learning_rate": 1.7997432346737524e-05, "loss": 1.1759, "step": 4862 }, { "epoch": 0.6866704320813329, "grad_norm": 4.045438380914125, "learning_rate": 1.7996517060368793e-05, "loss": 0.9222, "step": 4863 }, { "epoch": 0.6868116351313188, "grad_norm": 3.189527000501543, "learning_rate": 1.7995601588164456e-05, "loss": 0.961, "step": 4864 }, { "epoch": 0.6869528381813047, "grad_norm": 3.763511417082652, "learning_rate": 1.7994685930145793e-05, "loss": 1.1567, "step": 4865 }, { "epoch": 0.6870940412312906, "grad_norm": 3.594409764039015, "learning_rate": 1.7993770086334082e-05, "loss": 1.0028, "step": 4866 }, { "epoch": 0.6872352442812765, "grad_norm": 3.493799892916875, "learning_rate": 1.7992854056750604e-05, "loss": 0.9926, "step": 4867 }, { "epoch": 0.6873764473312624, "grad_norm": 3.134414188298595, "learning_rate": 1.7991937841416652e-05, "loss": 0.854, "step": 4868 }, { "epoch": 0.6875176503812482, "grad_norm": 3.8320882077153144, "learning_rate": 1.7991021440353515e-05, "loss": 1.3191, "step": 4869 }, { "epoch": 0.6876588534312341, "grad_norm": 3.734186464785118, "learning_rate": 1.7990104853582494e-05, "loss": 1.0545, "step": 4870 }, { "epoch": 0.68780005648122, "grad_norm": 4.560120936341732, "learning_rate": 1.7989188081124883e-05, "loss": 1.1339, "step": 4871 }, { "epoch": 0.6879412595312059, "grad_norm": 3.342683470989466, "learning_rate": 1.7988271123001994e-05, "loss": 0.9153, "step": 4872 }, { "epoch": 0.6880824625811918, "grad_norm": 3.548955463691675, "learning_rate": 1.798735397923513e-05, "loss": 0.9469, "step": 4873 }, { "epoch": 0.6882236656311777, "grad_norm": 3.7711465742839034, "learning_rate": 1.798643664984561e-05, "loss": 1.1507, "step": 4874 }, { "epoch": 0.6883648686811635, "grad_norm": 3.0601005189804953, "learning_rate": 1.7985519134854755e-05, "loss": 0.9641, "step": 4875 }, { "epoch": 0.6885060717311494, "grad_norm": 3.472981350550807, "learning_rate": 1.798460143428388e-05, "loss": 1.0574, "step": 4876 }, { "epoch": 0.6886472747811353, "grad_norm": 3.0069917516150113, "learning_rate": 1.7983683548154318e-05, "loss": 0.8841, "step": 4877 }, { "epoch": 0.6887884778311212, "grad_norm": 3.772836179279812, "learning_rate": 1.7982765476487398e-05, "loss": 1.0526, "step": 4878 }, { "epoch": 0.6889296808811071, "grad_norm": 3.252576219679041, "learning_rate": 1.7981847219304456e-05, "loss": 0.972, "step": 4879 }, { "epoch": 0.689070883931093, "grad_norm": 3.5598209124002707, "learning_rate": 1.7980928776626833e-05, "loss": 1.0249, "step": 4880 }, { "epoch": 0.6892120869810788, "grad_norm": 3.6909074570822145, "learning_rate": 1.7980010148475868e-05, "loss": 1.158, "step": 4881 }, { "epoch": 0.6893532900310647, "grad_norm": 3.8880888594016634, "learning_rate": 1.7979091334872915e-05, "loss": 1.3167, "step": 4882 }, { "epoch": 0.6894944930810506, "grad_norm": 3.333804720538366, "learning_rate": 1.7978172335839324e-05, "loss": 1.0112, "step": 4883 }, { "epoch": 0.6896356961310365, "grad_norm": 3.1728299871942185, "learning_rate": 1.797725315139646e-05, "loss": 0.8991, "step": 4884 }, { "epoch": 0.6897768991810224, "grad_norm": 4.031043572413333, "learning_rate": 1.7976333781565672e-05, "loss": 0.8381, "step": 4885 }, { "epoch": 0.6899181022310081, "grad_norm": 3.37456968412429, "learning_rate": 1.7975414226368334e-05, "loss": 0.9592, "step": 4886 }, { "epoch": 0.690059305280994, "grad_norm": 3.902029201000711, "learning_rate": 1.7974494485825812e-05, "loss": 1.0123, "step": 4887 }, { "epoch": 0.6902005083309799, "grad_norm": 3.518613387923976, "learning_rate": 1.797357455995948e-05, "loss": 0.9706, "step": 4888 }, { "epoch": 0.6903417113809658, "grad_norm": 3.598395742373168, "learning_rate": 1.7972654448790723e-05, "loss": 0.967, "step": 4889 }, { "epoch": 0.6904829144309517, "grad_norm": 4.119492894180726, "learning_rate": 1.7971734152340918e-05, "loss": 1.2623, "step": 4890 }, { "epoch": 0.6906241174809375, "grad_norm": 3.9999078769706573, "learning_rate": 1.7970813670631454e-05, "loss": 1.0526, "step": 4891 }, { "epoch": 0.6907653205309234, "grad_norm": 3.7720480674761525, "learning_rate": 1.7969893003683726e-05, "loss": 0.9604, "step": 4892 }, { "epoch": 0.6909065235809093, "grad_norm": 3.1130275854445393, "learning_rate": 1.796897215151912e-05, "loss": 0.8214, "step": 4893 }, { "epoch": 0.6910477266308952, "grad_norm": 3.6919640978968475, "learning_rate": 1.7968051114159046e-05, "loss": 1.044, "step": 4894 }, { "epoch": 0.6911889296808811, "grad_norm": 3.5024666374190843, "learning_rate": 1.7967129891624907e-05, "loss": 0.9204, "step": 4895 }, { "epoch": 0.691330132730867, "grad_norm": 4.039641906444027, "learning_rate": 1.7966208483938108e-05, "loss": 1.2647, "step": 4896 }, { "epoch": 0.6914713357808528, "grad_norm": 3.1430245958419274, "learning_rate": 1.7965286891120064e-05, "loss": 0.7836, "step": 4897 }, { "epoch": 0.6916125388308387, "grad_norm": 3.0189861197600996, "learning_rate": 1.7964365113192195e-05, "loss": 0.9178, "step": 4898 }, { "epoch": 0.6917537418808246, "grad_norm": 3.778769813900629, "learning_rate": 1.7963443150175915e-05, "loss": 1.1918, "step": 4899 }, { "epoch": 0.6918949449308105, "grad_norm": 3.46060760335112, "learning_rate": 1.796252100209266e-05, "loss": 0.8881, "step": 4900 }, { "epoch": 0.6920361479807964, "grad_norm": 3.791675125173452, "learning_rate": 1.796159866896386e-05, "loss": 1.0193, "step": 4901 }, { "epoch": 0.6921773510307823, "grad_norm": 3.618495650277027, "learning_rate": 1.7960676150810937e-05, "loss": 0.9839, "step": 4902 }, { "epoch": 0.6923185540807681, "grad_norm": 3.5567501844874982, "learning_rate": 1.795975344765534e-05, "loss": 1.1792, "step": 4903 }, { "epoch": 0.692459757130754, "grad_norm": 3.882062105729809, "learning_rate": 1.7958830559518513e-05, "loss": 1.1627, "step": 4904 }, { "epoch": 0.6926009601807399, "grad_norm": 3.1158292523688784, "learning_rate": 1.7957907486421896e-05, "loss": 0.798, "step": 4905 }, { "epoch": 0.6927421632307258, "grad_norm": 3.6847868615351644, "learning_rate": 1.795698422838695e-05, "loss": 1.0689, "step": 4906 }, { "epoch": 0.6928833662807117, "grad_norm": 3.248669508406251, "learning_rate": 1.7956060785435125e-05, "loss": 1.0676, "step": 4907 }, { "epoch": 0.6930245693306976, "grad_norm": 3.283982429965398, "learning_rate": 1.7955137157587886e-05, "loss": 1.0636, "step": 4908 }, { "epoch": 0.6931657723806834, "grad_norm": 3.6653384614291715, "learning_rate": 1.7954213344866694e-05, "loss": 1.2765, "step": 4909 }, { "epoch": 0.6933069754306693, "grad_norm": 3.2693588718239797, "learning_rate": 1.795328934729302e-05, "loss": 0.9708, "step": 4910 }, { "epoch": 0.6934481784806552, "grad_norm": 3.2894821939712315, "learning_rate": 1.795236516488833e-05, "loss": 1.0232, "step": 4911 }, { "epoch": 0.6935893815306411, "grad_norm": 5.904686601776154, "learning_rate": 1.7951440797674117e-05, "loss": 1.1616, "step": 4912 }, { "epoch": 0.693730584580627, "grad_norm": 3.20207256483985, "learning_rate": 1.7950516245671848e-05, "loss": 0.9678, "step": 4913 }, { "epoch": 0.6938717876306129, "grad_norm": 3.573764548442737, "learning_rate": 1.7949591508903016e-05, "loss": 1.1044, "step": 4914 }, { "epoch": 0.6940129906805987, "grad_norm": 3.5595335588285884, "learning_rate": 1.7948666587389112e-05, "loss": 0.9634, "step": 4915 }, { "epoch": 0.6941541937305846, "grad_norm": 3.1314588481850394, "learning_rate": 1.7947741481151628e-05, "loss": 0.8725, "step": 4916 }, { "epoch": 0.6942953967805705, "grad_norm": 3.975707036517665, "learning_rate": 1.7946816190212063e-05, "loss": 1.1297, "step": 4917 }, { "epoch": 0.6944365998305564, "grad_norm": 3.354867228416297, "learning_rate": 1.7945890714591926e-05, "loss": 1.0578, "step": 4918 }, { "epoch": 0.6945778028805423, "grad_norm": 3.8502550493035628, "learning_rate": 1.7944965054312718e-05, "loss": 0.9862, "step": 4919 }, { "epoch": 0.694719005930528, "grad_norm": 4.158313788612613, "learning_rate": 1.794403920939595e-05, "loss": 1.1441, "step": 4920 }, { "epoch": 0.6948602089805139, "grad_norm": 3.622695532047422, "learning_rate": 1.7943113179863147e-05, "loss": 1.0328, "step": 4921 }, { "epoch": 0.6950014120304998, "grad_norm": 4.7711040374408205, "learning_rate": 1.794218696573582e-05, "loss": 1.3909, "step": 4922 }, { "epoch": 0.6951426150804857, "grad_norm": 4.129471713708771, "learning_rate": 1.7941260567035498e-05, "loss": 0.973, "step": 4923 }, { "epoch": 0.6952838181304716, "grad_norm": 4.097047556802675, "learning_rate": 1.7940333983783715e-05, "loss": 1.2203, "step": 4924 }, { "epoch": 0.6954250211804575, "grad_norm": 3.4975179725561465, "learning_rate": 1.7939407216001993e-05, "loss": 1.0387, "step": 4925 }, { "epoch": 0.6955662242304433, "grad_norm": 3.809718589561135, "learning_rate": 1.793848026371188e-05, "loss": 1.0785, "step": 4926 }, { "epoch": 0.6957074272804292, "grad_norm": 3.6016494986125687, "learning_rate": 1.793755312693491e-05, "loss": 0.9856, "step": 4927 }, { "epoch": 0.6958486303304151, "grad_norm": 3.76952232717789, "learning_rate": 1.793662580569264e-05, "loss": 0.9489, "step": 4928 }, { "epoch": 0.695989833380401, "grad_norm": 3.822543454458155, "learning_rate": 1.793569830000661e-05, "loss": 1.0408, "step": 4929 }, { "epoch": 0.6961310364303869, "grad_norm": 3.95015309461034, "learning_rate": 1.7934770609898377e-05, "loss": 1.1794, "step": 4930 }, { "epoch": 0.6962722394803728, "grad_norm": 3.8317423000082336, "learning_rate": 1.7933842735389505e-05, "loss": 1.0468, "step": 4931 }, { "epoch": 0.6964134425303586, "grad_norm": 3.437606999973978, "learning_rate": 1.7932914676501553e-05, "loss": 1.02, "step": 4932 }, { "epoch": 0.6965546455803445, "grad_norm": 3.458834077509404, "learning_rate": 1.7931986433256088e-05, "loss": 1.1071, "step": 4933 }, { "epoch": 0.6966958486303304, "grad_norm": 3.4634637098113426, "learning_rate": 1.7931058005674687e-05, "loss": 0.8652, "step": 4934 }, { "epoch": 0.6968370516803163, "grad_norm": 3.3396845596011304, "learning_rate": 1.793012939377892e-05, "loss": 0.8847, "step": 4935 }, { "epoch": 0.6969782547303022, "grad_norm": 4.933366992658467, "learning_rate": 1.7929200597590375e-05, "loss": 1.3717, "step": 4936 }, { "epoch": 0.697119457780288, "grad_norm": 3.9696486945254668, "learning_rate": 1.7928271617130628e-05, "loss": 1.0222, "step": 4937 }, { "epoch": 0.6972606608302739, "grad_norm": 3.6088150640999417, "learning_rate": 1.7927342452421275e-05, "loss": 1.0508, "step": 4938 }, { "epoch": 0.6974018638802598, "grad_norm": 4.641155354114272, "learning_rate": 1.7926413103483903e-05, "loss": 1.0084, "step": 4939 }, { "epoch": 0.6975430669302457, "grad_norm": 3.760359463129475, "learning_rate": 1.7925483570340118e-05, "loss": 1.1653, "step": 4940 }, { "epoch": 0.6976842699802316, "grad_norm": 3.9725904150012608, "learning_rate": 1.792455385301152e-05, "loss": 1.1374, "step": 4941 }, { "epoch": 0.6978254730302175, "grad_norm": 3.1563758725642237, "learning_rate": 1.7923623951519708e-05, "loss": 0.9656, "step": 4942 }, { "epoch": 0.6979666760802034, "grad_norm": 3.7690353501607183, "learning_rate": 1.79226938658863e-05, "loss": 1.0548, "step": 4943 }, { "epoch": 0.6981078791301892, "grad_norm": 4.060368879548283, "learning_rate": 1.7921763596132905e-05, "loss": 1.0353, "step": 4944 }, { "epoch": 0.6982490821801751, "grad_norm": 3.2512862030613716, "learning_rate": 1.7920833142281145e-05, "loss": 0.9495, "step": 4945 }, { "epoch": 0.698390285230161, "grad_norm": 4.037250239064881, "learning_rate": 1.7919902504352646e-05, "loss": 1.0587, "step": 4946 }, { "epoch": 0.6985314882801469, "grad_norm": 4.1272527856074825, "learning_rate": 1.7918971682369034e-05, "loss": 1.1974, "step": 4947 }, { "epoch": 0.6986726913301328, "grad_norm": 3.8741226736112573, "learning_rate": 1.791804067635194e-05, "loss": 0.9565, "step": 4948 }, { "epoch": 0.6988138943801186, "grad_norm": 3.837766735449058, "learning_rate": 1.7917109486322997e-05, "loss": 0.9729, "step": 4949 }, { "epoch": 0.6989550974301045, "grad_norm": 3.842350594919303, "learning_rate": 1.791617811230385e-05, "loss": 1.3002, "step": 4950 }, { "epoch": 0.6990963004800904, "grad_norm": 3.448583000639665, "learning_rate": 1.7915246554316145e-05, "loss": 0.9534, "step": 4951 }, { "epoch": 0.6992375035300763, "grad_norm": 3.639507876968727, "learning_rate": 1.7914314812381524e-05, "loss": 0.9045, "step": 4952 }, { "epoch": 0.6993787065800622, "grad_norm": 3.4731095447808484, "learning_rate": 1.7913382886521648e-05, "loss": 1.0376, "step": 4953 }, { "epoch": 0.699519909630048, "grad_norm": 3.1854397052921097, "learning_rate": 1.791245077675817e-05, "loss": 0.9268, "step": 4954 }, { "epoch": 0.6996611126800338, "grad_norm": 3.767447903031259, "learning_rate": 1.7911518483112752e-05, "loss": 0.9883, "step": 4955 }, { "epoch": 0.6998023157300197, "grad_norm": 3.6923455642089755, "learning_rate": 1.7910586005607063e-05, "loss": 0.8594, "step": 4956 }, { "epoch": 0.6999435187800056, "grad_norm": 3.6448247612937488, "learning_rate": 1.790965334426277e-05, "loss": 1.1476, "step": 4957 }, { "epoch": 0.7000847218299915, "grad_norm": 3.5810133998126537, "learning_rate": 1.7908720499101552e-05, "loss": 1.1233, "step": 4958 }, { "epoch": 0.7002259248799774, "grad_norm": 4.027853212120315, "learning_rate": 1.790778747014508e-05, "loss": 1.2545, "step": 4959 }, { "epoch": 0.7003671279299633, "grad_norm": 2.8741132307290016, "learning_rate": 1.7906854257415048e-05, "loss": 0.8527, "step": 4960 }, { "epoch": 0.7005083309799491, "grad_norm": 3.3094312860045125, "learning_rate": 1.7905920860933136e-05, "loss": 0.9097, "step": 4961 }, { "epoch": 0.700649534029935, "grad_norm": 4.3227834235897875, "learning_rate": 1.7904987280721037e-05, "loss": 1.1097, "step": 4962 }, { "epoch": 0.7007907370799209, "grad_norm": 4.319445616616094, "learning_rate": 1.7904053516800448e-05, "loss": 1.2249, "step": 4963 }, { "epoch": 0.7009319401299068, "grad_norm": 3.011031964933952, "learning_rate": 1.7903119569193066e-05, "loss": 0.8272, "step": 4964 }, { "epoch": 0.7010731431798927, "grad_norm": 3.5753962019663765, "learning_rate": 1.7902185437920603e-05, "loss": 1.0165, "step": 4965 }, { "epoch": 0.7012143462298785, "grad_norm": 3.8001370096272042, "learning_rate": 1.790125112300476e-05, "loss": 0.9125, "step": 4966 }, { "epoch": 0.7013555492798644, "grad_norm": 3.476174346380946, "learning_rate": 1.7900316624467254e-05, "loss": 0.9976, "step": 4967 }, { "epoch": 0.7014967523298503, "grad_norm": 3.6753109904495953, "learning_rate": 1.78993819423298e-05, "loss": 0.9658, "step": 4968 }, { "epoch": 0.7016379553798362, "grad_norm": 3.881491751430802, "learning_rate": 1.7898447076614123e-05, "loss": 0.9928, "step": 4969 }, { "epoch": 0.7017791584298221, "grad_norm": 3.2837016528548935, "learning_rate": 1.7897512027341945e-05, "loss": 0.8974, "step": 4970 }, { "epoch": 0.701920361479808, "grad_norm": 3.7316128013845646, "learning_rate": 1.7896576794535002e-05, "loss": 1.0198, "step": 4971 }, { "epoch": 0.7020615645297938, "grad_norm": 3.0379806003765197, "learning_rate": 1.789564137821502e-05, "loss": 0.9311, "step": 4972 }, { "epoch": 0.7022027675797797, "grad_norm": 3.8890201402248343, "learning_rate": 1.7894705778403746e-05, "loss": 1.2104, "step": 4973 }, { "epoch": 0.7023439706297656, "grad_norm": 3.3544549652210276, "learning_rate": 1.7893769995122916e-05, "loss": 0.8811, "step": 4974 }, { "epoch": 0.7024851736797515, "grad_norm": 3.670011825573082, "learning_rate": 1.7892834028394285e-05, "loss": 1.0226, "step": 4975 }, { "epoch": 0.7026263767297374, "grad_norm": 4.956363878007812, "learning_rate": 1.7891897878239595e-05, "loss": 1.2145, "step": 4976 }, { "epoch": 0.7027675797797233, "grad_norm": 4.070265763567169, "learning_rate": 1.789096154468061e-05, "loss": 1.4238, "step": 4977 }, { "epoch": 0.7029087828297091, "grad_norm": 3.687184915341469, "learning_rate": 1.7890025027739084e-05, "loss": 0.8404, "step": 4978 }, { "epoch": 0.703049985879695, "grad_norm": 3.407299740962904, "learning_rate": 1.7889088327436783e-05, "loss": 1.0777, "step": 4979 }, { "epoch": 0.7031911889296809, "grad_norm": 3.3293450041563113, "learning_rate": 1.7888151443795478e-05, "loss": 0.8899, "step": 4980 }, { "epoch": 0.7033323919796668, "grad_norm": 3.407754301552625, "learning_rate": 1.788721437683694e-05, "loss": 0.949, "step": 4981 }, { "epoch": 0.7034735950296527, "grad_norm": 3.5328900152966285, "learning_rate": 1.7886277126582947e-05, "loss": 1.1086, "step": 4982 }, { "epoch": 0.7036147980796386, "grad_norm": 3.296893849798958, "learning_rate": 1.7885339693055276e-05, "loss": 0.9262, "step": 4983 }, { "epoch": 0.7037560011296244, "grad_norm": 3.5264997601460455, "learning_rate": 1.7884402076275723e-05, "loss": 1.1062, "step": 4984 }, { "epoch": 0.7038972041796103, "grad_norm": 3.157365329339565, "learning_rate": 1.7883464276266064e-05, "loss": 0.9598, "step": 4985 }, { "epoch": 0.7040384072295962, "grad_norm": 3.517264938511991, "learning_rate": 1.7882526293048102e-05, "loss": 1.1385, "step": 4986 }, { "epoch": 0.7041796102795821, "grad_norm": 3.556560858212387, "learning_rate": 1.7881588126643632e-05, "loss": 0.9751, "step": 4987 }, { "epoch": 0.704320813329568, "grad_norm": 3.8115058338522463, "learning_rate": 1.788064977707446e-05, "loss": 1.0367, "step": 4988 }, { "epoch": 0.7044620163795537, "grad_norm": 3.5583813298827756, "learning_rate": 1.787971124436239e-05, "loss": 1.1144, "step": 4989 }, { "epoch": 0.7046032194295396, "grad_norm": 3.791512971049549, "learning_rate": 1.7878772528529232e-05, "loss": 1.2657, "step": 4990 }, { "epoch": 0.7047444224795255, "grad_norm": 3.8455394478153235, "learning_rate": 1.7877833629596805e-05, "loss": 1.0708, "step": 4991 }, { "epoch": 0.7048856255295114, "grad_norm": 3.971044506253799, "learning_rate": 1.7876894547586924e-05, "loss": 1.2719, "step": 4992 }, { "epoch": 0.7050268285794973, "grad_norm": 4.185304546464157, "learning_rate": 1.787595528252142e-05, "loss": 1.4954, "step": 4993 }, { "epoch": 0.7051680316294832, "grad_norm": 2.917869695438468, "learning_rate": 1.7875015834422113e-05, "loss": 0.9402, "step": 4994 }, { "epoch": 0.705309234679469, "grad_norm": 4.054121132892206, "learning_rate": 1.787407620331084e-05, "loss": 1.1962, "step": 4995 }, { "epoch": 0.7054504377294549, "grad_norm": 3.1935513773725073, "learning_rate": 1.7873136389209435e-05, "loss": 0.9174, "step": 4996 }, { "epoch": 0.7055916407794408, "grad_norm": 5.998002559019988, "learning_rate": 1.7872196392139745e-05, "loss": 1.3476, "step": 4997 }, { "epoch": 0.7057328438294267, "grad_norm": 2.8882888103723654, "learning_rate": 1.7871256212123605e-05, "loss": 0.8645, "step": 4998 }, { "epoch": 0.7058740468794126, "grad_norm": 3.430810723438634, "learning_rate": 1.7870315849182874e-05, "loss": 1.0197, "step": 4999 }, { "epoch": 0.7060152499293985, "grad_norm": 3.5172910882865307, "learning_rate": 1.78693753033394e-05, "loss": 1.0992, "step": 5000 }, { "epoch": 0.7061564529793843, "grad_norm": 4.414330373432635, "learning_rate": 1.7868434574615042e-05, "loss": 1.0446, "step": 5001 }, { "epoch": 0.7062976560293702, "grad_norm": 3.9425613439416622, "learning_rate": 1.7867493663031664e-05, "loss": 1.3116, "step": 5002 }, { "epoch": 0.7064388590793561, "grad_norm": 3.934442679748222, "learning_rate": 1.7866552568611132e-05, "loss": 1.0943, "step": 5003 }, { "epoch": 0.706580062129342, "grad_norm": 3.9271576494670213, "learning_rate": 1.7865611291375313e-05, "loss": 1.1351, "step": 5004 }, { "epoch": 0.7067212651793279, "grad_norm": 3.4389659743332732, "learning_rate": 1.7864669831346084e-05, "loss": 0.8963, "step": 5005 }, { "epoch": 0.7068624682293138, "grad_norm": 4.212484468729581, "learning_rate": 1.7863728188545326e-05, "loss": 1.0828, "step": 5006 }, { "epoch": 0.7070036712792996, "grad_norm": 4.963815904634519, "learning_rate": 1.7862786362994922e-05, "loss": 1.3729, "step": 5007 }, { "epoch": 0.7071448743292855, "grad_norm": 3.357223101408097, "learning_rate": 1.7861844354716757e-05, "loss": 1.1, "step": 5008 }, { "epoch": 0.7072860773792714, "grad_norm": 3.505451631658313, "learning_rate": 1.7860902163732725e-05, "loss": 1.0501, "step": 5009 }, { "epoch": 0.7074272804292573, "grad_norm": 3.6379377163761486, "learning_rate": 1.7859959790064723e-05, "loss": 1.145, "step": 5010 }, { "epoch": 0.7075684834792432, "grad_norm": 3.488825591389287, "learning_rate": 1.785901723373465e-05, "loss": 0.9841, "step": 5011 }, { "epoch": 0.707709686529229, "grad_norm": 3.9156291433782573, "learning_rate": 1.7858074494764406e-05, "loss": 1.0521, "step": 5012 }, { "epoch": 0.7078508895792149, "grad_norm": 4.380465896517571, "learning_rate": 1.7857131573175906e-05, "loss": 1.0925, "step": 5013 }, { "epoch": 0.7079920926292008, "grad_norm": 4.086376363375203, "learning_rate": 1.7856188468991064e-05, "loss": 1.1517, "step": 5014 }, { "epoch": 0.7081332956791867, "grad_norm": 3.6495049561481427, "learning_rate": 1.7855245182231794e-05, "loss": 0.906, "step": 5015 }, { "epoch": 0.7082744987291726, "grad_norm": 3.635346614516239, "learning_rate": 1.785430171292002e-05, "loss": 1.1495, "step": 5016 }, { "epoch": 0.7084157017791585, "grad_norm": 3.0679473220294065, "learning_rate": 1.785335806107766e-05, "loss": 0.852, "step": 5017 }, { "epoch": 0.7085569048291444, "grad_norm": 3.2887875834953566, "learning_rate": 1.7852414226726654e-05, "loss": 1.0946, "step": 5018 }, { "epoch": 0.7086981078791302, "grad_norm": 3.1590412233372263, "learning_rate": 1.7851470209888938e-05, "loss": 1.006, "step": 5019 }, { "epoch": 0.7088393109291161, "grad_norm": 3.057353740719704, "learning_rate": 1.7850526010586437e-05, "loss": 0.7995, "step": 5020 }, { "epoch": 0.708980513979102, "grad_norm": 2.941570959866705, "learning_rate": 1.7849581628841106e-05, "loss": 0.7424, "step": 5021 }, { "epoch": 0.7091217170290879, "grad_norm": 3.415629161590476, "learning_rate": 1.7848637064674887e-05, "loss": 0.8821, "step": 5022 }, { "epoch": 0.7092629200790737, "grad_norm": 3.351485111044392, "learning_rate": 1.7847692318109732e-05, "loss": 0.8917, "step": 5023 }, { "epoch": 0.7094041231290595, "grad_norm": 3.2219728283150033, "learning_rate": 1.78467473891676e-05, "loss": 0.8681, "step": 5024 }, { "epoch": 0.7095453261790454, "grad_norm": 4.686022726707394, "learning_rate": 1.7845802277870442e-05, "loss": 1.4153, "step": 5025 }, { "epoch": 0.7096865292290313, "grad_norm": 3.587896315770786, "learning_rate": 1.784485698424023e-05, "loss": 0.9595, "step": 5026 }, { "epoch": 0.7098277322790172, "grad_norm": 4.563153552415818, "learning_rate": 1.784391150829893e-05, "loss": 1.1391, "step": 5027 }, { "epoch": 0.7099689353290031, "grad_norm": 3.99692112190469, "learning_rate": 1.784296585006851e-05, "loss": 1.2172, "step": 5028 }, { "epoch": 0.710110138378989, "grad_norm": 4.741025411681182, "learning_rate": 1.7842020009570955e-05, "loss": 1.4888, "step": 5029 }, { "epoch": 0.7102513414289748, "grad_norm": 3.7659797914817594, "learning_rate": 1.784107398682824e-05, "loss": 1.1441, "step": 5030 }, { "epoch": 0.7103925444789607, "grad_norm": 3.534957265534807, "learning_rate": 1.7840127781862354e-05, "loss": 1.0008, "step": 5031 }, { "epoch": 0.7105337475289466, "grad_norm": 3.524731622798178, "learning_rate": 1.7839181394695285e-05, "loss": 0.8784, "step": 5032 }, { "epoch": 0.7106749505789325, "grad_norm": 2.9918341177888887, "learning_rate": 1.7838234825349023e-05, "loss": 0.971, "step": 5033 }, { "epoch": 0.7108161536289184, "grad_norm": 3.190053295666956, "learning_rate": 1.7837288073845566e-05, "loss": 0.8066, "step": 5034 }, { "epoch": 0.7109573566789043, "grad_norm": 3.6128173505441077, "learning_rate": 1.7836341140206924e-05, "loss": 1.1073, "step": 5035 }, { "epoch": 0.7110985597288901, "grad_norm": 3.2123705674930005, "learning_rate": 1.7835394024455097e-05, "loss": 0.9778, "step": 5036 }, { "epoch": 0.711239762778876, "grad_norm": 3.4135099829700737, "learning_rate": 1.78344467266121e-05, "loss": 1.0221, "step": 5037 }, { "epoch": 0.7113809658288619, "grad_norm": 3.2736195231841223, "learning_rate": 1.783349924669994e-05, "loss": 0.9635, "step": 5038 }, { "epoch": 0.7115221688788478, "grad_norm": 3.2241616495603385, "learning_rate": 1.783255158474064e-05, "loss": 0.9324, "step": 5039 }, { "epoch": 0.7116633719288337, "grad_norm": 4.154404588747539, "learning_rate": 1.7831603740756223e-05, "loss": 1.1938, "step": 5040 }, { "epoch": 0.7118045749788195, "grad_norm": 3.3759124875802358, "learning_rate": 1.7830655714768717e-05, "loss": 0.9166, "step": 5041 }, { "epoch": 0.7119457780288054, "grad_norm": 5.793011529970844, "learning_rate": 1.7829707506800157e-05, "loss": 1.1978, "step": 5042 }, { "epoch": 0.7120869810787913, "grad_norm": 3.522288964839806, "learning_rate": 1.7828759116872575e-05, "loss": 0.9847, "step": 5043 }, { "epoch": 0.7122281841287772, "grad_norm": 3.7627963890101475, "learning_rate": 1.782781054500801e-05, "loss": 1.1176, "step": 5044 }, { "epoch": 0.7123693871787631, "grad_norm": 3.4483716001159066, "learning_rate": 1.7826861791228516e-05, "loss": 0.9441, "step": 5045 }, { "epoch": 0.712510590228749, "grad_norm": 3.541461830271571, "learning_rate": 1.782591285555613e-05, "loss": 1.0603, "step": 5046 }, { "epoch": 0.7126517932787348, "grad_norm": 3.8626755324588897, "learning_rate": 1.7824963738012907e-05, "loss": 1.1205, "step": 5047 }, { "epoch": 0.7127929963287207, "grad_norm": 3.5992067680540565, "learning_rate": 1.7824014438620906e-05, "loss": 0.83, "step": 5048 }, { "epoch": 0.7129341993787066, "grad_norm": 3.1195219912610805, "learning_rate": 1.782306495740219e-05, "loss": 0.8612, "step": 5049 }, { "epoch": 0.7130754024286925, "grad_norm": 3.7175836391658903, "learning_rate": 1.7822115294378824e-05, "loss": 1.3601, "step": 5050 }, { "epoch": 0.7132166054786784, "grad_norm": 3.7838242310272285, "learning_rate": 1.7821165449572873e-05, "loss": 1.1783, "step": 5051 }, { "epoch": 0.7133578085286643, "grad_norm": 3.1669302770651684, "learning_rate": 1.7820215423006418e-05, "loss": 0.9582, "step": 5052 }, { "epoch": 0.7134990115786501, "grad_norm": 4.404494648621697, "learning_rate": 1.7819265214701532e-05, "loss": 1.13, "step": 5053 }, { "epoch": 0.713640214628636, "grad_norm": 5.172759099502781, "learning_rate": 1.78183148246803e-05, "loss": 1.1508, "step": 5054 }, { "epoch": 0.7137814176786219, "grad_norm": 3.2643971305389443, "learning_rate": 1.781736425296481e-05, "loss": 0.9786, "step": 5055 }, { "epoch": 0.7139226207286078, "grad_norm": 3.671970175994379, "learning_rate": 1.7816413499577146e-05, "loss": 1.2046, "step": 5056 }, { "epoch": 0.7140638237785936, "grad_norm": 3.259658493142142, "learning_rate": 1.781546256453941e-05, "loss": 0.8722, "step": 5057 }, { "epoch": 0.7142050268285794, "grad_norm": 3.291482008584362, "learning_rate": 1.78145114478737e-05, "loss": 0.9473, "step": 5058 }, { "epoch": 0.7143462298785653, "grad_norm": 3.75874963431193, "learning_rate": 1.781356014960212e-05, "loss": 1.1589, "step": 5059 }, { "epoch": 0.7144874329285512, "grad_norm": 3.442225654403889, "learning_rate": 1.7812608669746774e-05, "loss": 0.9354, "step": 5060 }, { "epoch": 0.7146286359785371, "grad_norm": 3.284929141986392, "learning_rate": 1.7811657008329776e-05, "loss": 1.0478, "step": 5061 }, { "epoch": 0.714769839028523, "grad_norm": 3.6133507544518886, "learning_rate": 1.7810705165373245e-05, "loss": 0.9145, "step": 5062 }, { "epoch": 0.7149110420785089, "grad_norm": 3.218780051137876, "learning_rate": 1.78097531408993e-05, "loss": 0.8709, "step": 5063 }, { "epoch": 0.7150522451284947, "grad_norm": 3.558911364493531, "learning_rate": 1.7808800934930062e-05, "loss": 1.0947, "step": 5064 }, { "epoch": 0.7151934481784806, "grad_norm": 3.5387865257996407, "learning_rate": 1.7807848547487664e-05, "loss": 1.1608, "step": 5065 }, { "epoch": 0.7153346512284665, "grad_norm": 3.8036642634771423, "learning_rate": 1.7806895978594237e-05, "loss": 1.1467, "step": 5066 }, { "epoch": 0.7154758542784524, "grad_norm": 3.458201833981553, "learning_rate": 1.780594322827192e-05, "loss": 0.9128, "step": 5067 }, { "epoch": 0.7156170573284383, "grad_norm": 2.9247954961783695, "learning_rate": 1.7804990296542856e-05, "loss": 0.8129, "step": 5068 }, { "epoch": 0.7157582603784242, "grad_norm": 3.881738686810293, "learning_rate": 1.7804037183429185e-05, "loss": 1.1715, "step": 5069 }, { "epoch": 0.71589946342841, "grad_norm": 3.8123525999979977, "learning_rate": 1.7803083888953058e-05, "loss": 1.1961, "step": 5070 }, { "epoch": 0.7160406664783959, "grad_norm": 3.636348554253174, "learning_rate": 1.7802130413136636e-05, "loss": 1.093, "step": 5071 }, { "epoch": 0.7161818695283818, "grad_norm": 2.995315705611547, "learning_rate": 1.780117675600207e-05, "loss": 0.8119, "step": 5072 }, { "epoch": 0.7163230725783677, "grad_norm": 3.1185258477851607, "learning_rate": 1.7800222917571526e-05, "loss": 0.7732, "step": 5073 }, { "epoch": 0.7164642756283536, "grad_norm": 3.0746745441981016, "learning_rate": 1.779926889786717e-05, "loss": 0.7486, "step": 5074 }, { "epoch": 0.7166054786783395, "grad_norm": 4.174964883702475, "learning_rate": 1.779831469691117e-05, "loss": 1.2721, "step": 5075 }, { "epoch": 0.7167466817283253, "grad_norm": 3.1536227369832046, "learning_rate": 1.7797360314725707e-05, "loss": 0.9134, "step": 5076 }, { "epoch": 0.7168878847783112, "grad_norm": 3.2854668208042233, "learning_rate": 1.779640575133296e-05, "loss": 0.8518, "step": 5077 }, { "epoch": 0.7170290878282971, "grad_norm": 3.4837145114725363, "learning_rate": 1.779545100675511e-05, "loss": 1.1309, "step": 5078 }, { "epoch": 0.717170290878283, "grad_norm": 3.7979437586953613, "learning_rate": 1.779449608101434e-05, "loss": 1.2562, "step": 5079 }, { "epoch": 0.7173114939282689, "grad_norm": 4.0271371633795345, "learning_rate": 1.779354097413285e-05, "loss": 1.2333, "step": 5080 }, { "epoch": 0.7174526969782548, "grad_norm": 3.6100887282124137, "learning_rate": 1.7792585686132837e-05, "loss": 1.0177, "step": 5081 }, { "epoch": 0.7175939000282406, "grad_norm": 3.9297868860480603, "learning_rate": 1.7791630217036492e-05, "loss": 1.0506, "step": 5082 }, { "epoch": 0.7177351030782265, "grad_norm": 3.2609509996038835, "learning_rate": 1.779067456686603e-05, "loss": 1.0649, "step": 5083 }, { "epoch": 0.7178763061282124, "grad_norm": 3.5556084944592437, "learning_rate": 1.7789718735643655e-05, "loss": 1.162, "step": 5084 }, { "epoch": 0.7180175091781983, "grad_norm": 3.2958968160034234, "learning_rate": 1.778876272339158e-05, "loss": 0.902, "step": 5085 }, { "epoch": 0.7181587122281842, "grad_norm": 3.071537562898388, "learning_rate": 1.7787806530132022e-05, "loss": 0.8784, "step": 5086 }, { "epoch": 0.71829991527817, "grad_norm": 3.661685776591576, "learning_rate": 1.7786850155887206e-05, "loss": 1.0425, "step": 5087 }, { "epoch": 0.7184411183281559, "grad_norm": 3.686946783694264, "learning_rate": 1.7785893600679353e-05, "loss": 0.9308, "step": 5088 }, { "epoch": 0.7185823213781418, "grad_norm": 2.9861377903476454, "learning_rate": 1.7784936864530698e-05, "loss": 0.9348, "step": 5089 }, { "epoch": 0.7187235244281277, "grad_norm": 3.5021656173929014, "learning_rate": 1.778397994746347e-05, "loss": 1.019, "step": 5090 }, { "epoch": 0.7188647274781135, "grad_norm": 3.9235734904843844, "learning_rate": 1.778302284949991e-05, "loss": 1.2335, "step": 5091 }, { "epoch": 0.7190059305280994, "grad_norm": 3.709096823737312, "learning_rate": 1.7782065570662263e-05, "loss": 1.0457, "step": 5092 }, { "epoch": 0.7191471335780852, "grad_norm": 4.607491061376305, "learning_rate": 1.7781108110972768e-05, "loss": 1.1687, "step": 5093 }, { "epoch": 0.7192883366280711, "grad_norm": 3.501245519790058, "learning_rate": 1.7780150470453682e-05, "loss": 0.8517, "step": 5094 }, { "epoch": 0.719429539678057, "grad_norm": 3.9407611820845374, "learning_rate": 1.7779192649127262e-05, "loss": 1.2203, "step": 5095 }, { "epoch": 0.7195707427280429, "grad_norm": 3.8159200515798406, "learning_rate": 1.777823464701576e-05, "loss": 1.0593, "step": 5096 }, { "epoch": 0.7197119457780288, "grad_norm": 4.041044550350458, "learning_rate": 1.777727646414145e-05, "loss": 1.1471, "step": 5097 }, { "epoch": 0.7198531488280147, "grad_norm": 3.9478324406004552, "learning_rate": 1.777631810052659e-05, "loss": 1.0179, "step": 5098 }, { "epoch": 0.7199943518780005, "grad_norm": 3.3538356267017724, "learning_rate": 1.7775359556193455e-05, "loss": 0.8945, "step": 5099 }, { "epoch": 0.7201355549279864, "grad_norm": 3.338922118068307, "learning_rate": 1.777440083116432e-05, "loss": 0.8278, "step": 5100 }, { "epoch": 0.7202767579779723, "grad_norm": 3.8363885656517036, "learning_rate": 1.7773441925461473e-05, "loss": 1.0449, "step": 5101 }, { "epoch": 0.7204179610279582, "grad_norm": 3.1603997632519425, "learning_rate": 1.777248283910719e-05, "loss": 0.8904, "step": 5102 }, { "epoch": 0.7205591640779441, "grad_norm": 3.396208901937564, "learning_rate": 1.7771523572123763e-05, "loss": 1.0726, "step": 5103 }, { "epoch": 0.72070036712793, "grad_norm": 2.9810037967954126, "learning_rate": 1.777056412453348e-05, "loss": 0.8956, "step": 5104 }, { "epoch": 0.7208415701779158, "grad_norm": 3.503331931999316, "learning_rate": 1.776960449635865e-05, "loss": 0.9719, "step": 5105 }, { "epoch": 0.7209827732279017, "grad_norm": 3.4494466982548366, "learning_rate": 1.776864468762156e-05, "loss": 1.1753, "step": 5106 }, { "epoch": 0.7211239762778876, "grad_norm": 3.726840993206747, "learning_rate": 1.776768469834453e-05, "loss": 1.0176, "step": 5107 }, { "epoch": 0.7212651793278735, "grad_norm": 3.118234141425858, "learning_rate": 1.7766724528549856e-05, "loss": 0.9714, "step": 5108 }, { "epoch": 0.7214063823778594, "grad_norm": 3.124577026653579, "learning_rate": 1.7765764178259863e-05, "loss": 0.9835, "step": 5109 }, { "epoch": 0.7215475854278453, "grad_norm": 3.463917223470069, "learning_rate": 1.776480364749686e-05, "loss": 0.9261, "step": 5110 }, { "epoch": 0.7216887884778311, "grad_norm": 4.960275524805102, "learning_rate": 1.7763842936283175e-05, "loss": 1.3761, "step": 5111 }, { "epoch": 0.721829991527817, "grad_norm": 3.7958156803643153, "learning_rate": 1.7762882044641133e-05, "loss": 1.0542, "step": 5112 }, { "epoch": 0.7219711945778029, "grad_norm": 3.812607035413126, "learning_rate": 1.7761920972593064e-05, "loss": 1.2381, "step": 5113 }, { "epoch": 0.7221123976277888, "grad_norm": 3.923127410259805, "learning_rate": 1.7760959720161306e-05, "loss": 1.1529, "step": 5114 }, { "epoch": 0.7222536006777747, "grad_norm": 3.294678110341738, "learning_rate": 1.7759998287368193e-05, "loss": 0.9417, "step": 5115 }, { "epoch": 0.7223948037277605, "grad_norm": 3.654121588182083, "learning_rate": 1.7759036674236074e-05, "loss": 1.1682, "step": 5116 }, { "epoch": 0.7225360067777464, "grad_norm": 3.464707382759429, "learning_rate": 1.775807488078729e-05, "loss": 1.0189, "step": 5117 }, { "epoch": 0.7226772098277323, "grad_norm": 3.75361908722945, "learning_rate": 1.77571129070442e-05, "loss": 1.0253, "step": 5118 }, { "epoch": 0.7228184128777182, "grad_norm": 4.309776150982811, "learning_rate": 1.7756150753029154e-05, "loss": 1.4386, "step": 5119 }, { "epoch": 0.7229596159277041, "grad_norm": 3.7745096111707466, "learning_rate": 1.7755188418764517e-05, "loss": 1.0582, "step": 5120 }, { "epoch": 0.72310081897769, "grad_norm": 3.262589332901122, "learning_rate": 1.7754225904272647e-05, "loss": 0.973, "step": 5121 }, { "epoch": 0.7232420220276758, "grad_norm": 3.94200604833333, "learning_rate": 1.7753263209575914e-05, "loss": 1.0729, "step": 5122 }, { "epoch": 0.7233832250776617, "grad_norm": 3.5112783456773453, "learning_rate": 1.7752300334696696e-05, "loss": 1.0494, "step": 5123 }, { "epoch": 0.7235244281276476, "grad_norm": 3.5473434477661607, "learning_rate": 1.7751337279657365e-05, "loss": 0.9607, "step": 5124 }, { "epoch": 0.7236656311776334, "grad_norm": 3.702047363538064, "learning_rate": 1.7750374044480306e-05, "loss": 1.0263, "step": 5125 }, { "epoch": 0.7238068342276193, "grad_norm": 3.190481682500292, "learning_rate": 1.77494106291879e-05, "loss": 0.798, "step": 5126 }, { "epoch": 0.7239480372776051, "grad_norm": 3.4267621930970407, "learning_rate": 1.7748447033802533e-05, "loss": 1.0998, "step": 5127 }, { "epoch": 0.724089240327591, "grad_norm": 3.5568968669454732, "learning_rate": 1.7747483258346607e-05, "loss": 1.1395, "step": 5128 }, { "epoch": 0.7242304433775769, "grad_norm": 3.13554059679409, "learning_rate": 1.7746519302842514e-05, "loss": 0.8204, "step": 5129 }, { "epoch": 0.7243716464275628, "grad_norm": 3.6085516122329127, "learning_rate": 1.774555516731266e-05, "loss": 0.9007, "step": 5130 }, { "epoch": 0.7245128494775487, "grad_norm": 3.1023955675044195, "learning_rate": 1.774459085177945e-05, "loss": 0.9034, "step": 5131 }, { "epoch": 0.7246540525275346, "grad_norm": 3.5405036681756137, "learning_rate": 1.7743626356265292e-05, "loss": 1.1103, "step": 5132 }, { "epoch": 0.7247952555775204, "grad_norm": 3.5052599388501537, "learning_rate": 1.77426616807926e-05, "loss": 1.2092, "step": 5133 }, { "epoch": 0.7249364586275063, "grad_norm": 3.952595911177927, "learning_rate": 1.7741696825383797e-05, "loss": 1.1712, "step": 5134 }, { "epoch": 0.7250776616774922, "grad_norm": 3.9611151880889244, "learning_rate": 1.77407317900613e-05, "loss": 1.1648, "step": 5135 }, { "epoch": 0.7252188647274781, "grad_norm": 3.2303050088664125, "learning_rate": 1.7739766574847542e-05, "loss": 0.9367, "step": 5136 }, { "epoch": 0.725360067777464, "grad_norm": 3.366954061334709, "learning_rate": 1.773880117976495e-05, "loss": 1.0085, "step": 5137 }, { "epoch": 0.7255012708274499, "grad_norm": 4.335838459237132, "learning_rate": 1.7737835604835962e-05, "loss": 1.0369, "step": 5138 }, { "epoch": 0.7256424738774357, "grad_norm": 3.926134625480553, "learning_rate": 1.7736869850083013e-05, "loss": 1.2109, "step": 5139 }, { "epoch": 0.7257836769274216, "grad_norm": 3.9139457218285965, "learning_rate": 1.7735903915528553e-05, "loss": 1.0184, "step": 5140 }, { "epoch": 0.7259248799774075, "grad_norm": 3.7784900087469144, "learning_rate": 1.7734937801195027e-05, "loss": 1.0831, "step": 5141 }, { "epoch": 0.7260660830273934, "grad_norm": 2.6121135854296518, "learning_rate": 1.7733971507104887e-05, "loss": 0.7419, "step": 5142 }, { "epoch": 0.7262072860773793, "grad_norm": 4.05245068669235, "learning_rate": 1.7733005033280587e-05, "loss": 1.1432, "step": 5143 }, { "epoch": 0.7263484891273652, "grad_norm": 3.7942933763605615, "learning_rate": 1.7732038379744592e-05, "loss": 1.0541, "step": 5144 }, { "epoch": 0.726489692177351, "grad_norm": 4.009094162179761, "learning_rate": 1.7731071546519364e-05, "loss": 1.2215, "step": 5145 }, { "epoch": 0.7266308952273369, "grad_norm": 3.8059071421581474, "learning_rate": 1.773010453362737e-05, "loss": 1.0558, "step": 5146 }, { "epoch": 0.7267720982773228, "grad_norm": 3.4347692515434023, "learning_rate": 1.7729137341091088e-05, "loss": 0.9663, "step": 5147 }, { "epoch": 0.7269133013273087, "grad_norm": 4.569768528993115, "learning_rate": 1.772816996893299e-05, "loss": 0.9832, "step": 5148 }, { "epoch": 0.7270545043772946, "grad_norm": 3.222475836378053, "learning_rate": 1.772720241717556e-05, "loss": 0.8673, "step": 5149 }, { "epoch": 0.7271957074272805, "grad_norm": 3.435753983737089, "learning_rate": 1.7726234685841283e-05, "loss": 0.7729, "step": 5150 }, { "epoch": 0.7273369104772663, "grad_norm": 3.255167032402089, "learning_rate": 1.772526677495265e-05, "loss": 1.0498, "step": 5151 }, { "epoch": 0.7274781135272522, "grad_norm": 3.6806673764563254, "learning_rate": 1.772429868453215e-05, "loss": 1.1301, "step": 5152 }, { "epoch": 0.7276193165772381, "grad_norm": 3.8892305824580853, "learning_rate": 1.7723330414602288e-05, "loss": 0.947, "step": 5153 }, { "epoch": 0.727760519627224, "grad_norm": 3.0963346839530925, "learning_rate": 1.772236196518556e-05, "loss": 0.9215, "step": 5154 }, { "epoch": 0.7279017226772099, "grad_norm": 2.933047855785967, "learning_rate": 1.7721393336304474e-05, "loss": 0.9151, "step": 5155 }, { "epoch": 0.7280429257271958, "grad_norm": 4.01678990634645, "learning_rate": 1.7720424527981545e-05, "loss": 0.9247, "step": 5156 }, { "epoch": 0.7281841287771816, "grad_norm": 3.402040162913891, "learning_rate": 1.7719455540239283e-05, "loss": 1.0262, "step": 5157 }, { "epoch": 0.7283253318271675, "grad_norm": 3.8477869156667377, "learning_rate": 1.7718486373100207e-05, "loss": 1.1099, "step": 5158 }, { "epoch": 0.7284665348771533, "grad_norm": 3.498431461414149, "learning_rate": 1.7717517026586844e-05, "loss": 0.9643, "step": 5159 }, { "epoch": 0.7286077379271392, "grad_norm": 3.5677650908186234, "learning_rate": 1.7716547500721715e-05, "loss": 0.8665, "step": 5160 }, { "epoch": 0.7287489409771251, "grad_norm": 3.8930545598142245, "learning_rate": 1.7715577795527355e-05, "loss": 0.8956, "step": 5161 }, { "epoch": 0.7288901440271109, "grad_norm": 3.2216476353570904, "learning_rate": 1.77146079110263e-05, "loss": 0.7613, "step": 5162 }, { "epoch": 0.7290313470770968, "grad_norm": 4.020711676814464, "learning_rate": 1.771363784724109e-05, "loss": 1.0128, "step": 5163 }, { "epoch": 0.7291725501270827, "grad_norm": 3.137472657516944, "learning_rate": 1.7712667604194263e-05, "loss": 0.9861, "step": 5164 }, { "epoch": 0.7293137531770686, "grad_norm": 4.31219302904973, "learning_rate": 1.7711697181908376e-05, "loss": 1.2672, "step": 5165 }, { "epoch": 0.7294549562270545, "grad_norm": 4.282622160731298, "learning_rate": 1.7710726580405977e-05, "loss": 1.07, "step": 5166 }, { "epoch": 0.7295961592770404, "grad_norm": 3.468123119075137, "learning_rate": 1.770975579970962e-05, "loss": 0.9296, "step": 5167 }, { "epoch": 0.7297373623270262, "grad_norm": 3.849691387555377, "learning_rate": 1.770878483984187e-05, "loss": 1.2036, "step": 5168 }, { "epoch": 0.7298785653770121, "grad_norm": 3.5910642348711117, "learning_rate": 1.7707813700825288e-05, "loss": 1.2382, "step": 5169 }, { "epoch": 0.730019768426998, "grad_norm": 4.390141005309064, "learning_rate": 1.7706842382682445e-05, "loss": 1.1034, "step": 5170 }, { "epoch": 0.7301609714769839, "grad_norm": 4.002095614860221, "learning_rate": 1.770587088543591e-05, "loss": 1.2416, "step": 5171 }, { "epoch": 0.7303021745269698, "grad_norm": 3.203054407182655, "learning_rate": 1.770489920910827e-05, "loss": 0.9528, "step": 5172 }, { "epoch": 0.7304433775769557, "grad_norm": 3.7856891365510936, "learning_rate": 1.7703927353722096e-05, "loss": 1.1024, "step": 5173 }, { "epoch": 0.7305845806269415, "grad_norm": 3.966621153668162, "learning_rate": 1.770295531929998e-05, "loss": 1.4033, "step": 5174 }, { "epoch": 0.7307257836769274, "grad_norm": 3.6039883294406136, "learning_rate": 1.7701983105864506e-05, "loss": 1.0743, "step": 5175 }, { "epoch": 0.7308669867269133, "grad_norm": 3.3328610690320737, "learning_rate": 1.770101071343827e-05, "loss": 0.9686, "step": 5176 }, { "epoch": 0.7310081897768992, "grad_norm": 3.8073395357213276, "learning_rate": 1.7700038142043875e-05, "loss": 1.0549, "step": 5177 }, { "epoch": 0.7311493928268851, "grad_norm": 3.325732526560263, "learning_rate": 1.7699065391703914e-05, "loss": 0.9593, "step": 5178 }, { "epoch": 0.731290595876871, "grad_norm": 3.839317805384115, "learning_rate": 1.7698092462441003e-05, "loss": 1.0577, "step": 5179 }, { "epoch": 0.7314317989268568, "grad_norm": 3.788039397428166, "learning_rate": 1.7697119354277746e-05, "loss": 1.1351, "step": 5180 }, { "epoch": 0.7315730019768427, "grad_norm": 3.2791278864865805, "learning_rate": 1.7696146067236758e-05, "loss": 0.7441, "step": 5181 }, { "epoch": 0.7317142050268286, "grad_norm": 3.35427769434779, "learning_rate": 1.769517260134066e-05, "loss": 1.1672, "step": 5182 }, { "epoch": 0.7318554080768145, "grad_norm": 3.7705413624901833, "learning_rate": 1.7694198956612074e-05, "loss": 0.9819, "step": 5183 }, { "epoch": 0.7319966111268004, "grad_norm": 3.7667321721586955, "learning_rate": 1.769322513307363e-05, "loss": 1.0534, "step": 5184 }, { "epoch": 0.7321378141767863, "grad_norm": 3.5721051618504567, "learning_rate": 1.769225113074795e-05, "loss": 1.2151, "step": 5185 }, { "epoch": 0.7322790172267721, "grad_norm": 3.647920302535526, "learning_rate": 1.769127694965768e-05, "loss": 0.9356, "step": 5186 }, { "epoch": 0.732420220276758, "grad_norm": 3.763200520435438, "learning_rate": 1.7690302589825455e-05, "loss": 1.0483, "step": 5187 }, { "epoch": 0.7325614233267439, "grad_norm": 3.7199292020346646, "learning_rate": 1.768932805127392e-05, "loss": 1.1347, "step": 5188 }, { "epoch": 0.7327026263767298, "grad_norm": 3.658978160772491, "learning_rate": 1.768835333402572e-05, "loss": 0.9018, "step": 5189 }, { "epoch": 0.7328438294267157, "grad_norm": 3.841981055971075, "learning_rate": 1.768737843810351e-05, "loss": 1.0282, "step": 5190 }, { "epoch": 0.7329850324767015, "grad_norm": 3.7216067642755886, "learning_rate": 1.768640336352994e-05, "loss": 1.0669, "step": 5191 }, { "epoch": 0.7331262355266874, "grad_norm": 3.6052685057509866, "learning_rate": 1.7685428110327683e-05, "loss": 1.0075, "step": 5192 }, { "epoch": 0.7332674385766732, "grad_norm": 3.2833593338194267, "learning_rate": 1.7684452678519393e-05, "loss": 0.7798, "step": 5193 }, { "epoch": 0.7334086416266591, "grad_norm": 3.206096260320886, "learning_rate": 1.7683477068127742e-05, "loss": 0.8312, "step": 5194 }, { "epoch": 0.733549844676645, "grad_norm": 3.7313288739670796, "learning_rate": 1.76825012791754e-05, "loss": 1.2218, "step": 5195 }, { "epoch": 0.7336910477266309, "grad_norm": 3.986072999137443, "learning_rate": 1.7681525311685046e-05, "loss": 1.0977, "step": 5196 }, { "epoch": 0.7338322507766167, "grad_norm": 4.128323338782933, "learning_rate": 1.7680549165679362e-05, "loss": 1.0499, "step": 5197 }, { "epoch": 0.7339734538266026, "grad_norm": 4.175860787342354, "learning_rate": 1.7679572841181033e-05, "loss": 1.1421, "step": 5198 }, { "epoch": 0.7341146568765885, "grad_norm": 3.7739005875436993, "learning_rate": 1.7678596338212747e-05, "loss": 0.9572, "step": 5199 }, { "epoch": 0.7342558599265744, "grad_norm": 3.9305353984342584, "learning_rate": 1.76776196567972e-05, "loss": 1.2202, "step": 5200 }, { "epoch": 0.7343970629765603, "grad_norm": 4.225679492044602, "learning_rate": 1.7676642796957086e-05, "loss": 1.0551, "step": 5201 }, { "epoch": 0.7345382660265461, "grad_norm": 3.386513496963224, "learning_rate": 1.767566575871511e-05, "loss": 1.1148, "step": 5202 }, { "epoch": 0.734679469076532, "grad_norm": 3.345487639984392, "learning_rate": 1.7674688542093977e-05, "loss": 1.1752, "step": 5203 }, { "epoch": 0.7348206721265179, "grad_norm": 3.2110146012065974, "learning_rate": 1.7673711147116392e-05, "loss": 0.8906, "step": 5204 }, { "epoch": 0.7349618751765038, "grad_norm": 3.451130523935677, "learning_rate": 1.767273357380508e-05, "loss": 0.9435, "step": 5205 }, { "epoch": 0.7351030782264897, "grad_norm": 3.9884920365468877, "learning_rate": 1.767175582218275e-05, "loss": 1.1512, "step": 5206 }, { "epoch": 0.7352442812764756, "grad_norm": 4.049866100642281, "learning_rate": 1.7670777892272127e-05, "loss": 1.28, "step": 5207 }, { "epoch": 0.7353854843264614, "grad_norm": 3.6113124581335287, "learning_rate": 1.766979978409594e-05, "loss": 0.9274, "step": 5208 }, { "epoch": 0.7355266873764473, "grad_norm": 3.3263554113426115, "learning_rate": 1.766882149767692e-05, "loss": 1.1465, "step": 5209 }, { "epoch": 0.7356678904264332, "grad_norm": 3.1929389283408773, "learning_rate": 1.76678430330378e-05, "loss": 0.901, "step": 5210 }, { "epoch": 0.7358090934764191, "grad_norm": 3.3494554321808763, "learning_rate": 1.7666864390201316e-05, "loss": 0.9073, "step": 5211 }, { "epoch": 0.735950296526405, "grad_norm": 3.4371131614234134, "learning_rate": 1.766588556919022e-05, "loss": 1.0814, "step": 5212 }, { "epoch": 0.7360914995763909, "grad_norm": 3.3378803297672826, "learning_rate": 1.7664906570027248e-05, "loss": 1.0004, "step": 5213 }, { "epoch": 0.7362327026263767, "grad_norm": 3.4893553281891725, "learning_rate": 1.766392739273516e-05, "loss": 1.0507, "step": 5214 }, { "epoch": 0.7363739056763626, "grad_norm": 3.512892620575974, "learning_rate": 1.7662948037336712e-05, "loss": 0.946, "step": 5215 }, { "epoch": 0.7365151087263485, "grad_norm": 3.903656291842149, "learning_rate": 1.766196850385466e-05, "loss": 1.0506, "step": 5216 }, { "epoch": 0.7366563117763344, "grad_norm": 4.1261989567664, "learning_rate": 1.7660988792311766e-05, "loss": 1.1422, "step": 5217 }, { "epoch": 0.7367975148263203, "grad_norm": 3.5265977026392106, "learning_rate": 1.7660008902730804e-05, "loss": 1.1161, "step": 5218 }, { "epoch": 0.7369387178763062, "grad_norm": 3.9651582668346275, "learning_rate": 1.765902883513454e-05, "loss": 1.1428, "step": 5219 }, { "epoch": 0.737079920926292, "grad_norm": 3.471093127144974, "learning_rate": 1.7658048589545757e-05, "loss": 1.132, "step": 5220 }, { "epoch": 0.7372211239762779, "grad_norm": 3.4770560955620504, "learning_rate": 1.765706816598723e-05, "loss": 1.002, "step": 5221 }, { "epoch": 0.7373623270262638, "grad_norm": 3.593611062719353, "learning_rate": 1.7656087564481746e-05, "loss": 0.9708, "step": 5222 }, { "epoch": 0.7375035300762497, "grad_norm": 4.222664087697859, "learning_rate": 1.7655106785052093e-05, "loss": 1.3272, "step": 5223 }, { "epoch": 0.7376447331262356, "grad_norm": 4.10434730451653, "learning_rate": 1.7654125827721066e-05, "loss": 1.0104, "step": 5224 }, { "epoch": 0.7377859361762215, "grad_norm": 4.473908021972094, "learning_rate": 1.7653144692511457e-05, "loss": 1.3244, "step": 5225 }, { "epoch": 0.7379271392262073, "grad_norm": 3.7077664029708375, "learning_rate": 1.7652163379446073e-05, "loss": 1.1665, "step": 5226 }, { "epoch": 0.7380683422761931, "grad_norm": 2.883075461939255, "learning_rate": 1.765118188854772e-05, "loss": 0.8076, "step": 5227 }, { "epoch": 0.738209545326179, "grad_norm": 3.3771721415184595, "learning_rate": 1.7650200219839198e-05, "loss": 1.0456, "step": 5228 }, { "epoch": 0.7383507483761649, "grad_norm": 4.589219589508171, "learning_rate": 1.7649218373343327e-05, "loss": 1.1142, "step": 5229 }, { "epoch": 0.7384919514261508, "grad_norm": 3.7225635184465626, "learning_rate": 1.7648236349082928e-05, "loss": 1.1035, "step": 5230 }, { "epoch": 0.7386331544761366, "grad_norm": 3.2727647205613106, "learning_rate": 1.7647254147080817e-05, "loss": 0.9359, "step": 5231 }, { "epoch": 0.7387743575261225, "grad_norm": 3.6846080407813253, "learning_rate": 1.7646271767359824e-05, "loss": 1.1587, "step": 5232 }, { "epoch": 0.7389155605761084, "grad_norm": 3.6534726703244154, "learning_rate": 1.7645289209942776e-05, "loss": 1.1327, "step": 5233 }, { "epoch": 0.7390567636260943, "grad_norm": 3.5328163068772307, "learning_rate": 1.764430647485251e-05, "loss": 0.9535, "step": 5234 }, { "epoch": 0.7391979666760802, "grad_norm": 3.7887242368557668, "learning_rate": 1.7643323562111864e-05, "loss": 1.049, "step": 5235 }, { "epoch": 0.7393391697260661, "grad_norm": 3.288233810620734, "learning_rate": 1.7642340471743675e-05, "loss": 0.7307, "step": 5236 }, { "epoch": 0.7394803727760519, "grad_norm": 3.8576367387850223, "learning_rate": 1.7641357203770793e-05, "loss": 1.0743, "step": 5237 }, { "epoch": 0.7396215758260378, "grad_norm": 4.412853091996168, "learning_rate": 1.7640373758216075e-05, "loss": 1.1327, "step": 5238 }, { "epoch": 0.7397627788760237, "grad_norm": 3.7197698532486623, "learning_rate": 1.7639390135102367e-05, "loss": 1.4073, "step": 5239 }, { "epoch": 0.7399039819260096, "grad_norm": 3.104639617932623, "learning_rate": 1.7638406334452535e-05, "loss": 0.9323, "step": 5240 }, { "epoch": 0.7400451849759955, "grad_norm": 3.396733964642443, "learning_rate": 1.763742235628944e-05, "loss": 0.8689, "step": 5241 }, { "epoch": 0.7401863880259814, "grad_norm": 3.4498789420370413, "learning_rate": 1.7636438200635942e-05, "loss": 0.7899, "step": 5242 }, { "epoch": 0.7403275910759672, "grad_norm": 3.5573532413711293, "learning_rate": 1.763545386751492e-05, "loss": 1.0146, "step": 5243 }, { "epoch": 0.7404687941259531, "grad_norm": 2.749949074573413, "learning_rate": 1.7634469356949246e-05, "loss": 0.8445, "step": 5244 }, { "epoch": 0.740609997175939, "grad_norm": 3.7812262892611135, "learning_rate": 1.7633484668961803e-05, "loss": 1.159, "step": 5245 }, { "epoch": 0.7407512002259249, "grad_norm": 3.4289242786044163, "learning_rate": 1.7632499803575473e-05, "loss": 0.8263, "step": 5246 }, { "epoch": 0.7408924032759108, "grad_norm": 3.7642393483032857, "learning_rate": 1.7631514760813146e-05, "loss": 0.8606, "step": 5247 }, { "epoch": 0.7410336063258967, "grad_norm": 3.4989436153831783, "learning_rate": 1.7630529540697708e-05, "loss": 1.1192, "step": 5248 }, { "epoch": 0.7411748093758825, "grad_norm": 3.7285764613430485, "learning_rate": 1.762954414325206e-05, "loss": 1.1583, "step": 5249 }, { "epoch": 0.7413160124258684, "grad_norm": 3.305235068239984, "learning_rate": 1.7628558568499103e-05, "loss": 1.0447, "step": 5250 }, { "epoch": 0.7414572154758543, "grad_norm": 3.526833282379295, "learning_rate": 1.7627572816461736e-05, "loss": 1.0123, "step": 5251 }, { "epoch": 0.7415984185258402, "grad_norm": 3.5635320757670383, "learning_rate": 1.7626586887162875e-05, "loss": 1.1391, "step": 5252 }, { "epoch": 0.7417396215758261, "grad_norm": 3.5667605992656144, "learning_rate": 1.7625600780625425e-05, "loss": 1.1746, "step": 5253 }, { "epoch": 0.741880824625812, "grad_norm": 4.026572701741333, "learning_rate": 1.7624614496872304e-05, "loss": 1.0284, "step": 5254 }, { "epoch": 0.7420220276757978, "grad_norm": 3.5205526561313825, "learning_rate": 1.762362803592644e-05, "loss": 1.282, "step": 5255 }, { "epoch": 0.7421632307257837, "grad_norm": 3.1102377815637468, "learning_rate": 1.762264139781075e-05, "loss": 0.956, "step": 5256 }, { "epoch": 0.7423044337757696, "grad_norm": 3.168249888149406, "learning_rate": 1.762165458254817e-05, "loss": 0.975, "step": 5257 }, { "epoch": 0.7424456368257555, "grad_norm": 3.8671052566676645, "learning_rate": 1.7620667590161626e-05, "loss": 1.1254, "step": 5258 }, { "epoch": 0.7425868398757414, "grad_norm": 3.9261245165406278, "learning_rate": 1.7619680420674057e-05, "loss": 1.0099, "step": 5259 }, { "epoch": 0.7427280429257272, "grad_norm": 3.203499670483618, "learning_rate": 1.7618693074108405e-05, "loss": 0.7872, "step": 5260 }, { "epoch": 0.742869245975713, "grad_norm": 3.812647330840194, "learning_rate": 1.761770555048762e-05, "loss": 1.0235, "step": 5261 }, { "epoch": 0.7430104490256989, "grad_norm": 3.514385294845092, "learning_rate": 1.7616717849834644e-05, "loss": 1.1711, "step": 5262 }, { "epoch": 0.7431516520756848, "grad_norm": 4.437277939150923, "learning_rate": 1.7615729972172437e-05, "loss": 1.1246, "step": 5263 }, { "epoch": 0.7432928551256707, "grad_norm": 4.015851152418311, "learning_rate": 1.7614741917523956e-05, "loss": 1.0916, "step": 5264 }, { "epoch": 0.7434340581756566, "grad_norm": 3.124947656555935, "learning_rate": 1.7613753685912155e-05, "loss": 0.9798, "step": 5265 }, { "epoch": 0.7435752612256424, "grad_norm": 4.160254950067781, "learning_rate": 1.7612765277360013e-05, "loss": 1.2165, "step": 5266 }, { "epoch": 0.7437164642756283, "grad_norm": 3.6338217951078082, "learning_rate": 1.761177669189049e-05, "loss": 0.9146, "step": 5267 }, { "epoch": 0.7438576673256142, "grad_norm": 3.1805582969404225, "learning_rate": 1.761078792952657e-05, "loss": 0.8451, "step": 5268 }, { "epoch": 0.7439988703756001, "grad_norm": 3.4395830253350717, "learning_rate": 1.760979899029122e-05, "loss": 0.9423, "step": 5269 }, { "epoch": 0.744140073425586, "grad_norm": 3.783780339717336, "learning_rate": 1.7608809874207426e-05, "loss": 1.0166, "step": 5270 }, { "epoch": 0.7442812764755719, "grad_norm": 3.6207270801734275, "learning_rate": 1.760782058129818e-05, "loss": 1.115, "step": 5271 }, { "epoch": 0.7444224795255577, "grad_norm": 3.2276139764169156, "learning_rate": 1.7606831111586467e-05, "loss": 0.9483, "step": 5272 }, { "epoch": 0.7445636825755436, "grad_norm": 3.795702391371701, "learning_rate": 1.7605841465095287e-05, "loss": 1.1897, "step": 5273 }, { "epoch": 0.7447048856255295, "grad_norm": 3.5461080889903847, "learning_rate": 1.7604851641847633e-05, "loss": 0.9511, "step": 5274 }, { "epoch": 0.7448460886755154, "grad_norm": 3.4178716346569615, "learning_rate": 1.7603861641866517e-05, "loss": 1.0617, "step": 5275 }, { "epoch": 0.7449872917255013, "grad_norm": 3.7477211860272233, "learning_rate": 1.7602871465174934e-05, "loss": 1.1985, "step": 5276 }, { "epoch": 0.7451284947754871, "grad_norm": 2.994505576322276, "learning_rate": 1.76018811117959e-05, "loss": 0.7673, "step": 5277 }, { "epoch": 0.745269697825473, "grad_norm": 3.4740876623412857, "learning_rate": 1.7600890581752435e-05, "loss": 0.9432, "step": 5278 }, { "epoch": 0.7454109008754589, "grad_norm": 3.600328736812069, "learning_rate": 1.7599899875067557e-05, "loss": 0.8368, "step": 5279 }, { "epoch": 0.7455521039254448, "grad_norm": 3.342449146957272, "learning_rate": 1.7598908991764288e-05, "loss": 1.0104, "step": 5280 }, { "epoch": 0.7456933069754307, "grad_norm": 3.7531478965551393, "learning_rate": 1.7597917931865655e-05, "loss": 1.0059, "step": 5281 }, { "epoch": 0.7458345100254166, "grad_norm": 3.102679602615667, "learning_rate": 1.7596926695394692e-05, "loss": 0.9659, "step": 5282 }, { "epoch": 0.7459757130754024, "grad_norm": 3.458213762328175, "learning_rate": 1.759593528237443e-05, "loss": 0.979, "step": 5283 }, { "epoch": 0.7461169161253883, "grad_norm": 4.005589914557624, "learning_rate": 1.7594943692827913e-05, "loss": 1.2169, "step": 5284 }, { "epoch": 0.7462581191753742, "grad_norm": 3.1859866924520754, "learning_rate": 1.759395192677819e-05, "loss": 0.7548, "step": 5285 }, { "epoch": 0.7463993222253601, "grad_norm": 3.8687154766677065, "learning_rate": 1.75929599842483e-05, "loss": 1.1065, "step": 5286 }, { "epoch": 0.746540525275346, "grad_norm": 3.610186990714733, "learning_rate": 1.7591967865261296e-05, "loss": 1.0946, "step": 5287 }, { "epoch": 0.7466817283253319, "grad_norm": 3.5461150690814436, "learning_rate": 1.759097556984024e-05, "loss": 1.0372, "step": 5288 }, { "epoch": 0.7468229313753177, "grad_norm": 3.431802686057019, "learning_rate": 1.7589983098008193e-05, "loss": 0.8818, "step": 5289 }, { "epoch": 0.7469641344253036, "grad_norm": 3.596560044346133, "learning_rate": 1.7588990449788213e-05, "loss": 1.0804, "step": 5290 }, { "epoch": 0.7471053374752895, "grad_norm": 3.538349967940706, "learning_rate": 1.7587997625203374e-05, "loss": 0.9982, "step": 5291 }, { "epoch": 0.7472465405252754, "grad_norm": 3.5242698351862383, "learning_rate": 1.7587004624276747e-05, "loss": 1.0503, "step": 5292 }, { "epoch": 0.7473877435752613, "grad_norm": 3.559411905785136, "learning_rate": 1.7586011447031407e-05, "loss": 1.0064, "step": 5293 }, { "epoch": 0.7475289466252472, "grad_norm": 4.2104462209446805, "learning_rate": 1.758501809349044e-05, "loss": 1.3989, "step": 5294 }, { "epoch": 0.7476701496752329, "grad_norm": 3.97111030692772, "learning_rate": 1.7584024563676925e-05, "loss": 1.3128, "step": 5295 }, { "epoch": 0.7478113527252188, "grad_norm": 3.6564760797107128, "learning_rate": 1.7583030857613957e-05, "loss": 1.0056, "step": 5296 }, { "epoch": 0.7479525557752047, "grad_norm": 3.900718121153533, "learning_rate": 1.7582036975324626e-05, "loss": 1.05, "step": 5297 }, { "epoch": 0.7480937588251906, "grad_norm": 3.4538854344746537, "learning_rate": 1.7581042916832028e-05, "loss": 1.0057, "step": 5298 }, { "epoch": 0.7482349618751765, "grad_norm": 3.7996806827768443, "learning_rate": 1.758004868215927e-05, "loss": 1.2415, "step": 5299 }, { "epoch": 0.7483761649251623, "grad_norm": 3.8438051131908546, "learning_rate": 1.7579054271329457e-05, "loss": 1.0376, "step": 5300 }, { "epoch": 0.7485173679751482, "grad_norm": 3.1111986790033948, "learning_rate": 1.757805968436569e-05, "loss": 0.8757, "step": 5301 }, { "epoch": 0.7486585710251341, "grad_norm": 3.5937512691396383, "learning_rate": 1.7577064921291092e-05, "loss": 1.1634, "step": 5302 }, { "epoch": 0.74879977407512, "grad_norm": 3.6351106258747037, "learning_rate": 1.7576069982128774e-05, "loss": 0.9912, "step": 5303 }, { "epoch": 0.7489409771251059, "grad_norm": 3.142698079000596, "learning_rate": 1.7575074866901863e-05, "loss": 0.985, "step": 5304 }, { "epoch": 0.7490821801750918, "grad_norm": 4.407185830881482, "learning_rate": 1.7574079575633485e-05, "loss": 1.0153, "step": 5305 }, { "epoch": 0.7492233832250776, "grad_norm": 3.7289964165424707, "learning_rate": 1.757308410834677e-05, "loss": 1.2923, "step": 5306 }, { "epoch": 0.7493645862750635, "grad_norm": 3.4073322049966652, "learning_rate": 1.7572088465064847e-05, "loss": 0.9226, "step": 5307 }, { "epoch": 0.7495057893250494, "grad_norm": 4.049079965490176, "learning_rate": 1.757109264581086e-05, "loss": 1.2077, "step": 5308 }, { "epoch": 0.7496469923750353, "grad_norm": 4.466656591077825, "learning_rate": 1.757009665060795e-05, "loss": 1.5774, "step": 5309 }, { "epoch": 0.7497881954250212, "grad_norm": 2.827904860133254, "learning_rate": 1.756910047947926e-05, "loss": 0.878, "step": 5310 }, { "epoch": 0.7499293984750071, "grad_norm": 3.636550670263667, "learning_rate": 1.7568104132447946e-05, "loss": 0.962, "step": 5311 }, { "epoch": 0.7500706015249929, "grad_norm": 3.094501391307671, "learning_rate": 1.7567107609537163e-05, "loss": 0.9145, "step": 5312 }, { "epoch": 0.7502118045749788, "grad_norm": 3.5700771215770635, "learning_rate": 1.7566110910770064e-05, "loss": 1.0685, "step": 5313 }, { "epoch": 0.7503530076249647, "grad_norm": 3.5244796937086877, "learning_rate": 1.756511403616982e-05, "loss": 0.9721, "step": 5314 }, { "epoch": 0.7504942106749506, "grad_norm": 4.046433876412351, "learning_rate": 1.7564116985759584e-05, "loss": 0.9965, "step": 5315 }, { "epoch": 0.7506354137249365, "grad_norm": 3.788355814384214, "learning_rate": 1.7563119759562545e-05, "loss": 1.1643, "step": 5316 }, { "epoch": 0.7507766167749224, "grad_norm": 3.8064634587656223, "learning_rate": 1.7562122357601863e-05, "loss": 1.046, "step": 5317 }, { "epoch": 0.7509178198249082, "grad_norm": 3.950865928092699, "learning_rate": 1.7561124779900723e-05, "loss": 1.2353, "step": 5318 }, { "epoch": 0.7510590228748941, "grad_norm": 3.732554163554491, "learning_rate": 1.756012702648231e-05, "loss": 1.2607, "step": 5319 }, { "epoch": 0.75120022592488, "grad_norm": 3.8534297469548, "learning_rate": 1.755912909736981e-05, "loss": 1.0448, "step": 5320 }, { "epoch": 0.7513414289748659, "grad_norm": 3.976469870990217, "learning_rate": 1.7558130992586417e-05, "loss": 0.8862, "step": 5321 }, { "epoch": 0.7514826320248518, "grad_norm": 4.338312663839415, "learning_rate": 1.755713271215532e-05, "loss": 1.3358, "step": 5322 }, { "epoch": 0.7516238350748377, "grad_norm": 3.3519702965714604, "learning_rate": 1.7556134256099726e-05, "loss": 0.9989, "step": 5323 }, { "epoch": 0.7517650381248235, "grad_norm": 3.3925007376362726, "learning_rate": 1.7555135624442833e-05, "loss": 0.9702, "step": 5324 }, { "epoch": 0.7519062411748094, "grad_norm": 3.4231730859100593, "learning_rate": 1.755413681720785e-05, "loss": 0.8433, "step": 5325 }, { "epoch": 0.7520474442247953, "grad_norm": 3.780981959948629, "learning_rate": 1.755313783441799e-05, "loss": 1.1882, "step": 5326 }, { "epoch": 0.7521886472747812, "grad_norm": 3.512217577546415, "learning_rate": 1.755213867609647e-05, "loss": 0.9939, "step": 5327 }, { "epoch": 0.7523298503247671, "grad_norm": 3.2403075274646946, "learning_rate": 1.755113934226651e-05, "loss": 0.9148, "step": 5328 }, { "epoch": 0.7524710533747528, "grad_norm": 3.7949361627032907, "learning_rate": 1.755013983295133e-05, "loss": 1.2714, "step": 5329 }, { "epoch": 0.7526122564247387, "grad_norm": 3.90548510334465, "learning_rate": 1.754914014817416e-05, "loss": 1.0152, "step": 5330 }, { "epoch": 0.7527534594747246, "grad_norm": 3.86075182380805, "learning_rate": 1.7548140287958237e-05, "loss": 1.0784, "step": 5331 }, { "epoch": 0.7528946625247105, "grad_norm": 3.9216945701964336, "learning_rate": 1.754714025232679e-05, "loss": 1.0934, "step": 5332 }, { "epoch": 0.7530358655746964, "grad_norm": 3.393529562853154, "learning_rate": 1.7546140041303065e-05, "loss": 0.9881, "step": 5333 }, { "epoch": 0.7531770686246823, "grad_norm": 3.770490748168645, "learning_rate": 1.7545139654910302e-05, "loss": 1.0408, "step": 5334 }, { "epoch": 0.7533182716746681, "grad_norm": 4.168491592260939, "learning_rate": 1.7544139093171754e-05, "loss": 1.1088, "step": 5335 }, { "epoch": 0.753459474724654, "grad_norm": 4.582936892355896, "learning_rate": 1.7543138356110675e-05, "loss": 1.2007, "step": 5336 }, { "epoch": 0.7536006777746399, "grad_norm": 4.370261802808565, "learning_rate": 1.7542137443750313e-05, "loss": 1.1121, "step": 5337 }, { "epoch": 0.7537418808246258, "grad_norm": 4.10197373058056, "learning_rate": 1.7541136356113934e-05, "loss": 1.0637, "step": 5338 }, { "epoch": 0.7538830838746117, "grad_norm": 3.750455648501595, "learning_rate": 1.7540135093224803e-05, "loss": 1.1916, "step": 5339 }, { "epoch": 0.7540242869245976, "grad_norm": 3.049261729741092, "learning_rate": 1.753913365510619e-05, "loss": 0.84, "step": 5340 }, { "epoch": 0.7541654899745834, "grad_norm": 2.9758741506028565, "learning_rate": 1.753813204178137e-05, "loss": 0.8051, "step": 5341 }, { "epoch": 0.7543066930245693, "grad_norm": 3.1747014626869174, "learning_rate": 1.7537130253273613e-05, "loss": 1.0031, "step": 5342 }, { "epoch": 0.7544478960745552, "grad_norm": 3.8712398508181134, "learning_rate": 1.7536128289606206e-05, "loss": 1.0987, "step": 5343 }, { "epoch": 0.7545890991245411, "grad_norm": 4.679282789397256, "learning_rate": 1.7535126150802428e-05, "loss": 1.4102, "step": 5344 }, { "epoch": 0.754730302174527, "grad_norm": 4.030503397269633, "learning_rate": 1.7534123836885576e-05, "loss": 0.958, "step": 5345 }, { "epoch": 0.7548715052245129, "grad_norm": 3.5641902534767813, "learning_rate": 1.753312134787894e-05, "loss": 0.945, "step": 5346 }, { "epoch": 0.7550127082744987, "grad_norm": 3.9170753611460443, "learning_rate": 1.7532118683805816e-05, "loss": 1.1039, "step": 5347 }, { "epoch": 0.7551539113244846, "grad_norm": 4.175391321711326, "learning_rate": 1.7531115844689505e-05, "loss": 0.9655, "step": 5348 }, { "epoch": 0.7552951143744705, "grad_norm": 4.56019463921606, "learning_rate": 1.7530112830553318e-05, "loss": 1.0388, "step": 5349 }, { "epoch": 0.7554363174244564, "grad_norm": 3.7962916163520783, "learning_rate": 1.7529109641420557e-05, "loss": 1.0387, "step": 5350 }, { "epoch": 0.7555775204744423, "grad_norm": 4.760329118159397, "learning_rate": 1.7528106277314544e-05, "loss": 1.1884, "step": 5351 }, { "epoch": 0.7557187235244281, "grad_norm": 3.6578711328040323, "learning_rate": 1.7527102738258588e-05, "loss": 1.1826, "step": 5352 }, { "epoch": 0.755859926574414, "grad_norm": 3.860137271061851, "learning_rate": 1.7526099024276017e-05, "loss": 0.831, "step": 5353 }, { "epoch": 0.7560011296243999, "grad_norm": 3.5821823596119864, "learning_rate": 1.7525095135390152e-05, "loss": 0.8613, "step": 5354 }, { "epoch": 0.7561423326743858, "grad_norm": 3.5438383645336233, "learning_rate": 1.7524091071624333e-05, "loss": 1.0042, "step": 5355 }, { "epoch": 0.7562835357243717, "grad_norm": 4.007601416335219, "learning_rate": 1.752308683300188e-05, "loss": 1.1738, "step": 5356 }, { "epoch": 0.7564247387743576, "grad_norm": 3.74948697446228, "learning_rate": 1.752208241954614e-05, "loss": 0.9997, "step": 5357 }, { "epoch": 0.7565659418243434, "grad_norm": 3.4818225770743085, "learning_rate": 1.7521077831280453e-05, "loss": 0.9952, "step": 5358 }, { "epoch": 0.7567071448743293, "grad_norm": 3.1569259826482283, "learning_rate": 1.7520073068228166e-05, "loss": 0.853, "step": 5359 }, { "epoch": 0.7568483479243152, "grad_norm": 3.0880340703628497, "learning_rate": 1.751906813041263e-05, "loss": 0.8761, "step": 5360 }, { "epoch": 0.7569895509743011, "grad_norm": 3.330844088529482, "learning_rate": 1.7518063017857196e-05, "loss": 1.0737, "step": 5361 }, { "epoch": 0.757130754024287, "grad_norm": 3.2035100808879595, "learning_rate": 1.7517057730585224e-05, "loss": 0.7782, "step": 5362 }, { "epoch": 0.7572719570742728, "grad_norm": 3.618181341571531, "learning_rate": 1.7516052268620076e-05, "loss": 1.2048, "step": 5363 }, { "epoch": 0.7574131601242586, "grad_norm": 3.3004561402653505, "learning_rate": 1.751504663198512e-05, "loss": 0.8033, "step": 5364 }, { "epoch": 0.7575543631742445, "grad_norm": 3.638346055797656, "learning_rate": 1.751404082070373e-05, "loss": 1.0316, "step": 5365 }, { "epoch": 0.7576955662242304, "grad_norm": 3.9447209242817425, "learning_rate": 1.751303483479927e-05, "loss": 1.2032, "step": 5366 }, { "epoch": 0.7578367692742163, "grad_norm": 3.8317708009186355, "learning_rate": 1.7512028674295127e-05, "loss": 0.9018, "step": 5367 }, { "epoch": 0.7579779723242022, "grad_norm": 3.2837921856125076, "learning_rate": 1.7511022339214682e-05, "loss": 0.87, "step": 5368 }, { "epoch": 0.758119175374188, "grad_norm": 3.2129933919856666, "learning_rate": 1.7510015829581325e-05, "loss": 0.9365, "step": 5369 }, { "epoch": 0.7582603784241739, "grad_norm": 4.026588059677428, "learning_rate": 1.750900914541844e-05, "loss": 1.3919, "step": 5370 }, { "epoch": 0.7584015814741598, "grad_norm": 3.6047314512897675, "learning_rate": 1.7508002286749426e-05, "loss": 0.9642, "step": 5371 }, { "epoch": 0.7585427845241457, "grad_norm": 3.4349750066376856, "learning_rate": 1.750699525359768e-05, "loss": 1.052, "step": 5372 }, { "epoch": 0.7586839875741316, "grad_norm": 2.8483640155252354, "learning_rate": 1.750598804598661e-05, "loss": 0.8874, "step": 5373 }, { "epoch": 0.7588251906241175, "grad_norm": 3.1331039715751166, "learning_rate": 1.7504980663939614e-05, "loss": 0.8907, "step": 5374 }, { "epoch": 0.7589663936741033, "grad_norm": 3.8786223690064556, "learning_rate": 1.7503973107480112e-05, "loss": 0.9395, "step": 5375 }, { "epoch": 0.7591075967240892, "grad_norm": 3.5313911154494693, "learning_rate": 1.7502965376631515e-05, "loss": 0.789, "step": 5376 }, { "epoch": 0.7592487997740751, "grad_norm": 3.440949995923055, "learning_rate": 1.7501957471417242e-05, "loss": 0.9767, "step": 5377 }, { "epoch": 0.759390002824061, "grad_norm": 3.5194118101119347, "learning_rate": 1.750094939186072e-05, "loss": 1.0327, "step": 5378 }, { "epoch": 0.7595312058740469, "grad_norm": 3.238977218838803, "learning_rate": 1.749994113798537e-05, "loss": 1.0892, "step": 5379 }, { "epoch": 0.7596724089240328, "grad_norm": 3.928450417173385, "learning_rate": 1.749893270981463e-05, "loss": 0.9276, "step": 5380 }, { "epoch": 0.7598136119740186, "grad_norm": 3.0767471377721747, "learning_rate": 1.7497924107371932e-05, "loss": 0.8921, "step": 5381 }, { "epoch": 0.7599548150240045, "grad_norm": 3.418801860647783, "learning_rate": 1.7496915330680713e-05, "loss": 1.041, "step": 5382 }, { "epoch": 0.7600960180739904, "grad_norm": 3.0620524424025812, "learning_rate": 1.7495906379764423e-05, "loss": 0.7479, "step": 5383 }, { "epoch": 0.7602372211239763, "grad_norm": 4.275355539634467, "learning_rate": 1.7494897254646503e-05, "loss": 1.114, "step": 5384 }, { "epoch": 0.7603784241739622, "grad_norm": 4.048008794177169, "learning_rate": 1.749388795535041e-05, "loss": 1.1489, "step": 5385 }, { "epoch": 0.7605196272239481, "grad_norm": 3.818522516299592, "learning_rate": 1.7492878481899595e-05, "loss": 1.126, "step": 5386 }, { "epoch": 0.7606608302739339, "grad_norm": 3.27184299819054, "learning_rate": 1.7491868834317523e-05, "loss": 1.049, "step": 5387 }, { "epoch": 0.7608020333239198, "grad_norm": 2.8113222162309444, "learning_rate": 1.7490859012627652e-05, "loss": 0.7611, "step": 5388 }, { "epoch": 0.7609432363739057, "grad_norm": 3.9440174035547435, "learning_rate": 1.7489849016853452e-05, "loss": 1.0258, "step": 5389 }, { "epoch": 0.7610844394238916, "grad_norm": 4.037069280774431, "learning_rate": 1.7488838847018397e-05, "loss": 1.1217, "step": 5390 }, { "epoch": 0.7612256424738775, "grad_norm": 3.4308820595533525, "learning_rate": 1.7487828503145962e-05, "loss": 1.021, "step": 5391 }, { "epoch": 0.7613668455238634, "grad_norm": 3.317210516151756, "learning_rate": 1.7486817985259627e-05, "loss": 0.9406, "step": 5392 }, { "epoch": 0.7615080485738492, "grad_norm": 3.339148874346066, "learning_rate": 1.7485807293382872e-05, "loss": 0.9242, "step": 5393 }, { "epoch": 0.7616492516238351, "grad_norm": 3.104794939958571, "learning_rate": 1.7484796427539194e-05, "loss": 0.962, "step": 5394 }, { "epoch": 0.761790454673821, "grad_norm": 3.1822786717850677, "learning_rate": 1.7483785387752075e-05, "loss": 0.9293, "step": 5395 }, { "epoch": 0.7619316577238069, "grad_norm": 3.7753569725066867, "learning_rate": 1.7482774174045017e-05, "loss": 1.0524, "step": 5396 }, { "epoch": 0.7620728607737927, "grad_norm": 3.656345856196439, "learning_rate": 1.7481762786441515e-05, "loss": 1.0434, "step": 5397 }, { "epoch": 0.7622140638237785, "grad_norm": 3.1266808820681122, "learning_rate": 1.7480751224965083e-05, "loss": 1.0026, "step": 5398 }, { "epoch": 0.7623552668737644, "grad_norm": 3.8877828809385897, "learning_rate": 1.7479739489639218e-05, "loss": 1.1445, "step": 5399 }, { "epoch": 0.7624964699237503, "grad_norm": 3.426374720496311, "learning_rate": 1.747872758048744e-05, "loss": 1.0968, "step": 5400 }, { "epoch": 0.7626376729737362, "grad_norm": 3.7151113864288843, "learning_rate": 1.7477715497533263e-05, "loss": 1.2411, "step": 5401 }, { "epoch": 0.7627788760237221, "grad_norm": 4.3837234890510155, "learning_rate": 1.747670324080021e-05, "loss": 1.1182, "step": 5402 }, { "epoch": 0.762920079073708, "grad_norm": 3.704644420485445, "learning_rate": 1.7475690810311798e-05, "loss": 1.0697, "step": 5403 }, { "epoch": 0.7630612821236938, "grad_norm": 3.1797511884255023, "learning_rate": 1.7474678206091563e-05, "loss": 0.8888, "step": 5404 }, { "epoch": 0.7632024851736797, "grad_norm": 3.6668347240063075, "learning_rate": 1.747366542816303e-05, "loss": 1.1185, "step": 5405 }, { "epoch": 0.7633436882236656, "grad_norm": 3.877043989708006, "learning_rate": 1.7472652476549747e-05, "loss": 1.0538, "step": 5406 }, { "epoch": 0.7634848912736515, "grad_norm": 2.975033408962581, "learning_rate": 1.7471639351275243e-05, "loss": 0.9002, "step": 5407 }, { "epoch": 0.7636260943236374, "grad_norm": 3.7696452402881255, "learning_rate": 1.7470626052363068e-05, "loss": 1.0987, "step": 5408 }, { "epoch": 0.7637672973736233, "grad_norm": 3.616400703390129, "learning_rate": 1.746961257983677e-05, "loss": 0.9001, "step": 5409 }, { "epoch": 0.7639085004236091, "grad_norm": 5.079507275996488, "learning_rate": 1.74685989337199e-05, "loss": 1.0879, "step": 5410 }, { "epoch": 0.764049703473595, "grad_norm": 3.758807796492364, "learning_rate": 1.746758511403602e-05, "loss": 0.9876, "step": 5411 }, { "epoch": 0.7641909065235809, "grad_norm": 4.315461009830911, "learning_rate": 1.7466571120808684e-05, "loss": 1.2768, "step": 5412 }, { "epoch": 0.7643321095735668, "grad_norm": 3.904467858299182, "learning_rate": 1.746555695406146e-05, "loss": 0.962, "step": 5413 }, { "epoch": 0.7644733126235527, "grad_norm": 4.048332341990393, "learning_rate": 1.746454261381792e-05, "loss": 1.061, "step": 5414 }, { "epoch": 0.7646145156735386, "grad_norm": 2.89725901657611, "learning_rate": 1.746352810010163e-05, "loss": 0.7986, "step": 5415 }, { "epoch": 0.7647557187235244, "grad_norm": 3.675640570372638, "learning_rate": 1.7462513412936168e-05, "loss": 1.0599, "step": 5416 }, { "epoch": 0.7648969217735103, "grad_norm": 4.130022945934367, "learning_rate": 1.746149855234512e-05, "loss": 1.2213, "step": 5417 }, { "epoch": 0.7650381248234962, "grad_norm": 4.340604375697738, "learning_rate": 1.7460483518352068e-05, "loss": 1.0833, "step": 5418 }, { "epoch": 0.7651793278734821, "grad_norm": 4.234077591550618, "learning_rate": 1.74594683109806e-05, "loss": 1.1927, "step": 5419 }, { "epoch": 0.765320530923468, "grad_norm": 3.6938625175461, "learning_rate": 1.745845293025431e-05, "loss": 1.0849, "step": 5420 }, { "epoch": 0.7654617339734539, "grad_norm": 3.174687021745017, "learning_rate": 1.7457437376196796e-05, "loss": 0.887, "step": 5421 }, { "epoch": 0.7656029370234397, "grad_norm": 3.3389908803388235, "learning_rate": 1.7456421648831658e-05, "loss": 1.0011, "step": 5422 }, { "epoch": 0.7657441400734256, "grad_norm": 3.253803332742542, "learning_rate": 1.74554057481825e-05, "loss": 0.9493, "step": 5423 }, { "epoch": 0.7658853431234115, "grad_norm": 3.704802917420437, "learning_rate": 1.745438967427293e-05, "loss": 1.0061, "step": 5424 }, { "epoch": 0.7660265461733974, "grad_norm": 4.484934165129177, "learning_rate": 1.745337342712657e-05, "loss": 1.2865, "step": 5425 }, { "epoch": 0.7661677492233833, "grad_norm": 3.509162679541262, "learning_rate": 1.7452357006767026e-05, "loss": 1.2294, "step": 5426 }, { "epoch": 0.7663089522733691, "grad_norm": 3.7455921833957726, "learning_rate": 1.7451340413217925e-05, "loss": 1.0694, "step": 5427 }, { "epoch": 0.766450155323355, "grad_norm": 2.9745703764914815, "learning_rate": 1.745032364650289e-05, "loss": 0.8589, "step": 5428 }, { "epoch": 0.7665913583733409, "grad_norm": 3.41854172843559, "learning_rate": 1.744930670664555e-05, "loss": 1.0238, "step": 5429 }, { "epoch": 0.7667325614233268, "grad_norm": 4.339301578153021, "learning_rate": 1.744828959366954e-05, "loss": 1.3126, "step": 5430 }, { "epoch": 0.7668737644733126, "grad_norm": 3.764292027619486, "learning_rate": 1.74472723075985e-05, "loss": 1.1256, "step": 5431 }, { "epoch": 0.7670149675232985, "grad_norm": 3.696481527413238, "learning_rate": 1.744625484845606e-05, "loss": 1.2617, "step": 5432 }, { "epoch": 0.7671561705732843, "grad_norm": 4.644200044629729, "learning_rate": 1.7445237216265877e-05, "loss": 1.3092, "step": 5433 }, { "epoch": 0.7672973736232702, "grad_norm": 3.2806309193983894, "learning_rate": 1.7444219411051598e-05, "loss": 0.7684, "step": 5434 }, { "epoch": 0.7674385766732561, "grad_norm": 4.518475474337838, "learning_rate": 1.7443201432836874e-05, "loss": 1.0505, "step": 5435 }, { "epoch": 0.767579779723242, "grad_norm": 3.7840102976092695, "learning_rate": 1.744218328164536e-05, "loss": 1.0776, "step": 5436 }, { "epoch": 0.7677209827732279, "grad_norm": 2.9856873075148673, "learning_rate": 1.7441164957500728e-05, "loss": 0.9342, "step": 5437 }, { "epoch": 0.7678621858232137, "grad_norm": 3.1755142624079937, "learning_rate": 1.744014646042663e-05, "loss": 0.9841, "step": 5438 }, { "epoch": 0.7680033888731996, "grad_norm": 3.345074315074015, "learning_rate": 1.7439127790446743e-05, "loss": 0.876, "step": 5439 }, { "epoch": 0.7681445919231855, "grad_norm": 3.751632042020778, "learning_rate": 1.7438108947584737e-05, "loss": 1.0794, "step": 5440 }, { "epoch": 0.7682857949731714, "grad_norm": 3.000512645335439, "learning_rate": 1.7437089931864292e-05, "loss": 0.8496, "step": 5441 }, { "epoch": 0.7684269980231573, "grad_norm": 4.340099870446398, "learning_rate": 1.7436070743309093e-05, "loss": 1.2383, "step": 5442 }, { "epoch": 0.7685682010731432, "grad_norm": 4.187557147082218, "learning_rate": 1.7435051381942817e-05, "loss": 1.243, "step": 5443 }, { "epoch": 0.768709404123129, "grad_norm": 3.7940743329894837, "learning_rate": 1.743403184778916e-05, "loss": 0.9338, "step": 5444 }, { "epoch": 0.7688506071731149, "grad_norm": 3.7226482026615093, "learning_rate": 1.743301214087181e-05, "loss": 1.0496, "step": 5445 }, { "epoch": 0.7689918102231008, "grad_norm": 4.007191339929926, "learning_rate": 1.743199226121447e-05, "loss": 1.0488, "step": 5446 }, { "epoch": 0.7691330132730867, "grad_norm": 3.024329206614843, "learning_rate": 1.743097220884084e-05, "loss": 0.8295, "step": 5447 }, { "epoch": 0.7692742163230726, "grad_norm": 3.6449986142668096, "learning_rate": 1.7429951983774626e-05, "loss": 0.8466, "step": 5448 }, { "epoch": 0.7694154193730585, "grad_norm": 3.2101255872679757, "learning_rate": 1.7428931586039538e-05, "loss": 1.111, "step": 5449 }, { "epoch": 0.7695566224230443, "grad_norm": 3.313580892439186, "learning_rate": 1.742791101565928e-05, "loss": 0.9577, "step": 5450 }, { "epoch": 0.7696978254730302, "grad_norm": 3.505627211405446, "learning_rate": 1.7426890272657585e-05, "loss": 1.0076, "step": 5451 }, { "epoch": 0.7698390285230161, "grad_norm": 3.0838992554599916, "learning_rate": 1.7425869357058167e-05, "loss": 1.0761, "step": 5452 }, { "epoch": 0.769980231573002, "grad_norm": 3.595019344386333, "learning_rate": 1.7424848268884752e-05, "loss": 1.1028, "step": 5453 }, { "epoch": 0.7701214346229879, "grad_norm": 3.6288427831493277, "learning_rate": 1.742382700816107e-05, "loss": 1.1409, "step": 5454 }, { "epoch": 0.7702626376729738, "grad_norm": 3.3321765766156926, "learning_rate": 1.7422805574910856e-05, "loss": 0.93, "step": 5455 }, { "epoch": 0.7704038407229596, "grad_norm": 3.464115670158806, "learning_rate": 1.742178396915784e-05, "loss": 1.0121, "step": 5456 }, { "epoch": 0.7705450437729455, "grad_norm": 4.224355370678105, "learning_rate": 1.7420762190925774e-05, "loss": 1.1307, "step": 5457 }, { "epoch": 0.7706862468229314, "grad_norm": 3.5041705622031567, "learning_rate": 1.74197402402384e-05, "loss": 1.0237, "step": 5458 }, { "epoch": 0.7708274498729173, "grad_norm": 2.6948047971716114, "learning_rate": 1.7418718117119465e-05, "loss": 0.7643, "step": 5459 }, { "epoch": 0.7709686529229032, "grad_norm": 2.9406067347511216, "learning_rate": 1.7417695821592727e-05, "loss": 0.86, "step": 5460 }, { "epoch": 0.7711098559728891, "grad_norm": 3.351453901775548, "learning_rate": 1.741667335368194e-05, "loss": 1.0129, "step": 5461 }, { "epoch": 0.7712510590228749, "grad_norm": 4.208230882773419, "learning_rate": 1.7415650713410867e-05, "loss": 1.1629, "step": 5462 }, { "epoch": 0.7713922620728608, "grad_norm": 4.019132949998647, "learning_rate": 1.7414627900803274e-05, "loss": 1.1561, "step": 5463 }, { "epoch": 0.7715334651228467, "grad_norm": 4.247148358457009, "learning_rate": 1.7413604915882932e-05, "loss": 0.9532, "step": 5464 }, { "epoch": 0.7716746681728325, "grad_norm": 3.569117274691089, "learning_rate": 1.7412581758673612e-05, "loss": 0.9867, "step": 5465 }, { "epoch": 0.7718158712228184, "grad_norm": 3.1162766539873985, "learning_rate": 1.7411558429199095e-05, "loss": 0.9167, "step": 5466 }, { "epoch": 0.7719570742728042, "grad_norm": 5.46008178870321, "learning_rate": 1.741053492748316e-05, "loss": 1.2995, "step": 5467 }, { "epoch": 0.7720982773227901, "grad_norm": 3.2082701757948655, "learning_rate": 1.7409511253549592e-05, "loss": 0.8864, "step": 5468 }, { "epoch": 0.772239480372776, "grad_norm": 2.8525684895234895, "learning_rate": 1.7408487407422186e-05, "loss": 0.7714, "step": 5469 }, { "epoch": 0.7723806834227619, "grad_norm": 3.6576185486850905, "learning_rate": 1.7407463389124728e-05, "loss": 1.0568, "step": 5470 }, { "epoch": 0.7725218864727478, "grad_norm": 4.286791900303226, "learning_rate": 1.7406439198681024e-05, "loss": 1.1446, "step": 5471 }, { "epoch": 0.7726630895227337, "grad_norm": 3.1759813703188375, "learning_rate": 1.7405414836114868e-05, "loss": 0.8352, "step": 5472 }, { "epoch": 0.7728042925727195, "grad_norm": 2.6397627933379724, "learning_rate": 1.740439030145007e-05, "loss": 0.7177, "step": 5473 }, { "epoch": 0.7729454956227054, "grad_norm": 4.462811800151406, "learning_rate": 1.740336559471044e-05, "loss": 1.0938, "step": 5474 }, { "epoch": 0.7730866986726913, "grad_norm": 3.95150236802696, "learning_rate": 1.7402340715919793e-05, "loss": 1.0352, "step": 5475 }, { "epoch": 0.7732279017226772, "grad_norm": 3.273378117230015, "learning_rate": 1.7401315665101942e-05, "loss": 1.0301, "step": 5476 }, { "epoch": 0.7733691047726631, "grad_norm": 5.649363911289255, "learning_rate": 1.740029044228071e-05, "loss": 1.0424, "step": 5477 }, { "epoch": 0.773510307822649, "grad_norm": 3.7934081582467614, "learning_rate": 1.7399265047479926e-05, "loss": 1.1031, "step": 5478 }, { "epoch": 0.7736515108726348, "grad_norm": 3.1056314494622077, "learning_rate": 1.739823948072342e-05, "loss": 0.8952, "step": 5479 }, { "epoch": 0.7737927139226207, "grad_norm": 3.026185934311214, "learning_rate": 1.739721374203502e-05, "loss": 0.9753, "step": 5480 }, { "epoch": 0.7739339169726066, "grad_norm": 3.8915820198743676, "learning_rate": 1.7396187831438568e-05, "loss": 1.2617, "step": 5481 }, { "epoch": 0.7740751200225925, "grad_norm": 3.4740854816840283, "learning_rate": 1.7395161748957905e-05, "loss": 0.9645, "step": 5482 }, { "epoch": 0.7742163230725784, "grad_norm": 3.629940180428351, "learning_rate": 1.7394135494616876e-05, "loss": 1.1941, "step": 5483 }, { "epoch": 0.7743575261225643, "grad_norm": 3.574778499354198, "learning_rate": 1.7393109068439336e-05, "loss": 1.1333, "step": 5484 }, { "epoch": 0.7744987291725501, "grad_norm": 3.9754269387608394, "learning_rate": 1.739208247044913e-05, "loss": 1.0417, "step": 5485 }, { "epoch": 0.774639932222536, "grad_norm": 3.701183823612949, "learning_rate": 1.739105570067012e-05, "loss": 0.9895, "step": 5486 }, { "epoch": 0.7747811352725219, "grad_norm": 3.3709946006546323, "learning_rate": 1.7390028759126165e-05, "loss": 0.8507, "step": 5487 }, { "epoch": 0.7749223383225078, "grad_norm": 3.8490580919742188, "learning_rate": 1.7389001645841137e-05, "loss": 1.2366, "step": 5488 }, { "epoch": 0.7750635413724937, "grad_norm": 3.251808414210018, "learning_rate": 1.73879743608389e-05, "loss": 0.7669, "step": 5489 }, { "epoch": 0.7752047444224796, "grad_norm": 3.927139522898013, "learning_rate": 1.738694690414333e-05, "loss": 1.0259, "step": 5490 }, { "epoch": 0.7753459474724654, "grad_norm": 3.748144603127959, "learning_rate": 1.7385919275778306e-05, "loss": 1.0627, "step": 5491 }, { "epoch": 0.7754871505224513, "grad_norm": 3.3794624734631613, "learning_rate": 1.7384891475767706e-05, "loss": 0.9415, "step": 5492 }, { "epoch": 0.7756283535724372, "grad_norm": 3.7116643244219176, "learning_rate": 1.7383863504135416e-05, "loss": 0.9309, "step": 5493 }, { "epoch": 0.7757695566224231, "grad_norm": 3.3049839921968474, "learning_rate": 1.738283536090533e-05, "loss": 0.8912, "step": 5494 }, { "epoch": 0.775910759672409, "grad_norm": 3.681256149724283, "learning_rate": 1.7381807046101336e-05, "loss": 0.9897, "step": 5495 }, { "epoch": 0.7760519627223949, "grad_norm": 3.672725583650286, "learning_rate": 1.7380778559747335e-05, "loss": 0.9971, "step": 5496 }, { "epoch": 0.7761931657723807, "grad_norm": 3.3309420464260393, "learning_rate": 1.7379749901867227e-05, "loss": 0.9425, "step": 5497 }, { "epoch": 0.7763343688223666, "grad_norm": 3.311376863797063, "learning_rate": 1.7378721072484923e-05, "loss": 0.9884, "step": 5498 }, { "epoch": 0.7764755718723524, "grad_norm": 3.717433860806013, "learning_rate": 1.7377692071624323e-05, "loss": 1.0926, "step": 5499 }, { "epoch": 0.7766167749223383, "grad_norm": 3.4785140139755706, "learning_rate": 1.7376662899309346e-05, "loss": 1.0404, "step": 5500 }, { "epoch": 0.7767579779723242, "grad_norm": 4.332064995389705, "learning_rate": 1.737563355556391e-05, "loss": 1.1124, "step": 5501 }, { "epoch": 0.77689918102231, "grad_norm": 4.2221848129915704, "learning_rate": 1.7374604040411934e-05, "loss": 1.3102, "step": 5502 }, { "epoch": 0.7770403840722959, "grad_norm": 3.8195297291987615, "learning_rate": 1.7373574353877346e-05, "loss": 1.0377, "step": 5503 }, { "epoch": 0.7771815871222818, "grad_norm": 3.6650804155583843, "learning_rate": 1.7372544495984076e-05, "loss": 1.2207, "step": 5504 }, { "epoch": 0.7773227901722677, "grad_norm": 3.552013961931948, "learning_rate": 1.7371514466756055e-05, "loss": 1.029, "step": 5505 }, { "epoch": 0.7774639932222536, "grad_norm": 4.007375262694977, "learning_rate": 1.7370484266217223e-05, "loss": 1.0972, "step": 5506 }, { "epoch": 0.7776051962722395, "grad_norm": 3.9234146059004296, "learning_rate": 1.7369453894391513e-05, "loss": 1.0395, "step": 5507 }, { "epoch": 0.7777463993222253, "grad_norm": 3.3864143224196317, "learning_rate": 1.7368423351302884e-05, "loss": 0.9433, "step": 5508 }, { "epoch": 0.7778876023722112, "grad_norm": 3.4138152270609163, "learning_rate": 1.7367392636975275e-05, "loss": 0.9627, "step": 5509 }, { "epoch": 0.7780288054221971, "grad_norm": 5.298940634031672, "learning_rate": 1.7366361751432645e-05, "loss": 1.2785, "step": 5510 }, { "epoch": 0.778170008472183, "grad_norm": 3.450417246640402, "learning_rate": 1.7365330694698947e-05, "loss": 0.907, "step": 5511 }, { "epoch": 0.7783112115221689, "grad_norm": 3.612703100238923, "learning_rate": 1.7364299466798146e-05, "loss": 1.1738, "step": 5512 }, { "epoch": 0.7784524145721547, "grad_norm": 3.396398651059966, "learning_rate": 1.7363268067754205e-05, "loss": 0.9299, "step": 5513 }, { "epoch": 0.7785936176221406, "grad_norm": 4.312523998912026, "learning_rate": 1.7362236497591097e-05, "loss": 0.9635, "step": 5514 }, { "epoch": 0.7787348206721265, "grad_norm": 4.188675861491466, "learning_rate": 1.7361204756332788e-05, "loss": 1.0595, "step": 5515 }, { "epoch": 0.7788760237221124, "grad_norm": 3.6056031724780397, "learning_rate": 1.7360172844003263e-05, "loss": 0.972, "step": 5516 }, { "epoch": 0.7790172267720983, "grad_norm": 3.6793624464795034, "learning_rate": 1.7359140760626497e-05, "loss": 1.188, "step": 5517 }, { "epoch": 0.7791584298220842, "grad_norm": 4.366159804164966, "learning_rate": 1.7358108506226477e-05, "loss": 0.9186, "step": 5518 }, { "epoch": 0.77929963287207, "grad_norm": 3.0893745697555515, "learning_rate": 1.7357076080827195e-05, "loss": 0.8816, "step": 5519 }, { "epoch": 0.7794408359220559, "grad_norm": 3.030196049102594, "learning_rate": 1.7356043484452643e-05, "loss": 0.7042, "step": 5520 }, { "epoch": 0.7795820389720418, "grad_norm": 3.7040992261152614, "learning_rate": 1.7355010717126817e-05, "loss": 1.0734, "step": 5521 }, { "epoch": 0.7797232420220277, "grad_norm": 4.264404868495973, "learning_rate": 1.7353977778873718e-05, "loss": 1.1105, "step": 5522 }, { "epoch": 0.7798644450720136, "grad_norm": 4.357580447683498, "learning_rate": 1.7352944669717352e-05, "loss": 1.2296, "step": 5523 }, { "epoch": 0.7800056481219995, "grad_norm": 3.063175072050123, "learning_rate": 1.7351911389681725e-05, "loss": 0.8829, "step": 5524 }, { "epoch": 0.7801468511719853, "grad_norm": 3.9241556927208645, "learning_rate": 1.7350877938790855e-05, "loss": 1.1596, "step": 5525 }, { "epoch": 0.7802880542219712, "grad_norm": 3.5182196371223218, "learning_rate": 1.7349844317068754e-05, "loss": 0.9067, "step": 5526 }, { "epoch": 0.7804292572719571, "grad_norm": 3.586726406258281, "learning_rate": 1.7348810524539447e-05, "loss": 0.9022, "step": 5527 }, { "epoch": 0.780570460321943, "grad_norm": 3.8174445189897477, "learning_rate": 1.7347776561226956e-05, "loss": 1.0311, "step": 5528 }, { "epoch": 0.7807116633719289, "grad_norm": 3.4182166002878267, "learning_rate": 1.734674242715531e-05, "loss": 0.9857, "step": 5529 }, { "epoch": 0.7808528664219148, "grad_norm": 2.9570319719723925, "learning_rate": 1.7345708122348543e-05, "loss": 0.803, "step": 5530 }, { "epoch": 0.7809940694719006, "grad_norm": 4.15145507734481, "learning_rate": 1.7344673646830696e-05, "loss": 1.3433, "step": 5531 }, { "epoch": 0.7811352725218865, "grad_norm": 3.7864881117714413, "learning_rate": 1.73436390006258e-05, "loss": 0.9908, "step": 5532 }, { "epoch": 0.7812764755718723, "grad_norm": 3.5576658542729933, "learning_rate": 1.734260418375791e-05, "loss": 1.0185, "step": 5533 }, { "epoch": 0.7814176786218582, "grad_norm": 3.7861328871430175, "learning_rate": 1.7341569196251065e-05, "loss": 1.0762, "step": 5534 }, { "epoch": 0.7815588816718441, "grad_norm": 3.955852146748055, "learning_rate": 1.7340534038129324e-05, "loss": 1.1641, "step": 5535 }, { "epoch": 0.78170008472183, "grad_norm": 3.514410541832739, "learning_rate": 1.7339498709416744e-05, "loss": 1.08, "step": 5536 }, { "epoch": 0.7818412877718158, "grad_norm": 3.7785978993521465, "learning_rate": 1.733846321013738e-05, "loss": 0.9411, "step": 5537 }, { "epoch": 0.7819824908218017, "grad_norm": 3.680362821417703, "learning_rate": 1.7337427540315305e-05, "loss": 1.1397, "step": 5538 }, { "epoch": 0.7821236938717876, "grad_norm": 3.172021116652061, "learning_rate": 1.733639169997458e-05, "loss": 0.9694, "step": 5539 }, { "epoch": 0.7822648969217735, "grad_norm": 3.2258629903312883, "learning_rate": 1.733535568913928e-05, "loss": 0.9221, "step": 5540 }, { "epoch": 0.7824060999717594, "grad_norm": 4.3784446453653185, "learning_rate": 1.733431950783348e-05, "loss": 1.0239, "step": 5541 }, { "epoch": 0.7825473030217452, "grad_norm": 3.743959887502583, "learning_rate": 1.7333283156081266e-05, "loss": 0.9902, "step": 5542 }, { "epoch": 0.7826885060717311, "grad_norm": 3.225849988433045, "learning_rate": 1.7332246633906717e-05, "loss": 0.8535, "step": 5543 }, { "epoch": 0.782829709121717, "grad_norm": 3.423264695366045, "learning_rate": 1.733120994133392e-05, "loss": 0.8664, "step": 5544 }, { "epoch": 0.7829709121717029, "grad_norm": 3.0580152853401135, "learning_rate": 1.7330173078386975e-05, "loss": 0.9779, "step": 5545 }, { "epoch": 0.7831121152216888, "grad_norm": 4.930223593609097, "learning_rate": 1.732913604508997e-05, "loss": 1.2184, "step": 5546 }, { "epoch": 0.7832533182716747, "grad_norm": 4.649803431847359, "learning_rate": 1.7328098841467008e-05, "loss": 1.0283, "step": 5547 }, { "epoch": 0.7833945213216605, "grad_norm": 3.193176297270458, "learning_rate": 1.73270614675422e-05, "loss": 0.8722, "step": 5548 }, { "epoch": 0.7835357243716464, "grad_norm": 2.9981894439731076, "learning_rate": 1.732602392333964e-05, "loss": 0.8468, "step": 5549 }, { "epoch": 0.7836769274216323, "grad_norm": 3.7753569711488444, "learning_rate": 1.732498620888345e-05, "loss": 1.1134, "step": 5550 }, { "epoch": 0.7838181304716182, "grad_norm": 4.385082287075697, "learning_rate": 1.7323948324197747e-05, "loss": 1.1967, "step": 5551 }, { "epoch": 0.7839593335216041, "grad_norm": 3.7639666428052667, "learning_rate": 1.7322910269306645e-05, "loss": 1.1527, "step": 5552 }, { "epoch": 0.78410053657159, "grad_norm": 3.175049290044388, "learning_rate": 1.732187204423427e-05, "loss": 0.8871, "step": 5553 }, { "epoch": 0.7842417396215758, "grad_norm": 3.383394788299492, "learning_rate": 1.7320833649004754e-05, "loss": 1.0735, "step": 5554 }, { "epoch": 0.7843829426715617, "grad_norm": 3.0986046529606424, "learning_rate": 1.7319795083642223e-05, "loss": 0.8646, "step": 5555 }, { "epoch": 0.7845241457215476, "grad_norm": 4.0506815446207565, "learning_rate": 1.7318756348170817e-05, "loss": 1.1784, "step": 5556 }, { "epoch": 0.7846653487715335, "grad_norm": 3.5914037911705186, "learning_rate": 1.7317717442614673e-05, "loss": 1.1047, "step": 5557 }, { "epoch": 0.7848065518215194, "grad_norm": 3.424726557642637, "learning_rate": 1.7316678366997935e-05, "loss": 0.9891, "step": 5558 }, { "epoch": 0.7849477548715053, "grad_norm": 3.0875944220221303, "learning_rate": 1.7315639121344755e-05, "loss": 0.857, "step": 5559 }, { "epoch": 0.7850889579214911, "grad_norm": 3.238190377724806, "learning_rate": 1.731459970567928e-05, "loss": 0.9024, "step": 5560 }, { "epoch": 0.785230160971477, "grad_norm": 3.1130475442057364, "learning_rate": 1.7313560120025667e-05, "loss": 0.9403, "step": 5561 }, { "epoch": 0.7853713640214629, "grad_norm": 3.2170217222508013, "learning_rate": 1.731252036440807e-05, "loss": 0.9293, "step": 5562 }, { "epoch": 0.7855125670714488, "grad_norm": 3.3374823425453752, "learning_rate": 1.7311480438850664e-05, "loss": 0.9483, "step": 5563 }, { "epoch": 0.7856537701214347, "grad_norm": 3.266927980915045, "learning_rate": 1.7310440343377608e-05, "loss": 0.9346, "step": 5564 }, { "epoch": 0.7857949731714206, "grad_norm": 3.7127208274997727, "learning_rate": 1.7309400078013077e-05, "loss": 1.3239, "step": 5565 }, { "epoch": 0.7859361762214064, "grad_norm": 3.6147743841381796, "learning_rate": 1.730835964278124e-05, "loss": 1.0566, "step": 5566 }, { "epoch": 0.7860773792713922, "grad_norm": 3.316404727212875, "learning_rate": 1.7307319037706286e-05, "loss": 0.9942, "step": 5567 }, { "epoch": 0.7862185823213781, "grad_norm": 2.9725723729494695, "learning_rate": 1.7306278262812393e-05, "loss": 0.8373, "step": 5568 }, { "epoch": 0.786359785371364, "grad_norm": 3.2510958108970844, "learning_rate": 1.7305237318123748e-05, "loss": 1.1214, "step": 5569 }, { "epoch": 0.7865009884213499, "grad_norm": 3.213887722487075, "learning_rate": 1.7304196203664544e-05, "loss": 1.0394, "step": 5570 }, { "epoch": 0.7866421914713357, "grad_norm": 2.7703904675542472, "learning_rate": 1.7303154919458972e-05, "loss": 0.8856, "step": 5571 }, { "epoch": 0.7867833945213216, "grad_norm": 4.089659752634055, "learning_rate": 1.7302113465531233e-05, "loss": 1.0658, "step": 5572 }, { "epoch": 0.7869245975713075, "grad_norm": 3.5849388316931887, "learning_rate": 1.7301071841905535e-05, "loss": 0.9173, "step": 5573 }, { "epoch": 0.7870658006212934, "grad_norm": 4.092817944480415, "learning_rate": 1.7300030048606077e-05, "loss": 0.8767, "step": 5574 }, { "epoch": 0.7872070036712793, "grad_norm": 4.039515390228647, "learning_rate": 1.7298988085657073e-05, "loss": 1.1206, "step": 5575 }, { "epoch": 0.7873482067212652, "grad_norm": 3.2167899777118953, "learning_rate": 1.729794595308274e-05, "loss": 0.9047, "step": 5576 }, { "epoch": 0.787489409771251, "grad_norm": 3.375398191699207, "learning_rate": 1.729690365090729e-05, "loss": 1.0103, "step": 5577 }, { "epoch": 0.7876306128212369, "grad_norm": 3.979955602085848, "learning_rate": 1.7295861179154954e-05, "loss": 1.0797, "step": 5578 }, { "epoch": 0.7877718158712228, "grad_norm": 3.9673889872253842, "learning_rate": 1.729481853784996e-05, "loss": 1.1959, "step": 5579 }, { "epoch": 0.7879130189212087, "grad_norm": 3.194145157451402, "learning_rate": 1.729377572701653e-05, "loss": 0.8583, "step": 5580 }, { "epoch": 0.7880542219711946, "grad_norm": 4.618724150636717, "learning_rate": 1.7292732746678898e-05, "loss": 1.3627, "step": 5581 }, { "epoch": 0.7881954250211805, "grad_norm": 3.5027224212787846, "learning_rate": 1.729168959686131e-05, "loss": 1.0713, "step": 5582 }, { "epoch": 0.7883366280711663, "grad_norm": 3.617407610343722, "learning_rate": 1.7290646277588004e-05, "loss": 1.0417, "step": 5583 }, { "epoch": 0.7884778311211522, "grad_norm": 3.414663293788455, "learning_rate": 1.7289602788883227e-05, "loss": 0.8317, "step": 5584 }, { "epoch": 0.7886190341711381, "grad_norm": 3.354513127406612, "learning_rate": 1.728855913077123e-05, "loss": 0.9047, "step": 5585 }, { "epoch": 0.788760237221124, "grad_norm": 3.614118585685528, "learning_rate": 1.728751530327627e-05, "loss": 1.3129, "step": 5586 }, { "epoch": 0.7889014402711099, "grad_norm": 3.6826238586648383, "learning_rate": 1.7286471306422594e-05, "loss": 0.8989, "step": 5587 }, { "epoch": 0.7890426433210957, "grad_norm": 3.328047564233775, "learning_rate": 1.7285427140234476e-05, "loss": 0.8334, "step": 5588 }, { "epoch": 0.7891838463710816, "grad_norm": 3.8771571349971143, "learning_rate": 1.7284382804736178e-05, "loss": 1.1559, "step": 5589 }, { "epoch": 0.7893250494210675, "grad_norm": 3.7381213192988447, "learning_rate": 1.728333829995197e-05, "loss": 1.1182, "step": 5590 }, { "epoch": 0.7894662524710534, "grad_norm": 3.7583253552563036, "learning_rate": 1.7282293625906123e-05, "loss": 1.0599, "step": 5591 }, { "epoch": 0.7896074555210393, "grad_norm": 3.4245662368654277, "learning_rate": 1.7281248782622916e-05, "loss": 0.9355, "step": 5592 }, { "epoch": 0.7897486585710252, "grad_norm": 3.815138867480394, "learning_rate": 1.7280203770126634e-05, "loss": 1.114, "step": 5593 }, { "epoch": 0.789889861621011, "grad_norm": 3.2577437247835865, "learning_rate": 1.7279158588441558e-05, "loss": 1.0135, "step": 5594 }, { "epoch": 0.7900310646709969, "grad_norm": 4.249401455340824, "learning_rate": 1.7278113237591985e-05, "loss": 0.9072, "step": 5595 }, { "epoch": 0.7901722677209828, "grad_norm": 3.8572822756403586, "learning_rate": 1.7277067717602197e-05, "loss": 1.1375, "step": 5596 }, { "epoch": 0.7903134707709687, "grad_norm": 3.1692793142160265, "learning_rate": 1.7276022028496505e-05, "loss": 0.7544, "step": 5597 }, { "epoch": 0.7904546738209546, "grad_norm": 3.6550327120836466, "learning_rate": 1.7274976170299197e-05, "loss": 0.9915, "step": 5598 }, { "epoch": 0.7905958768709405, "grad_norm": 4.551089517885977, "learning_rate": 1.727393014303459e-05, "loss": 1.0764, "step": 5599 }, { "epoch": 0.7907370799209263, "grad_norm": 2.8715677140076714, "learning_rate": 1.7272883946726986e-05, "loss": 0.8208, "step": 5600 }, { "epoch": 0.7908782829709121, "grad_norm": 3.6483004810001427, "learning_rate": 1.72718375814007e-05, "loss": 0.9627, "step": 5601 }, { "epoch": 0.791019486020898, "grad_norm": 3.6195621959955124, "learning_rate": 1.727079104708005e-05, "loss": 1.3154, "step": 5602 }, { "epoch": 0.7911606890708839, "grad_norm": 3.562321564611492, "learning_rate": 1.7269744343789354e-05, "loss": 0.6574, "step": 5603 }, { "epoch": 0.7913018921208698, "grad_norm": 4.176793378568638, "learning_rate": 1.7268697471552937e-05, "loss": 1.2392, "step": 5604 }, { "epoch": 0.7914430951708556, "grad_norm": 3.0482972944825506, "learning_rate": 1.7267650430395134e-05, "loss": 0.8102, "step": 5605 }, { "epoch": 0.7915842982208415, "grad_norm": 3.5540292381404504, "learning_rate": 1.7266603220340273e-05, "loss": 0.9388, "step": 5606 }, { "epoch": 0.7917255012708274, "grad_norm": 2.959431400209971, "learning_rate": 1.726555584141269e-05, "loss": 0.8839, "step": 5607 }, { "epoch": 0.7918667043208133, "grad_norm": 3.3973481605813642, "learning_rate": 1.7264508293636726e-05, "loss": 0.9087, "step": 5608 }, { "epoch": 0.7920079073707992, "grad_norm": 3.52378967886429, "learning_rate": 1.726346057703673e-05, "loss": 1.0837, "step": 5609 }, { "epoch": 0.7921491104207851, "grad_norm": 3.559858126373494, "learning_rate": 1.7262412691637044e-05, "loss": 0.9548, "step": 5610 }, { "epoch": 0.792290313470771, "grad_norm": 3.5478044614050295, "learning_rate": 1.7261364637462026e-05, "loss": 1.2252, "step": 5611 }, { "epoch": 0.7924315165207568, "grad_norm": 3.1313948131891074, "learning_rate": 1.7260316414536026e-05, "loss": 0.8664, "step": 5612 }, { "epoch": 0.7925727195707427, "grad_norm": 3.345390008898074, "learning_rate": 1.725926802288341e-05, "loss": 0.9052, "step": 5613 }, { "epoch": 0.7927139226207286, "grad_norm": 3.6354923570935886, "learning_rate": 1.7258219462528543e-05, "loss": 1.059, "step": 5614 }, { "epoch": 0.7928551256707145, "grad_norm": 4.028224192322754, "learning_rate": 1.7257170733495786e-05, "loss": 1.2128, "step": 5615 }, { "epoch": 0.7929963287207004, "grad_norm": 3.8016071756978715, "learning_rate": 1.725612183580952e-05, "loss": 0.9815, "step": 5616 }, { "epoch": 0.7931375317706862, "grad_norm": 3.7766241341128053, "learning_rate": 1.725507276949411e-05, "loss": 1.2226, "step": 5617 }, { "epoch": 0.7932787348206721, "grad_norm": 3.293212639459275, "learning_rate": 1.7254023534573946e-05, "loss": 1.0946, "step": 5618 }, { "epoch": 0.793419937870658, "grad_norm": 3.8491280066730287, "learning_rate": 1.7252974131073407e-05, "loss": 1.1051, "step": 5619 }, { "epoch": 0.7935611409206439, "grad_norm": 4.397739877632676, "learning_rate": 1.7251924559016885e-05, "loss": 1.1929, "step": 5620 }, { "epoch": 0.7937023439706298, "grad_norm": 3.213885060430707, "learning_rate": 1.7250874818428763e-05, "loss": 0.9268, "step": 5621 }, { "epoch": 0.7938435470206157, "grad_norm": 3.426073763377816, "learning_rate": 1.7249824909333445e-05, "loss": 0.9959, "step": 5622 }, { "epoch": 0.7939847500706015, "grad_norm": 4.23477758167533, "learning_rate": 1.7248774831755324e-05, "loss": 1.1895, "step": 5623 }, { "epoch": 0.7941259531205874, "grad_norm": 3.9337431366398428, "learning_rate": 1.7247724585718807e-05, "loss": 1.2488, "step": 5624 }, { "epoch": 0.7942671561705733, "grad_norm": 3.384446178218081, "learning_rate": 1.7246674171248304e-05, "loss": 1.2503, "step": 5625 }, { "epoch": 0.7944083592205592, "grad_norm": 2.766893726732559, "learning_rate": 1.7245623588368217e-05, "loss": 0.8349, "step": 5626 }, { "epoch": 0.7945495622705451, "grad_norm": 4.462461431382719, "learning_rate": 1.7244572837102974e-05, "loss": 1.3379, "step": 5627 }, { "epoch": 0.794690765320531, "grad_norm": 2.958669721434582, "learning_rate": 1.7243521917476984e-05, "loss": 0.8713, "step": 5628 }, { "epoch": 0.7948319683705168, "grad_norm": 3.2921073505695304, "learning_rate": 1.7242470829514674e-05, "loss": 0.9114, "step": 5629 }, { "epoch": 0.7949731714205027, "grad_norm": 4.045780646736548, "learning_rate": 1.7241419573240463e-05, "loss": 1.2129, "step": 5630 }, { "epoch": 0.7951143744704886, "grad_norm": 3.6477694825745983, "learning_rate": 1.7240368148678793e-05, "loss": 1.1109, "step": 5631 }, { "epoch": 0.7952555775204745, "grad_norm": 3.3786596971451597, "learning_rate": 1.7239316555854096e-05, "loss": 0.9392, "step": 5632 }, { "epoch": 0.7953967805704604, "grad_norm": 4.218556220562897, "learning_rate": 1.723826479479081e-05, "loss": 0.8799, "step": 5633 }, { "epoch": 0.7955379836204463, "grad_norm": 4.3397071483583884, "learning_rate": 1.723721286551337e-05, "loss": 1.0307, "step": 5634 }, { "epoch": 0.795679186670432, "grad_norm": 2.988290376687753, "learning_rate": 1.7236160768046234e-05, "loss": 0.8105, "step": 5635 }, { "epoch": 0.7958203897204179, "grad_norm": 4.108123455540468, "learning_rate": 1.7235108502413844e-05, "loss": 0.9541, "step": 5636 }, { "epoch": 0.7959615927704038, "grad_norm": 3.2496570351757277, "learning_rate": 1.7234056068640658e-05, "loss": 1.0448, "step": 5637 }, { "epoch": 0.7961027958203897, "grad_norm": 3.1702481861816754, "learning_rate": 1.7233003466751133e-05, "loss": 0.7888, "step": 5638 }, { "epoch": 0.7962439988703756, "grad_norm": 3.696681412077238, "learning_rate": 1.7231950696769733e-05, "loss": 0.9871, "step": 5639 }, { "epoch": 0.7963852019203614, "grad_norm": 3.3361634641335605, "learning_rate": 1.7230897758720916e-05, "loss": 0.9872, "step": 5640 }, { "epoch": 0.7965264049703473, "grad_norm": 3.2023248677367007, "learning_rate": 1.722984465262916e-05, "loss": 0.8924, "step": 5641 }, { "epoch": 0.7966676080203332, "grad_norm": 3.9216539566302124, "learning_rate": 1.722879137851894e-05, "loss": 0.9518, "step": 5642 }, { "epoch": 0.7968088110703191, "grad_norm": 2.8873382949743056, "learning_rate": 1.7227737936414733e-05, "loss": 0.9408, "step": 5643 }, { "epoch": 0.796950014120305, "grad_norm": 3.616981459946964, "learning_rate": 1.7226684326341014e-05, "loss": 0.8917, "step": 5644 }, { "epoch": 0.7970912171702909, "grad_norm": 3.2696074315812793, "learning_rate": 1.722563054832227e-05, "loss": 1.0187, "step": 5645 }, { "epoch": 0.7972324202202767, "grad_norm": 3.2816344178091335, "learning_rate": 1.7224576602382993e-05, "loss": 0.9796, "step": 5646 }, { "epoch": 0.7973736232702626, "grad_norm": 3.4429876637984056, "learning_rate": 1.7223522488547678e-05, "loss": 1.0807, "step": 5647 }, { "epoch": 0.7975148263202485, "grad_norm": 2.981552798263127, "learning_rate": 1.722246820684082e-05, "loss": 0.7922, "step": 5648 }, { "epoch": 0.7976560293702344, "grad_norm": 3.116658450966832, "learning_rate": 1.7221413757286916e-05, "loss": 0.8143, "step": 5649 }, { "epoch": 0.7977972324202203, "grad_norm": 3.2953140457742176, "learning_rate": 1.722035913991048e-05, "loss": 0.9233, "step": 5650 }, { "epoch": 0.7979384354702062, "grad_norm": 3.739178375582755, "learning_rate": 1.7219304354736013e-05, "loss": 1.1204, "step": 5651 }, { "epoch": 0.798079638520192, "grad_norm": 3.876233024730047, "learning_rate": 1.7218249401788033e-05, "loss": 1.1663, "step": 5652 }, { "epoch": 0.7982208415701779, "grad_norm": 3.577865630515231, "learning_rate": 1.721719428109105e-05, "loss": 0.8175, "step": 5653 }, { "epoch": 0.7983620446201638, "grad_norm": 3.486035862473454, "learning_rate": 1.7216138992669593e-05, "loss": 1.1233, "step": 5654 }, { "epoch": 0.7985032476701497, "grad_norm": 3.634782829218257, "learning_rate": 1.721508353654818e-05, "loss": 1.1401, "step": 5655 }, { "epoch": 0.7986444507201356, "grad_norm": 3.115458851223582, "learning_rate": 1.7214027912751342e-05, "loss": 0.8681, "step": 5656 }, { "epoch": 0.7987856537701215, "grad_norm": 3.3529372948495695, "learning_rate": 1.721297212130361e-05, "loss": 0.9279, "step": 5657 }, { "epoch": 0.7989268568201073, "grad_norm": 3.259706039622505, "learning_rate": 1.7211916162229524e-05, "loss": 1.1726, "step": 5658 }, { "epoch": 0.7990680598700932, "grad_norm": 3.5016640836178152, "learning_rate": 1.7210860035553617e-05, "loss": 0.9302, "step": 5659 }, { "epoch": 0.7992092629200791, "grad_norm": 4.3452420322325915, "learning_rate": 1.720980374130044e-05, "loss": 0.8945, "step": 5660 }, { "epoch": 0.799350465970065, "grad_norm": 3.42804758024367, "learning_rate": 1.7208747279494535e-05, "loss": 0.988, "step": 5661 }, { "epoch": 0.7994916690200509, "grad_norm": 3.794524794938975, "learning_rate": 1.720769065016046e-05, "loss": 1.0704, "step": 5662 }, { "epoch": 0.7996328720700367, "grad_norm": 3.6253327499766836, "learning_rate": 1.7206633853322766e-05, "loss": 1.0324, "step": 5663 }, { "epoch": 0.7997740751200226, "grad_norm": 3.624499999657056, "learning_rate": 1.720557688900601e-05, "loss": 0.889, "step": 5664 }, { "epoch": 0.7999152781700085, "grad_norm": 2.9038279622004164, "learning_rate": 1.7204519757234763e-05, "loss": 0.8118, "step": 5665 }, { "epoch": 0.8000564812199944, "grad_norm": 3.0475602519073988, "learning_rate": 1.7203462458033586e-05, "loss": 0.8897, "step": 5666 }, { "epoch": 0.8001976842699803, "grad_norm": 3.366591855794279, "learning_rate": 1.7202404991427054e-05, "loss": 0.8562, "step": 5667 }, { "epoch": 0.8003388873199662, "grad_norm": 3.3871864290407667, "learning_rate": 1.720134735743974e-05, "loss": 1.0064, "step": 5668 }, { "epoch": 0.800480090369952, "grad_norm": 3.7088562720499287, "learning_rate": 1.7200289556096224e-05, "loss": 1.071, "step": 5669 }, { "epoch": 0.8006212934199378, "grad_norm": 3.4901118839522223, "learning_rate": 1.7199231587421087e-05, "loss": 1.1796, "step": 5670 }, { "epoch": 0.8007624964699237, "grad_norm": 3.2479831352701716, "learning_rate": 1.719817345143892e-05, "loss": 1.0612, "step": 5671 }, { "epoch": 0.8009036995199096, "grad_norm": 3.299421339600392, "learning_rate": 1.7197115148174305e-05, "loss": 0.9246, "step": 5672 }, { "epoch": 0.8010449025698955, "grad_norm": 3.4681165837982104, "learning_rate": 1.7196056677651846e-05, "loss": 0.9879, "step": 5673 }, { "epoch": 0.8011861056198814, "grad_norm": 3.563022849693697, "learning_rate": 1.7194998039896134e-05, "loss": 1.0384, "step": 5674 }, { "epoch": 0.8013273086698672, "grad_norm": 3.4676801413233345, "learning_rate": 1.719393923493178e-05, "loss": 1.0407, "step": 5675 }, { "epoch": 0.8014685117198531, "grad_norm": 3.3572882118116474, "learning_rate": 1.719288026278338e-05, "loss": 0.7867, "step": 5676 }, { "epoch": 0.801609714769839, "grad_norm": 3.8246798797378734, "learning_rate": 1.7191821123475555e-05, "loss": 1.0487, "step": 5677 }, { "epoch": 0.8017509178198249, "grad_norm": 3.329061192527636, "learning_rate": 1.719076181703291e-05, "loss": 0.7192, "step": 5678 }, { "epoch": 0.8018921208698108, "grad_norm": 3.7891338625566786, "learning_rate": 1.7189702343480067e-05, "loss": 1.0386, "step": 5679 }, { "epoch": 0.8020333239197966, "grad_norm": 3.9969102595485118, "learning_rate": 1.7188642702841643e-05, "loss": 1.2247, "step": 5680 }, { "epoch": 0.8021745269697825, "grad_norm": 3.058334834296358, "learning_rate": 1.7187582895142266e-05, "loss": 0.856, "step": 5681 }, { "epoch": 0.8023157300197684, "grad_norm": 3.9333074466022175, "learning_rate": 1.7186522920406572e-05, "loss": 1.0895, "step": 5682 }, { "epoch": 0.8024569330697543, "grad_norm": 3.89759237703865, "learning_rate": 1.7185462778659187e-05, "loss": 1.1567, "step": 5683 }, { "epoch": 0.8025981361197402, "grad_norm": 4.750843469383844, "learning_rate": 1.7184402469924748e-05, "loss": 1.2414, "step": 5684 }, { "epoch": 0.8027393391697261, "grad_norm": 3.0172101828239355, "learning_rate": 1.71833419942279e-05, "loss": 0.8523, "step": 5685 }, { "epoch": 0.802880542219712, "grad_norm": 4.182358050626544, "learning_rate": 1.7182281351593288e-05, "loss": 1.2095, "step": 5686 }, { "epoch": 0.8030217452696978, "grad_norm": 3.783317136705109, "learning_rate": 1.7181220542045557e-05, "loss": 0.8191, "step": 5687 }, { "epoch": 0.8031629483196837, "grad_norm": 3.91678778156472, "learning_rate": 1.718015956560936e-05, "loss": 0.9489, "step": 5688 }, { "epoch": 0.8033041513696696, "grad_norm": 3.3075166175090662, "learning_rate": 1.717909842230936e-05, "loss": 0.8831, "step": 5689 }, { "epoch": 0.8034453544196555, "grad_norm": 4.017990949078927, "learning_rate": 1.7178037112170213e-05, "loss": 1.0205, "step": 5690 }, { "epoch": 0.8035865574696414, "grad_norm": 3.7930383952931814, "learning_rate": 1.717697563521658e-05, "loss": 1.1714, "step": 5691 }, { "epoch": 0.8037277605196272, "grad_norm": 2.8983915495163775, "learning_rate": 1.7175913991473137e-05, "loss": 0.7216, "step": 5692 }, { "epoch": 0.8038689635696131, "grad_norm": 3.74032180819834, "learning_rate": 1.7174852180964546e-05, "loss": 1.0592, "step": 5693 }, { "epoch": 0.804010166619599, "grad_norm": 3.4109752323304385, "learning_rate": 1.7173790203715494e-05, "loss": 0.8605, "step": 5694 }, { "epoch": 0.8041513696695849, "grad_norm": 3.4763099917619544, "learning_rate": 1.7172728059750655e-05, "loss": 0.8975, "step": 5695 }, { "epoch": 0.8042925727195708, "grad_norm": 3.694446840350058, "learning_rate": 1.7171665749094713e-05, "loss": 1.0667, "step": 5696 }, { "epoch": 0.8044337757695567, "grad_norm": 3.569246880532397, "learning_rate": 1.7170603271772354e-05, "loss": 0.9342, "step": 5697 }, { "epoch": 0.8045749788195425, "grad_norm": 4.028246080786916, "learning_rate": 1.7169540627808276e-05, "loss": 1.1244, "step": 5698 }, { "epoch": 0.8047161818695284, "grad_norm": 3.63315635152293, "learning_rate": 1.7168477817227166e-05, "loss": 0.9091, "step": 5699 }, { "epoch": 0.8048573849195143, "grad_norm": 3.3931946474695924, "learning_rate": 1.716741484005373e-05, "loss": 1.0119, "step": 5700 }, { "epoch": 0.8049985879695002, "grad_norm": 3.0992890855834085, "learning_rate": 1.7166351696312665e-05, "loss": 0.8549, "step": 5701 }, { "epoch": 0.8051397910194861, "grad_norm": 3.4271780881199843, "learning_rate": 1.7165288386028683e-05, "loss": 0.8898, "step": 5702 }, { "epoch": 0.805280994069472, "grad_norm": 4.13970380393167, "learning_rate": 1.7164224909226497e-05, "loss": 1.1469, "step": 5703 }, { "epoch": 0.8054221971194577, "grad_norm": 4.039115144119851, "learning_rate": 1.7163161265930814e-05, "loss": 1.0747, "step": 5704 }, { "epoch": 0.8055634001694436, "grad_norm": 3.3159786008994776, "learning_rate": 1.716209745616636e-05, "loss": 1.0783, "step": 5705 }, { "epoch": 0.8057046032194295, "grad_norm": 3.379172070121552, "learning_rate": 1.716103347995785e-05, "loss": 0.8885, "step": 5706 }, { "epoch": 0.8058458062694154, "grad_norm": 3.777939441472578, "learning_rate": 1.7159969337330018e-05, "loss": 1.2025, "step": 5707 }, { "epoch": 0.8059870093194013, "grad_norm": 3.6344828918316936, "learning_rate": 1.715890502830759e-05, "loss": 0.9674, "step": 5708 }, { "epoch": 0.8061282123693871, "grad_norm": 3.3375923475952436, "learning_rate": 1.71578405529153e-05, "loss": 0.9814, "step": 5709 }, { "epoch": 0.806269415419373, "grad_norm": 3.926671106156196, "learning_rate": 1.7156775911177888e-05, "loss": 1.0768, "step": 5710 }, { "epoch": 0.8064106184693589, "grad_norm": 3.2149902656105436, "learning_rate": 1.715571110312009e-05, "loss": 0.8192, "step": 5711 }, { "epoch": 0.8065518215193448, "grad_norm": 4.063355332553528, "learning_rate": 1.7154646128766663e-05, "loss": 1.1364, "step": 5712 }, { "epoch": 0.8066930245693307, "grad_norm": 3.678149002894032, "learning_rate": 1.7153580988142348e-05, "loss": 1.0788, "step": 5713 }, { "epoch": 0.8068342276193166, "grad_norm": 3.6157646197990516, "learning_rate": 1.7152515681271896e-05, "loss": 1.1353, "step": 5714 }, { "epoch": 0.8069754306693024, "grad_norm": 3.782277933358547, "learning_rate": 1.7151450208180075e-05, "loss": 1.0758, "step": 5715 }, { "epoch": 0.8071166337192883, "grad_norm": 3.6944131199965327, "learning_rate": 1.7150384568891634e-05, "loss": 1.0843, "step": 5716 }, { "epoch": 0.8072578367692742, "grad_norm": 3.629072456929432, "learning_rate": 1.7149318763431345e-05, "loss": 0.9361, "step": 5717 }, { "epoch": 0.8073990398192601, "grad_norm": 3.4966032581795607, "learning_rate": 1.714825279182398e-05, "loss": 0.7638, "step": 5718 }, { "epoch": 0.807540242869246, "grad_norm": 3.3749154071831753, "learning_rate": 1.7147186654094304e-05, "loss": 0.994, "step": 5719 }, { "epoch": 0.8076814459192319, "grad_norm": 3.4403068126410785, "learning_rate": 1.7146120350267094e-05, "loss": 0.9372, "step": 5720 }, { "epoch": 0.8078226489692177, "grad_norm": 3.166200927672278, "learning_rate": 1.7145053880367134e-05, "loss": 1.0126, "step": 5721 }, { "epoch": 0.8079638520192036, "grad_norm": 4.108168600389199, "learning_rate": 1.7143987244419212e-05, "loss": 1.3065, "step": 5722 }, { "epoch": 0.8081050550691895, "grad_norm": 2.8857495921785494, "learning_rate": 1.7142920442448107e-05, "loss": 0.8709, "step": 5723 }, { "epoch": 0.8082462581191754, "grad_norm": 3.9229217804714356, "learning_rate": 1.7141853474478618e-05, "loss": 1.3251, "step": 5724 }, { "epoch": 0.8083874611691613, "grad_norm": 3.735286256421067, "learning_rate": 1.714078634053554e-05, "loss": 1.041, "step": 5725 }, { "epoch": 0.8085286642191472, "grad_norm": 3.4298656520142585, "learning_rate": 1.713971904064367e-05, "loss": 1.0632, "step": 5726 }, { "epoch": 0.808669867269133, "grad_norm": 3.4957065073575695, "learning_rate": 1.713865157482781e-05, "loss": 0.9638, "step": 5727 }, { "epoch": 0.8088110703191189, "grad_norm": 4.335416234677013, "learning_rate": 1.7137583943112776e-05, "loss": 1.2524, "step": 5728 }, { "epoch": 0.8089522733691048, "grad_norm": 4.036844979090874, "learning_rate": 1.713651614552337e-05, "loss": 0.898, "step": 5729 }, { "epoch": 0.8090934764190907, "grad_norm": 3.707318157970143, "learning_rate": 1.713544818208441e-05, "loss": 1.1559, "step": 5730 }, { "epoch": 0.8092346794690766, "grad_norm": 3.476462421570581, "learning_rate": 1.7134380052820715e-05, "loss": 0.9229, "step": 5731 }, { "epoch": 0.8093758825190625, "grad_norm": 3.3862946190682455, "learning_rate": 1.7133311757757112e-05, "loss": 1.0165, "step": 5732 }, { "epoch": 0.8095170855690483, "grad_norm": 3.6055314726454815, "learning_rate": 1.7132243296918424e-05, "loss": 0.9641, "step": 5733 }, { "epoch": 0.8096582886190342, "grad_norm": 4.2205854658364155, "learning_rate": 1.713117467032948e-05, "loss": 1.2135, "step": 5734 }, { "epoch": 0.8097994916690201, "grad_norm": 2.962013328187995, "learning_rate": 1.7130105878015116e-05, "loss": 0.8187, "step": 5735 }, { "epoch": 0.809940694719006, "grad_norm": 3.7191913867899586, "learning_rate": 1.7129036920000174e-05, "loss": 0.9997, "step": 5736 }, { "epoch": 0.8100818977689919, "grad_norm": 3.9766919009778348, "learning_rate": 1.712796779630949e-05, "loss": 1.066, "step": 5737 }, { "epoch": 0.8102231008189776, "grad_norm": 3.1475415636136352, "learning_rate": 1.7126898506967913e-05, "loss": 0.8357, "step": 5738 }, { "epoch": 0.8103643038689635, "grad_norm": 2.9716153190597936, "learning_rate": 1.7125829052000295e-05, "loss": 0.8737, "step": 5739 }, { "epoch": 0.8105055069189494, "grad_norm": 4.22819347811825, "learning_rate": 1.7124759431431485e-05, "loss": 1.1211, "step": 5740 }, { "epoch": 0.8106467099689353, "grad_norm": 4.064594185251849, "learning_rate": 1.712368964528634e-05, "loss": 1.1126, "step": 5741 }, { "epoch": 0.8107879130189212, "grad_norm": 3.253308280245384, "learning_rate": 1.712261969358973e-05, "loss": 1.1477, "step": 5742 }, { "epoch": 0.810929116068907, "grad_norm": 4.174879586133742, "learning_rate": 1.712154957636651e-05, "loss": 0.9996, "step": 5743 }, { "epoch": 0.8110703191188929, "grad_norm": 3.9047707072926405, "learning_rate": 1.7120479293641558e-05, "loss": 1.2572, "step": 5744 }, { "epoch": 0.8112115221688788, "grad_norm": 3.2668279454822895, "learning_rate": 1.7119408845439735e-05, "loss": 0.7538, "step": 5745 }, { "epoch": 0.8113527252188647, "grad_norm": 3.037128670130711, "learning_rate": 1.711833823178593e-05, "loss": 0.8262, "step": 5746 }, { "epoch": 0.8114939282688506, "grad_norm": 3.6367350454383707, "learning_rate": 1.7117267452705018e-05, "loss": 1.1567, "step": 5747 }, { "epoch": 0.8116351313188365, "grad_norm": 3.725139218296415, "learning_rate": 1.7116196508221886e-05, "loss": 1.093, "step": 5748 }, { "epoch": 0.8117763343688223, "grad_norm": 4.062659405042165, "learning_rate": 1.7115125398361418e-05, "loss": 1.114, "step": 5749 }, { "epoch": 0.8119175374188082, "grad_norm": 3.4150592867027734, "learning_rate": 1.7114054123148508e-05, "loss": 0.8329, "step": 5750 }, { "epoch": 0.8120587404687941, "grad_norm": 3.390653966675382, "learning_rate": 1.7112982682608054e-05, "loss": 1.0881, "step": 5751 }, { "epoch": 0.81219994351878, "grad_norm": 4.681011931210241, "learning_rate": 1.7111911076764954e-05, "loss": 1.0354, "step": 5752 }, { "epoch": 0.8123411465687659, "grad_norm": 3.7498116991006936, "learning_rate": 1.7110839305644113e-05, "loss": 1.0779, "step": 5753 }, { "epoch": 0.8124823496187518, "grad_norm": 3.6145229677788935, "learning_rate": 1.7109767369270437e-05, "loss": 1.0916, "step": 5754 }, { "epoch": 0.8126235526687376, "grad_norm": 3.083789066339782, "learning_rate": 1.7108695267668836e-05, "loss": 0.8599, "step": 5755 }, { "epoch": 0.8127647557187235, "grad_norm": 3.2934155765738673, "learning_rate": 1.710762300086423e-05, "loss": 0.9477, "step": 5756 }, { "epoch": 0.8129059587687094, "grad_norm": 3.309566612485001, "learning_rate": 1.710655056888153e-05, "loss": 0.9326, "step": 5757 }, { "epoch": 0.8130471618186953, "grad_norm": 3.3178397032374485, "learning_rate": 1.7105477971745668e-05, "loss": 1.0848, "step": 5758 }, { "epoch": 0.8131883648686812, "grad_norm": 3.600273685717095, "learning_rate": 1.7104405209481563e-05, "loss": 0.9988, "step": 5759 }, { "epoch": 0.8133295679186671, "grad_norm": 3.0057221608906413, "learning_rate": 1.7103332282114156e-05, "loss": 0.7617, "step": 5760 }, { "epoch": 0.813470770968653, "grad_norm": 3.0171600113905495, "learning_rate": 1.7102259189668368e-05, "loss": 0.8799, "step": 5761 }, { "epoch": 0.8136119740186388, "grad_norm": 3.2249493276914887, "learning_rate": 1.7101185932169147e-05, "loss": 0.9947, "step": 5762 }, { "epoch": 0.8137531770686247, "grad_norm": 3.961378646708445, "learning_rate": 1.710011250964143e-05, "loss": 0.9026, "step": 5763 }, { "epoch": 0.8138943801186106, "grad_norm": 3.577776642384495, "learning_rate": 1.7099038922110164e-05, "loss": 0.82, "step": 5764 }, { "epoch": 0.8140355831685965, "grad_norm": 3.0140303574821408, "learning_rate": 1.70979651696003e-05, "loss": 0.8317, "step": 5765 }, { "epoch": 0.8141767862185824, "grad_norm": 3.0322701306926225, "learning_rate": 1.709689125213679e-05, "loss": 1.0097, "step": 5766 }, { "epoch": 0.8143179892685682, "grad_norm": 3.3557782998798014, "learning_rate": 1.7095817169744596e-05, "loss": 0.8828, "step": 5767 }, { "epoch": 0.8144591923185541, "grad_norm": 3.8332590631097845, "learning_rate": 1.7094742922448674e-05, "loss": 1.0529, "step": 5768 }, { "epoch": 0.81460039536854, "grad_norm": 3.921753372221941, "learning_rate": 1.7093668510273987e-05, "loss": 1.3827, "step": 5769 }, { "epoch": 0.8147415984185259, "grad_norm": 3.654490672878271, "learning_rate": 1.7092593933245513e-05, "loss": 1.1379, "step": 5770 }, { "epoch": 0.8148828014685118, "grad_norm": 3.2634412631470946, "learning_rate": 1.7091519191388212e-05, "loss": 0.8773, "step": 5771 }, { "epoch": 0.8150240045184975, "grad_norm": 3.2965078674422728, "learning_rate": 1.709044428472707e-05, "loss": 0.9704, "step": 5772 }, { "epoch": 0.8151652075684834, "grad_norm": 3.2974919908035347, "learning_rate": 1.7089369213287068e-05, "loss": 1.0449, "step": 5773 }, { "epoch": 0.8153064106184693, "grad_norm": 3.4596732877531395, "learning_rate": 1.7088293977093187e-05, "loss": 0.9916, "step": 5774 }, { "epoch": 0.8154476136684552, "grad_norm": 3.158839372751099, "learning_rate": 1.7087218576170412e-05, "loss": 0.854, "step": 5775 }, { "epoch": 0.8155888167184411, "grad_norm": 3.2415502949942274, "learning_rate": 1.7086143010543737e-05, "loss": 0.9604, "step": 5776 }, { "epoch": 0.815730019768427, "grad_norm": 3.604498813630204, "learning_rate": 1.7085067280238164e-05, "loss": 1.2164, "step": 5777 }, { "epoch": 0.8158712228184128, "grad_norm": 3.03545057478487, "learning_rate": 1.7083991385278686e-05, "loss": 0.8376, "step": 5778 }, { "epoch": 0.8160124258683987, "grad_norm": 3.4099815374798936, "learning_rate": 1.7082915325690304e-05, "loss": 0.9554, "step": 5779 }, { "epoch": 0.8161536289183846, "grad_norm": 3.8781491276250897, "learning_rate": 1.7081839101498033e-05, "loss": 1.1385, "step": 5780 }, { "epoch": 0.8162948319683705, "grad_norm": 2.8191883704677174, "learning_rate": 1.7080762712726878e-05, "loss": 0.7949, "step": 5781 }, { "epoch": 0.8164360350183564, "grad_norm": 3.585568733818966, "learning_rate": 1.7079686159401853e-05, "loss": 1.0857, "step": 5782 }, { "epoch": 0.8165772380683423, "grad_norm": 4.859614661772849, "learning_rate": 1.7078609441547983e-05, "loss": 0.7672, "step": 5783 }, { "epoch": 0.8167184411183281, "grad_norm": 3.5293849637013635, "learning_rate": 1.7077532559190282e-05, "loss": 0.9033, "step": 5784 }, { "epoch": 0.816859644168314, "grad_norm": 3.2504820137981034, "learning_rate": 1.7076455512353782e-05, "loss": 0.9333, "step": 5785 }, { "epoch": 0.8170008472182999, "grad_norm": 4.93326422032294, "learning_rate": 1.7075378301063518e-05, "loss": 1.2205, "step": 5786 }, { "epoch": 0.8171420502682858, "grad_norm": 4.520854437932972, "learning_rate": 1.707430092534451e-05, "loss": 1.3726, "step": 5787 }, { "epoch": 0.8172832533182717, "grad_norm": 3.3688415365303674, "learning_rate": 1.7073223385221806e-05, "loss": 0.9363, "step": 5788 }, { "epoch": 0.8174244563682576, "grad_norm": 2.93170211923061, "learning_rate": 1.707214568072045e-05, "loss": 0.8056, "step": 5789 }, { "epoch": 0.8175656594182434, "grad_norm": 2.7619718391588988, "learning_rate": 1.7071067811865477e-05, "loss": 0.8103, "step": 5790 }, { "epoch": 0.8177068624682293, "grad_norm": 3.422578269565292, "learning_rate": 1.7069989778681943e-05, "loss": 0.8877, "step": 5791 }, { "epoch": 0.8178480655182152, "grad_norm": 4.440784935628404, "learning_rate": 1.70689115811949e-05, "loss": 1.1413, "step": 5792 }, { "epoch": 0.8179892685682011, "grad_norm": 3.451292086862934, "learning_rate": 1.7067833219429407e-05, "loss": 0.9585, "step": 5793 }, { "epoch": 0.818130471618187, "grad_norm": 3.2039878229268086, "learning_rate": 1.706675469341052e-05, "loss": 0.973, "step": 5794 }, { "epoch": 0.8182716746681729, "grad_norm": 3.2068002755990443, "learning_rate": 1.7065676003163308e-05, "loss": 0.8865, "step": 5795 }, { "epoch": 0.8184128777181587, "grad_norm": 3.3479492532277404, "learning_rate": 1.7064597148712834e-05, "loss": 0.9934, "step": 5796 }, { "epoch": 0.8185540807681446, "grad_norm": 3.3976171550718406, "learning_rate": 1.7063518130084172e-05, "loss": 0.9893, "step": 5797 }, { "epoch": 0.8186952838181305, "grad_norm": 3.48079273847605, "learning_rate": 1.7062438947302405e-05, "loss": 0.9776, "step": 5798 }, { "epoch": 0.8188364868681164, "grad_norm": 3.2520881394877277, "learning_rate": 1.7061359600392603e-05, "loss": 0.9642, "step": 5799 }, { "epoch": 0.8189776899181023, "grad_norm": 3.026586352850766, "learning_rate": 1.7060280089379854e-05, "loss": 0.7532, "step": 5800 }, { "epoch": 0.8191188929680882, "grad_norm": 3.690306343426726, "learning_rate": 1.7059200414289247e-05, "loss": 0.9636, "step": 5801 }, { "epoch": 0.819260096018074, "grad_norm": 3.6688792244160298, "learning_rate": 1.7058120575145868e-05, "loss": 1.1461, "step": 5802 }, { "epoch": 0.8194012990680599, "grad_norm": 3.9671312357358355, "learning_rate": 1.7057040571974816e-05, "loss": 1.2288, "step": 5803 }, { "epoch": 0.8195425021180458, "grad_norm": 3.527876885657798, "learning_rate": 1.7055960404801187e-05, "loss": 1.1018, "step": 5804 }, { "epoch": 0.8196837051680317, "grad_norm": 3.7744990629820316, "learning_rate": 1.7054880073650088e-05, "loss": 1.0157, "step": 5805 }, { "epoch": 0.8198249082180175, "grad_norm": 3.910949228800974, "learning_rate": 1.7053799578546623e-05, "loss": 1.204, "step": 5806 }, { "epoch": 0.8199661112680033, "grad_norm": 3.054887716788059, "learning_rate": 1.7052718919515904e-05, "loss": 0.8231, "step": 5807 }, { "epoch": 0.8201073143179892, "grad_norm": 3.69340035666905, "learning_rate": 1.705163809658304e-05, "loss": 1.3826, "step": 5808 }, { "epoch": 0.8202485173679751, "grad_norm": 3.962241081862201, "learning_rate": 1.705055710977315e-05, "loss": 1.1039, "step": 5809 }, { "epoch": 0.820389720417961, "grad_norm": 3.987713984058454, "learning_rate": 1.704947595911136e-05, "loss": 1.002, "step": 5810 }, { "epoch": 0.8205309234679469, "grad_norm": 3.453207982887673, "learning_rate": 1.704839464462279e-05, "loss": 1.1135, "step": 5811 }, { "epoch": 0.8206721265179328, "grad_norm": 4.218345876616529, "learning_rate": 1.704731316633258e-05, "loss": 1.2511, "step": 5812 }, { "epoch": 0.8208133295679186, "grad_norm": 3.2929394998797465, "learning_rate": 1.704623152426585e-05, "loss": 0.819, "step": 5813 }, { "epoch": 0.8209545326179045, "grad_norm": 3.344913537556092, "learning_rate": 1.7045149718447743e-05, "loss": 1.1679, "step": 5814 }, { "epoch": 0.8210957356678904, "grad_norm": 3.7168081927847347, "learning_rate": 1.7044067748903402e-05, "loss": 1.1642, "step": 5815 }, { "epoch": 0.8212369387178763, "grad_norm": 3.703046675520849, "learning_rate": 1.7042985615657964e-05, "loss": 0.9206, "step": 5816 }, { "epoch": 0.8213781417678622, "grad_norm": 3.66714145407268, "learning_rate": 1.7041903318736586e-05, "loss": 1.002, "step": 5817 }, { "epoch": 0.821519344817848, "grad_norm": 4.202229662247463, "learning_rate": 1.7040820858164413e-05, "loss": 1.1988, "step": 5818 }, { "epoch": 0.8216605478678339, "grad_norm": 3.707431859532501, "learning_rate": 1.7039738233966607e-05, "loss": 0.9763, "step": 5819 }, { "epoch": 0.8218017509178198, "grad_norm": 3.4229616456462155, "learning_rate": 1.703865544616832e-05, "loss": 0.9472, "step": 5820 }, { "epoch": 0.8219429539678057, "grad_norm": 3.5513247334096425, "learning_rate": 1.7037572494794724e-05, "loss": 0.9603, "step": 5821 }, { "epoch": 0.8220841570177916, "grad_norm": 3.167205595848142, "learning_rate": 1.7036489379870982e-05, "loss": 0.8196, "step": 5822 }, { "epoch": 0.8222253600677775, "grad_norm": 3.156262250205531, "learning_rate": 1.7035406101422264e-05, "loss": 0.8184, "step": 5823 }, { "epoch": 0.8223665631177633, "grad_norm": 3.7699004774444997, "learning_rate": 1.7034322659473748e-05, "loss": 1.076, "step": 5824 }, { "epoch": 0.8225077661677492, "grad_norm": 3.204048930980347, "learning_rate": 1.703323905405061e-05, "loss": 0.916, "step": 5825 }, { "epoch": 0.8226489692177351, "grad_norm": 3.7790144191123587, "learning_rate": 1.7032155285178036e-05, "loss": 1.0115, "step": 5826 }, { "epoch": 0.822790172267721, "grad_norm": 3.5875153584590653, "learning_rate": 1.703107135288121e-05, "loss": 0.9727, "step": 5827 }, { "epoch": 0.8229313753177069, "grad_norm": 3.6237002464377857, "learning_rate": 1.702998725718532e-05, "loss": 1.2298, "step": 5828 }, { "epoch": 0.8230725783676928, "grad_norm": 3.466637758207509, "learning_rate": 1.7028902998115566e-05, "loss": 1.0482, "step": 5829 }, { "epoch": 0.8232137814176786, "grad_norm": 3.9559017144629824, "learning_rate": 1.7027818575697136e-05, "loss": 1.1248, "step": 5830 }, { "epoch": 0.8233549844676645, "grad_norm": 4.552308853152018, "learning_rate": 1.7026733989955243e-05, "loss": 1.309, "step": 5831 }, { "epoch": 0.8234961875176504, "grad_norm": 3.8170900321759267, "learning_rate": 1.7025649240915085e-05, "loss": 0.9313, "step": 5832 }, { "epoch": 0.8236373905676363, "grad_norm": 3.2442376484374167, "learning_rate": 1.7024564328601873e-05, "loss": 0.9453, "step": 5833 }, { "epoch": 0.8237785936176222, "grad_norm": 3.42622635224423, "learning_rate": 1.7023479253040817e-05, "loss": 1.1723, "step": 5834 }, { "epoch": 0.8239197966676081, "grad_norm": 3.507803866355308, "learning_rate": 1.702239401425714e-05, "loss": 0.9912, "step": 5835 }, { "epoch": 0.824060999717594, "grad_norm": 3.098321862390577, "learning_rate": 1.7021308612276056e-05, "loss": 0.8773, "step": 5836 }, { "epoch": 0.8242022027675798, "grad_norm": 3.384633765533748, "learning_rate": 1.7020223047122794e-05, "loss": 0.9368, "step": 5837 }, { "epoch": 0.8243434058175657, "grad_norm": 3.0332608855619556, "learning_rate": 1.7019137318822577e-05, "loss": 0.8364, "step": 5838 }, { "epoch": 0.8244846088675516, "grad_norm": 3.8343875677952575, "learning_rate": 1.7018051427400643e-05, "loss": 1.0591, "step": 5839 }, { "epoch": 0.8246258119175374, "grad_norm": 3.450928433850531, "learning_rate": 1.7016965372882227e-05, "loss": 1.0085, "step": 5840 }, { "epoch": 0.8247670149675232, "grad_norm": 3.5786389932154457, "learning_rate": 1.701587915529256e-05, "loss": 0.9758, "step": 5841 }, { "epoch": 0.8249082180175091, "grad_norm": 3.49987500228449, "learning_rate": 1.7014792774656896e-05, "loss": 1.0739, "step": 5842 }, { "epoch": 0.825049421067495, "grad_norm": 2.807231638781993, "learning_rate": 1.7013706231000477e-05, "loss": 0.7708, "step": 5843 }, { "epoch": 0.8251906241174809, "grad_norm": 3.6248229437827515, "learning_rate": 1.7012619524348552e-05, "loss": 0.8654, "step": 5844 }, { "epoch": 0.8253318271674668, "grad_norm": 3.4480576869823722, "learning_rate": 1.7011532654726377e-05, "loss": 1.0496, "step": 5845 }, { "epoch": 0.8254730302174527, "grad_norm": 3.5490542754129435, "learning_rate": 1.7010445622159214e-05, "loss": 1.0609, "step": 5846 }, { "epoch": 0.8256142332674385, "grad_norm": 3.518291347350679, "learning_rate": 1.700935842667232e-05, "loss": 0.9585, "step": 5847 }, { "epoch": 0.8257554363174244, "grad_norm": 4.357876682400807, "learning_rate": 1.7008271068290966e-05, "loss": 0.8722, "step": 5848 }, { "epoch": 0.8258966393674103, "grad_norm": 3.66777770256138, "learning_rate": 1.700718354704042e-05, "loss": 0.9161, "step": 5849 }, { "epoch": 0.8260378424173962, "grad_norm": 3.623639935739178, "learning_rate": 1.7006095862945948e-05, "loss": 1.0593, "step": 5850 }, { "epoch": 0.8261790454673821, "grad_norm": 3.6210608514631053, "learning_rate": 1.700500801603284e-05, "loss": 1.0051, "step": 5851 }, { "epoch": 0.826320248517368, "grad_norm": 3.940838994880735, "learning_rate": 1.700392000632637e-05, "loss": 1.2193, "step": 5852 }, { "epoch": 0.8264614515673538, "grad_norm": 3.813050664531881, "learning_rate": 1.700283183385182e-05, "loss": 1.0671, "step": 5853 }, { "epoch": 0.8266026546173397, "grad_norm": 4.157730596431297, "learning_rate": 1.7001743498634487e-05, "loss": 1.0796, "step": 5854 }, { "epoch": 0.8267438576673256, "grad_norm": 3.7966246413260087, "learning_rate": 1.7000655000699654e-05, "loss": 1.2088, "step": 5855 }, { "epoch": 0.8268850607173115, "grad_norm": 3.279659739838586, "learning_rate": 1.6999566340072628e-05, "loss": 0.8907, "step": 5856 }, { "epoch": 0.8270262637672974, "grad_norm": 3.114988373962563, "learning_rate": 1.69984775167787e-05, "loss": 0.7916, "step": 5857 }, { "epoch": 0.8271674668172833, "grad_norm": 3.3690436191317215, "learning_rate": 1.6997388530843176e-05, "loss": 1.0451, "step": 5858 }, { "epoch": 0.8273086698672691, "grad_norm": 3.106926854888022, "learning_rate": 1.699629938229137e-05, "loss": 0.8966, "step": 5859 }, { "epoch": 0.827449872917255, "grad_norm": 3.5764869413648124, "learning_rate": 1.6995210071148582e-05, "loss": 1.0345, "step": 5860 }, { "epoch": 0.8275910759672409, "grad_norm": 3.188071626576011, "learning_rate": 1.6994120597440136e-05, "loss": 1.0039, "step": 5861 }, { "epoch": 0.8277322790172268, "grad_norm": 3.038646076122861, "learning_rate": 1.6993030961191345e-05, "loss": 0.9434, "step": 5862 }, { "epoch": 0.8278734820672127, "grad_norm": 3.4391253924124996, "learning_rate": 1.6991941162427537e-05, "loss": 1.0459, "step": 5863 }, { "epoch": 0.8280146851171986, "grad_norm": 3.8720366023995285, "learning_rate": 1.699085120117403e-05, "loss": 1.0294, "step": 5864 }, { "epoch": 0.8281558881671844, "grad_norm": 3.4408658567224726, "learning_rate": 1.6989761077456167e-05, "loss": 0.9036, "step": 5865 }, { "epoch": 0.8282970912171703, "grad_norm": 3.2247844920066435, "learning_rate": 1.6988670791299274e-05, "loss": 0.8994, "step": 5866 }, { "epoch": 0.8284382942671562, "grad_norm": 3.52698901073545, "learning_rate": 1.698758034272869e-05, "loss": 0.9014, "step": 5867 }, { "epoch": 0.8285794973171421, "grad_norm": 3.706696570315058, "learning_rate": 1.6986489731769755e-05, "loss": 1.0195, "step": 5868 }, { "epoch": 0.828720700367128, "grad_norm": 3.613141662415506, "learning_rate": 1.698539895844782e-05, "loss": 1.1392, "step": 5869 }, { "epoch": 0.8288619034171139, "grad_norm": 3.197691188627702, "learning_rate": 1.6984308022788227e-05, "loss": 0.878, "step": 5870 }, { "epoch": 0.8290031064670997, "grad_norm": 4.117332052470007, "learning_rate": 1.698321692481633e-05, "loss": 1.2604, "step": 5871 }, { "epoch": 0.8291443095170856, "grad_norm": 4.034601463089735, "learning_rate": 1.698212566455749e-05, "loss": 1.258, "step": 5872 }, { "epoch": 0.8292855125670715, "grad_norm": 4.239921057748761, "learning_rate": 1.698103424203706e-05, "loss": 1.1634, "step": 5873 }, { "epoch": 0.8294267156170573, "grad_norm": 4.465061700604938, "learning_rate": 1.6979942657280414e-05, "loss": 0.8648, "step": 5874 }, { "epoch": 0.8295679186670432, "grad_norm": 3.3805132054400167, "learning_rate": 1.6978850910312916e-05, "loss": 0.8608, "step": 5875 }, { "epoch": 0.829709121717029, "grad_norm": 3.541360255753306, "learning_rate": 1.6977759001159934e-05, "loss": 1.1114, "step": 5876 }, { "epoch": 0.8298503247670149, "grad_norm": 3.275462772088245, "learning_rate": 1.6976666929846847e-05, "loss": 1.1193, "step": 5877 }, { "epoch": 0.8299915278170008, "grad_norm": 3.291296963393997, "learning_rate": 1.6975574696399033e-05, "loss": 0.8899, "step": 5878 }, { "epoch": 0.8301327308669867, "grad_norm": 3.302637481732676, "learning_rate": 1.697448230084188e-05, "loss": 1.1653, "step": 5879 }, { "epoch": 0.8302739339169726, "grad_norm": 3.191813358649746, "learning_rate": 1.6973389743200764e-05, "loss": 1.0004, "step": 5880 }, { "epoch": 0.8304151369669585, "grad_norm": 4.228033401666042, "learning_rate": 1.6972297023501087e-05, "loss": 1.1992, "step": 5881 }, { "epoch": 0.8305563400169443, "grad_norm": 4.054907681321021, "learning_rate": 1.6971204141768235e-05, "loss": 1.0915, "step": 5882 }, { "epoch": 0.8306975430669302, "grad_norm": 3.985149596037757, "learning_rate": 1.697011109802761e-05, "loss": 0.9379, "step": 5883 }, { "epoch": 0.8308387461169161, "grad_norm": 3.06295845931574, "learning_rate": 1.696901789230461e-05, "loss": 0.9655, "step": 5884 }, { "epoch": 0.830979949166902, "grad_norm": 3.478679343141201, "learning_rate": 1.6967924524624648e-05, "loss": 0.8378, "step": 5885 }, { "epoch": 0.8311211522168879, "grad_norm": 3.0790287115607207, "learning_rate": 1.6966830995013134e-05, "loss": 0.9457, "step": 5886 }, { "epoch": 0.8312623552668738, "grad_norm": 3.453317369140833, "learning_rate": 1.6965737303495466e-05, "loss": 0.8738, "step": 5887 }, { "epoch": 0.8314035583168596, "grad_norm": 3.2240776711703405, "learning_rate": 1.6964643450097077e-05, "loss": 0.7996, "step": 5888 }, { "epoch": 0.8315447613668455, "grad_norm": 3.042950756827184, "learning_rate": 1.6963549434843383e-05, "loss": 1.0046, "step": 5889 }, { "epoch": 0.8316859644168314, "grad_norm": 3.481526637861636, "learning_rate": 1.6962455257759806e-05, "loss": 1.054, "step": 5890 }, { "epoch": 0.8318271674668173, "grad_norm": 3.138601269918471, "learning_rate": 1.6961360918871774e-05, "loss": 0.8595, "step": 5891 }, { "epoch": 0.8319683705168032, "grad_norm": 4.251834595222421, "learning_rate": 1.696026641820472e-05, "loss": 1.2265, "step": 5892 }, { "epoch": 0.832109573566789, "grad_norm": 3.3779271834790423, "learning_rate": 1.6959171755784086e-05, "loss": 0.9088, "step": 5893 }, { "epoch": 0.8322507766167749, "grad_norm": 3.3553186015781775, "learning_rate": 1.69580769316353e-05, "loss": 0.9234, "step": 5894 }, { "epoch": 0.8323919796667608, "grad_norm": 2.93984560133028, "learning_rate": 1.6956981945783814e-05, "loss": 0.8071, "step": 5895 }, { "epoch": 0.8325331827167467, "grad_norm": 3.358985809640167, "learning_rate": 1.695588679825507e-05, "loss": 0.9534, "step": 5896 }, { "epoch": 0.8326743857667326, "grad_norm": 2.8503286434034223, "learning_rate": 1.6954791489074524e-05, "loss": 0.8056, "step": 5897 }, { "epoch": 0.8328155888167185, "grad_norm": 2.898589432277315, "learning_rate": 1.6953696018267627e-05, "loss": 0.7662, "step": 5898 }, { "epoch": 0.8329567918667043, "grad_norm": 3.5211869038855683, "learning_rate": 1.6952600385859836e-05, "loss": 0.9385, "step": 5899 }, { "epoch": 0.8330979949166902, "grad_norm": 4.12796382694839, "learning_rate": 1.6951504591876614e-05, "loss": 0.9873, "step": 5900 }, { "epoch": 0.8332391979666761, "grad_norm": 4.391782705432725, "learning_rate": 1.695040863634343e-05, "loss": 1.2203, "step": 5901 }, { "epoch": 0.833380401016662, "grad_norm": 3.556783279911456, "learning_rate": 1.694931251928575e-05, "loss": 1.0176, "step": 5902 }, { "epoch": 0.8335216040666479, "grad_norm": 3.567865983044278, "learning_rate": 1.6948216240729046e-05, "loss": 1.1966, "step": 5903 }, { "epoch": 0.8336628071166338, "grad_norm": 3.5847216362340735, "learning_rate": 1.69471198006988e-05, "loss": 1.0892, "step": 5904 }, { "epoch": 0.8338040101666196, "grad_norm": 3.468717501091991, "learning_rate": 1.694602319922049e-05, "loss": 0.9586, "step": 5905 }, { "epoch": 0.8339452132166055, "grad_norm": 4.113685450513926, "learning_rate": 1.6944926436319596e-05, "loss": 1.0895, "step": 5906 }, { "epoch": 0.8340864162665914, "grad_norm": 3.350290982174312, "learning_rate": 1.6943829512021616e-05, "loss": 1.0032, "step": 5907 }, { "epoch": 0.8342276193165772, "grad_norm": 3.8931686842306403, "learning_rate": 1.6942732426352032e-05, "loss": 1.2496, "step": 5908 }, { "epoch": 0.8343688223665631, "grad_norm": 3.685521780626063, "learning_rate": 1.694163517933635e-05, "loss": 1.1995, "step": 5909 }, { "epoch": 0.834510025416549, "grad_norm": 3.7451885170354275, "learning_rate": 1.694053777100006e-05, "loss": 1.1621, "step": 5910 }, { "epoch": 0.8346512284665348, "grad_norm": 3.062268436005849, "learning_rate": 1.6939440201368675e-05, "loss": 0.8588, "step": 5911 }, { "epoch": 0.8347924315165207, "grad_norm": 3.439367838047728, "learning_rate": 1.693834247046769e-05, "loss": 0.8138, "step": 5912 }, { "epoch": 0.8349336345665066, "grad_norm": 2.754630935228184, "learning_rate": 1.693724457832263e-05, "loss": 0.9266, "step": 5913 }, { "epoch": 0.8350748376164925, "grad_norm": 3.135884338177701, "learning_rate": 1.6936146524958994e-05, "loss": 1.0845, "step": 5914 }, { "epoch": 0.8352160406664784, "grad_norm": 5.577325310002807, "learning_rate": 1.6935048310402312e-05, "loss": 0.9746, "step": 5915 }, { "epoch": 0.8353572437164642, "grad_norm": 3.476498658564067, "learning_rate": 1.6933949934678104e-05, "loss": 0.9365, "step": 5916 }, { "epoch": 0.8354984467664501, "grad_norm": 3.6545234020400486, "learning_rate": 1.6932851397811895e-05, "loss": 1.0684, "step": 5917 }, { "epoch": 0.835639649816436, "grad_norm": 3.3254833898298917, "learning_rate": 1.693175269982921e-05, "loss": 1.101, "step": 5918 }, { "epoch": 0.8357808528664219, "grad_norm": 3.609196813446882, "learning_rate": 1.6930653840755585e-05, "loss": 1.2006, "step": 5919 }, { "epoch": 0.8359220559164078, "grad_norm": 3.5192964447184707, "learning_rate": 1.692955482061656e-05, "loss": 1.1923, "step": 5920 }, { "epoch": 0.8360632589663937, "grad_norm": 3.2508983372315354, "learning_rate": 1.6928455639437677e-05, "loss": 0.8796, "step": 5921 }, { "epoch": 0.8362044620163795, "grad_norm": 4.092745206415775, "learning_rate": 1.6927356297244474e-05, "loss": 1.2865, "step": 5922 }, { "epoch": 0.8363456650663654, "grad_norm": 3.806560014850811, "learning_rate": 1.69262567940625e-05, "loss": 1.0871, "step": 5923 }, { "epoch": 0.8364868681163513, "grad_norm": 3.997069072595441, "learning_rate": 1.6925157129917308e-05, "loss": 1.0395, "step": 5924 }, { "epoch": 0.8366280711663372, "grad_norm": 4.682896849101525, "learning_rate": 1.692405730483446e-05, "loss": 1.2607, "step": 5925 }, { "epoch": 0.8367692742163231, "grad_norm": 3.4728761779722817, "learning_rate": 1.692295731883951e-05, "loss": 1.0828, "step": 5926 }, { "epoch": 0.836910477266309, "grad_norm": 3.800740244779232, "learning_rate": 1.692185717195802e-05, "loss": 1.0595, "step": 5927 }, { "epoch": 0.8370516803162948, "grad_norm": 3.408257638862654, "learning_rate": 1.6920756864215558e-05, "loss": 1.0223, "step": 5928 }, { "epoch": 0.8371928833662807, "grad_norm": 3.779486409945569, "learning_rate": 1.69196563956377e-05, "loss": 1.2249, "step": 5929 }, { "epoch": 0.8373340864162666, "grad_norm": 3.173967589899312, "learning_rate": 1.691855576625001e-05, "loss": 0.8391, "step": 5930 }, { "epoch": 0.8374752894662525, "grad_norm": 3.301197248006986, "learning_rate": 1.691745497607807e-05, "loss": 0.991, "step": 5931 }, { "epoch": 0.8376164925162384, "grad_norm": 9.22659136379249, "learning_rate": 1.6916354025147473e-05, "loss": 1.3671, "step": 5932 }, { "epoch": 0.8377576955662243, "grad_norm": 4.099686459135056, "learning_rate": 1.691525291348379e-05, "loss": 1.1944, "step": 5933 }, { "epoch": 0.8378988986162101, "grad_norm": 3.4282534497022783, "learning_rate": 1.691415164111261e-05, "loss": 0.9133, "step": 5934 }, { "epoch": 0.838040101666196, "grad_norm": 3.4967592754200263, "learning_rate": 1.691305020805954e-05, "loss": 1.079, "step": 5935 }, { "epoch": 0.8381813047161819, "grad_norm": 3.0294058077256234, "learning_rate": 1.6911948614350165e-05, "loss": 0.9381, "step": 5936 }, { "epoch": 0.8383225077661678, "grad_norm": 3.5826042618335108, "learning_rate": 1.6910846860010095e-05, "loss": 0.9539, "step": 5937 }, { "epoch": 0.8384637108161537, "grad_norm": 3.1456098157134176, "learning_rate": 1.6909744945064924e-05, "loss": 0.7363, "step": 5938 }, { "epoch": 0.8386049138661396, "grad_norm": 3.456783072159234, "learning_rate": 1.6908642869540264e-05, "loss": 0.9508, "step": 5939 }, { "epoch": 0.8387461169161254, "grad_norm": 3.7624508664004805, "learning_rate": 1.6907540633461728e-05, "loss": 1.1674, "step": 5940 }, { "epoch": 0.8388873199661113, "grad_norm": 3.3876638363264058, "learning_rate": 1.690643823685493e-05, "loss": 1.0581, "step": 5941 }, { "epoch": 0.8390285230160971, "grad_norm": 3.365417200084475, "learning_rate": 1.690533567974549e-05, "loss": 1.0508, "step": 5942 }, { "epoch": 0.839169726066083, "grad_norm": 3.4713892528827284, "learning_rate": 1.6904232962159034e-05, "loss": 1.2145, "step": 5943 }, { "epoch": 0.8393109291160689, "grad_norm": 3.2365620724164312, "learning_rate": 1.6903130084121183e-05, "loss": 0.9824, "step": 5944 }, { "epoch": 0.8394521321660547, "grad_norm": 3.4112150481914587, "learning_rate": 1.6902027045657572e-05, "loss": 1.0141, "step": 5945 }, { "epoch": 0.8395933352160406, "grad_norm": 2.785623176000223, "learning_rate": 1.6900923846793832e-05, "loss": 0.8, "step": 5946 }, { "epoch": 0.8397345382660265, "grad_norm": 3.763794435754244, "learning_rate": 1.6899820487555602e-05, "loss": 0.9531, "step": 5947 }, { "epoch": 0.8398757413160124, "grad_norm": 3.2025111701403293, "learning_rate": 1.6898716967968522e-05, "loss": 0.9809, "step": 5948 }, { "epoch": 0.8400169443659983, "grad_norm": 3.6536271121254096, "learning_rate": 1.6897613288058244e-05, "loss": 0.8872, "step": 5949 }, { "epoch": 0.8401581474159842, "grad_norm": 3.9588265021045648, "learning_rate": 1.689650944785041e-05, "loss": 1.0873, "step": 5950 }, { "epoch": 0.84029935046597, "grad_norm": 3.652977698607157, "learning_rate": 1.689540544737067e-05, "loss": 1.0716, "step": 5951 }, { "epoch": 0.8404405535159559, "grad_norm": 3.013473970093273, "learning_rate": 1.689430128664469e-05, "loss": 0.8796, "step": 5952 }, { "epoch": 0.8405817565659418, "grad_norm": 3.5355601929233242, "learning_rate": 1.6893196965698125e-05, "loss": 0.9819, "step": 5953 }, { "epoch": 0.8407229596159277, "grad_norm": 3.1864556779129543, "learning_rate": 1.6892092484556638e-05, "loss": 0.8219, "step": 5954 }, { "epoch": 0.8408641626659136, "grad_norm": 3.3019546034004628, "learning_rate": 1.68909878432459e-05, "loss": 0.9323, "step": 5955 }, { "epoch": 0.8410053657158995, "grad_norm": 3.8133673051517465, "learning_rate": 1.6889883041791578e-05, "loss": 1.0364, "step": 5956 }, { "epoch": 0.8411465687658853, "grad_norm": 3.2944054856728227, "learning_rate": 1.688877808021935e-05, "loss": 1.101, "step": 5957 }, { "epoch": 0.8412877718158712, "grad_norm": 3.0410666942275957, "learning_rate": 1.68876729585549e-05, "loss": 0.8554, "step": 5958 }, { "epoch": 0.8414289748658571, "grad_norm": 3.576712871146497, "learning_rate": 1.6886567676823897e-05, "loss": 1.0287, "step": 5959 }, { "epoch": 0.841570177915843, "grad_norm": 3.8135253175731174, "learning_rate": 1.6885462235052038e-05, "loss": 1.2731, "step": 5960 }, { "epoch": 0.8417113809658289, "grad_norm": 3.198176583481025, "learning_rate": 1.688435663326501e-05, "loss": 1.007, "step": 5961 }, { "epoch": 0.8418525840158148, "grad_norm": 3.6590216916048783, "learning_rate": 1.688325087148851e-05, "loss": 1.0171, "step": 5962 }, { "epoch": 0.8419937870658006, "grad_norm": 3.0157381286558764, "learning_rate": 1.688214494974823e-05, "loss": 0.8299, "step": 5963 }, { "epoch": 0.8421349901157865, "grad_norm": 3.478256469718886, "learning_rate": 1.6881038868069875e-05, "loss": 1.0015, "step": 5964 }, { "epoch": 0.8422761931657724, "grad_norm": 3.964501313107971, "learning_rate": 1.687993262647915e-05, "loss": 0.9969, "step": 5965 }, { "epoch": 0.8424173962157583, "grad_norm": 3.386723761461656, "learning_rate": 1.6878826225001756e-05, "loss": 1.065, "step": 5966 }, { "epoch": 0.8425585992657442, "grad_norm": 3.4690063991892974, "learning_rate": 1.6877719663663414e-05, "loss": 0.9423, "step": 5967 }, { "epoch": 0.84269980231573, "grad_norm": 3.4597259650913434, "learning_rate": 1.687661294248984e-05, "loss": 1.03, "step": 5968 }, { "epoch": 0.8428410053657159, "grad_norm": 3.415388405272459, "learning_rate": 1.6875506061506746e-05, "loss": 1.1372, "step": 5969 }, { "epoch": 0.8429822084157018, "grad_norm": 3.51721928778269, "learning_rate": 1.6874399020739865e-05, "loss": 1.0717, "step": 5970 }, { "epoch": 0.8431234114656877, "grad_norm": 3.5430361039202727, "learning_rate": 1.6873291820214917e-05, "loss": 1.2683, "step": 5971 }, { "epoch": 0.8432646145156736, "grad_norm": 3.8615169281944133, "learning_rate": 1.6872184459957637e-05, "loss": 1.1621, "step": 5972 }, { "epoch": 0.8434058175656595, "grad_norm": 3.5055952171117655, "learning_rate": 1.687107693999376e-05, "loss": 1.0403, "step": 5973 }, { "epoch": 0.8435470206156453, "grad_norm": 3.1519406536692354, "learning_rate": 1.686996926034902e-05, "loss": 0.8113, "step": 5974 }, { "epoch": 0.8436882236656312, "grad_norm": 3.8132977377416832, "learning_rate": 1.686886142104916e-05, "loss": 1.2206, "step": 5975 }, { "epoch": 0.843829426715617, "grad_norm": 3.3530982137714873, "learning_rate": 1.6867753422119926e-05, "loss": 1.165, "step": 5976 }, { "epoch": 0.8439706297656029, "grad_norm": 3.0874841179845385, "learning_rate": 1.686664526358707e-05, "loss": 0.9611, "step": 5977 }, { "epoch": 0.8441118328155888, "grad_norm": 3.2815589740995823, "learning_rate": 1.6865536945476348e-05, "loss": 0.9514, "step": 5978 }, { "epoch": 0.8442530358655747, "grad_norm": 3.371518441065829, "learning_rate": 1.6864428467813506e-05, "loss": 0.7776, "step": 5979 }, { "epoch": 0.8443942389155605, "grad_norm": 3.000636059075726, "learning_rate": 1.6863319830624313e-05, "loss": 0.9231, "step": 5980 }, { "epoch": 0.8445354419655464, "grad_norm": 3.891594633559061, "learning_rate": 1.686221103393453e-05, "loss": 1.1536, "step": 5981 }, { "epoch": 0.8446766450155323, "grad_norm": 3.7621053903455772, "learning_rate": 1.686110207776993e-05, "loss": 1.1036, "step": 5982 }, { "epoch": 0.8448178480655182, "grad_norm": 3.4883826232171273, "learning_rate": 1.685999296215628e-05, "loss": 1.1202, "step": 5983 }, { "epoch": 0.8449590511155041, "grad_norm": 4.3562200750309295, "learning_rate": 1.6858883687119353e-05, "loss": 1.2759, "step": 5984 }, { "epoch": 0.84510025416549, "grad_norm": 3.2298711827746254, "learning_rate": 1.685777425268493e-05, "loss": 0.9925, "step": 5985 }, { "epoch": 0.8452414572154758, "grad_norm": 3.650172285953768, "learning_rate": 1.6856664658878797e-05, "loss": 1.0035, "step": 5986 }, { "epoch": 0.8453826602654617, "grad_norm": 2.713121026048089, "learning_rate": 1.685555490572674e-05, "loss": 0.7634, "step": 5987 }, { "epoch": 0.8455238633154476, "grad_norm": 3.989183796569977, "learning_rate": 1.6854444993254547e-05, "loss": 1.2674, "step": 5988 }, { "epoch": 0.8456650663654335, "grad_norm": 2.898507928961729, "learning_rate": 1.6853334921488014e-05, "loss": 0.7731, "step": 5989 }, { "epoch": 0.8458062694154194, "grad_norm": 3.288237801763016, "learning_rate": 1.6852224690452937e-05, "loss": 0.8893, "step": 5990 }, { "epoch": 0.8459474724654052, "grad_norm": 3.5848687969018838, "learning_rate": 1.6851114300175114e-05, "loss": 1.018, "step": 5991 }, { "epoch": 0.8460886755153911, "grad_norm": 3.375660919661249, "learning_rate": 1.685000375068036e-05, "loss": 0.9585, "step": 5992 }, { "epoch": 0.846229878565377, "grad_norm": 3.7888331781704117, "learning_rate": 1.684889304199447e-05, "loss": 0.8954, "step": 5993 }, { "epoch": 0.8463710816153629, "grad_norm": 3.008288054464839, "learning_rate": 1.6847782174143264e-05, "loss": 0.9375, "step": 5994 }, { "epoch": 0.8465122846653488, "grad_norm": 3.9979063036484646, "learning_rate": 1.6846671147152564e-05, "loss": 1.2974, "step": 5995 }, { "epoch": 0.8466534877153347, "grad_norm": 3.4565611951760293, "learning_rate": 1.6845559961048178e-05, "loss": 0.9289, "step": 5996 }, { "epoch": 0.8467946907653205, "grad_norm": 3.3792194410367404, "learning_rate": 1.6844448615855933e-05, "loss": 1.0626, "step": 5997 }, { "epoch": 0.8469358938153064, "grad_norm": 3.4065673742013654, "learning_rate": 1.6843337111601663e-05, "loss": 0.8975, "step": 5998 }, { "epoch": 0.8470770968652923, "grad_norm": 3.4456765407452123, "learning_rate": 1.6842225448311193e-05, "loss": 0.959, "step": 5999 }, { "epoch": 0.8472182999152782, "grad_norm": 3.516367057735747, "learning_rate": 1.6841113626010358e-05, "loss": 0.9892, "step": 6000 }, { "epoch": 0.8473595029652641, "grad_norm": 3.328190891300491, "learning_rate": 1.6840001644724993e-05, "loss": 0.9731, "step": 6001 }, { "epoch": 0.84750070601525, "grad_norm": 4.283489725670041, "learning_rate": 1.683888950448095e-05, "loss": 1.2836, "step": 6002 }, { "epoch": 0.8476419090652358, "grad_norm": 4.764352510373618, "learning_rate": 1.6837777205304063e-05, "loss": 1.2849, "step": 6003 }, { "epoch": 0.8477831121152217, "grad_norm": 3.094746301008526, "learning_rate": 1.6836664747220193e-05, "loss": 0.8584, "step": 6004 }, { "epoch": 0.8479243151652076, "grad_norm": 4.095990185460272, "learning_rate": 1.683555213025518e-05, "loss": 1.3037, "step": 6005 }, { "epoch": 0.8480655182151935, "grad_norm": 3.4196961850719725, "learning_rate": 1.683443935443489e-05, "loss": 0.9915, "step": 6006 }, { "epoch": 0.8482067212651794, "grad_norm": 3.1140189128516917, "learning_rate": 1.6833326419785183e-05, "loss": 0.8285, "step": 6007 }, { "epoch": 0.8483479243151653, "grad_norm": 3.34119361508651, "learning_rate": 1.6832213326331918e-05, "loss": 1.0373, "step": 6008 }, { "epoch": 0.8484891273651511, "grad_norm": 3.539990501877801, "learning_rate": 1.683110007410097e-05, "loss": 1.0949, "step": 6009 }, { "epoch": 0.8486303304151369, "grad_norm": 3.2435457159406864, "learning_rate": 1.6829986663118203e-05, "loss": 1.0914, "step": 6010 }, { "epoch": 0.8487715334651228, "grad_norm": 3.013124626280069, "learning_rate": 1.6828873093409496e-05, "loss": 1.0341, "step": 6011 }, { "epoch": 0.8489127365151087, "grad_norm": 3.2183107993804247, "learning_rate": 1.682775936500073e-05, "loss": 1.1383, "step": 6012 }, { "epoch": 0.8490539395650946, "grad_norm": 3.260655044267242, "learning_rate": 1.6826645477917784e-05, "loss": 1.0023, "step": 6013 }, { "epoch": 0.8491951426150804, "grad_norm": 3.7719118747281093, "learning_rate": 1.6825531432186545e-05, "loss": 1.2516, "step": 6014 }, { "epoch": 0.8493363456650663, "grad_norm": 3.132923744419716, "learning_rate": 1.68244172278329e-05, "loss": 0.9442, "step": 6015 }, { "epoch": 0.8494775487150522, "grad_norm": 3.379634604139086, "learning_rate": 1.6823302864882748e-05, "loss": 0.9652, "step": 6016 }, { "epoch": 0.8496187517650381, "grad_norm": 2.9323877865091257, "learning_rate": 1.6822188343361987e-05, "loss": 0.9111, "step": 6017 }, { "epoch": 0.849759954815024, "grad_norm": 3.776131296990772, "learning_rate": 1.682107366329651e-05, "loss": 1.1022, "step": 6018 }, { "epoch": 0.8499011578650099, "grad_norm": 3.4356591301461488, "learning_rate": 1.681995882471223e-05, "loss": 0.8991, "step": 6019 }, { "epoch": 0.8500423609149957, "grad_norm": 3.644600389172983, "learning_rate": 1.6818843827635052e-05, "loss": 1.1095, "step": 6020 }, { "epoch": 0.8501835639649816, "grad_norm": 3.4491024673137884, "learning_rate": 1.681772867209089e-05, "loss": 1.112, "step": 6021 }, { "epoch": 0.8503247670149675, "grad_norm": 3.5843637280499316, "learning_rate": 1.6816613358105655e-05, "loss": 1.1463, "step": 6022 }, { "epoch": 0.8504659700649534, "grad_norm": 3.7434089887512276, "learning_rate": 1.6815497885705274e-05, "loss": 1.0804, "step": 6023 }, { "epoch": 0.8506071731149393, "grad_norm": 2.985040934111276, "learning_rate": 1.681438225491566e-05, "loss": 0.9176, "step": 6024 }, { "epoch": 0.8507483761649252, "grad_norm": 4.2778375455354976, "learning_rate": 1.6813266465762747e-05, "loss": 1.0527, "step": 6025 }, { "epoch": 0.850889579214911, "grad_norm": 3.880199394104797, "learning_rate": 1.6812150518272465e-05, "loss": 1.1534, "step": 6026 }, { "epoch": 0.8510307822648969, "grad_norm": 4.566652508726914, "learning_rate": 1.6811034412470742e-05, "loss": 1.2007, "step": 6027 }, { "epoch": 0.8511719853148828, "grad_norm": 2.8532513464112665, "learning_rate": 1.6809918148383525e-05, "loss": 0.8738, "step": 6028 }, { "epoch": 0.8513131883648687, "grad_norm": 2.8402562460432494, "learning_rate": 1.680880172603675e-05, "loss": 0.8096, "step": 6029 }, { "epoch": 0.8514543914148546, "grad_norm": 3.4594612663924447, "learning_rate": 1.680768514545637e-05, "loss": 1.2129, "step": 6030 }, { "epoch": 0.8515955944648405, "grad_norm": 3.623254865202898, "learning_rate": 1.680656840666832e-05, "loss": 0.9823, "step": 6031 }, { "epoch": 0.8517367975148263, "grad_norm": 3.282064189799054, "learning_rate": 1.6805451509698562e-05, "loss": 0.8775, "step": 6032 }, { "epoch": 0.8518780005648122, "grad_norm": 3.4803239031762163, "learning_rate": 1.680433445457305e-05, "loss": 0.987, "step": 6033 }, { "epoch": 0.8520192036147981, "grad_norm": 3.7839025260267243, "learning_rate": 1.680321724131774e-05, "loss": 0.9519, "step": 6034 }, { "epoch": 0.852160406664784, "grad_norm": 3.9929586599219893, "learning_rate": 1.6802099869958602e-05, "loss": 1.1953, "step": 6035 }, { "epoch": 0.8523016097147699, "grad_norm": 3.3931530206747915, "learning_rate": 1.6800982340521605e-05, "loss": 0.884, "step": 6036 }, { "epoch": 0.8524428127647558, "grad_norm": 3.7606841431835485, "learning_rate": 1.6799864653032712e-05, "loss": 1.0551, "step": 6037 }, { "epoch": 0.8525840158147416, "grad_norm": 3.282784215872096, "learning_rate": 1.67987468075179e-05, "loss": 1.0403, "step": 6038 }, { "epoch": 0.8527252188647275, "grad_norm": 3.374610343238798, "learning_rate": 1.6797628804003148e-05, "loss": 0.9616, "step": 6039 }, { "epoch": 0.8528664219147134, "grad_norm": 3.304789869796052, "learning_rate": 1.679651064251444e-05, "loss": 1.0208, "step": 6040 }, { "epoch": 0.8530076249646993, "grad_norm": 4.720467490072776, "learning_rate": 1.679539232307776e-05, "loss": 1.305, "step": 6041 }, { "epoch": 0.8531488280146852, "grad_norm": 3.0000338389703414, "learning_rate": 1.6794273845719096e-05, "loss": 0.9501, "step": 6042 }, { "epoch": 0.853290031064671, "grad_norm": 3.504863832783145, "learning_rate": 1.6793155210464442e-05, "loss": 1.0481, "step": 6043 }, { "epoch": 0.8534312341146568, "grad_norm": 3.776899460336821, "learning_rate": 1.6792036417339797e-05, "loss": 1.2887, "step": 6044 }, { "epoch": 0.8535724371646427, "grad_norm": 4.071407234080875, "learning_rate": 1.6790917466371156e-05, "loss": 1.2395, "step": 6045 }, { "epoch": 0.8537136402146286, "grad_norm": 3.4107939974973913, "learning_rate": 1.6789798357584524e-05, "loss": 0.9579, "step": 6046 }, { "epoch": 0.8538548432646145, "grad_norm": 3.163487330144368, "learning_rate": 1.6788679091005918e-05, "loss": 1.0122, "step": 6047 }, { "epoch": 0.8539960463146004, "grad_norm": 4.023077855125758, "learning_rate": 1.6787559666661336e-05, "loss": 1.2082, "step": 6048 }, { "epoch": 0.8541372493645862, "grad_norm": 2.777848852805815, "learning_rate": 1.6786440084576797e-05, "loss": 1.02, "step": 6049 }, { "epoch": 0.8542784524145721, "grad_norm": 4.1494539376252, "learning_rate": 1.6785320344778325e-05, "loss": 0.9826, "step": 6050 }, { "epoch": 0.854419655464558, "grad_norm": 3.186119293545814, "learning_rate": 1.6784200447291937e-05, "loss": 1.1375, "step": 6051 }, { "epoch": 0.8545608585145439, "grad_norm": 3.182507906369065, "learning_rate": 1.678308039214366e-05, "loss": 0.9583, "step": 6052 }, { "epoch": 0.8547020615645298, "grad_norm": 3.5299493249564713, "learning_rate": 1.6781960179359525e-05, "loss": 1.0502, "step": 6053 }, { "epoch": 0.8548432646145157, "grad_norm": 4.039997484424329, "learning_rate": 1.6780839808965566e-05, "loss": 0.9806, "step": 6054 }, { "epoch": 0.8549844676645015, "grad_norm": 2.8433575939759796, "learning_rate": 1.677971928098782e-05, "loss": 0.7921, "step": 6055 }, { "epoch": 0.8551256707144874, "grad_norm": 2.9517681261817112, "learning_rate": 1.6778598595452324e-05, "loss": 0.7332, "step": 6056 }, { "epoch": 0.8552668737644733, "grad_norm": 3.384608137580897, "learning_rate": 1.677747775238512e-05, "loss": 0.9591, "step": 6057 }, { "epoch": 0.8554080768144592, "grad_norm": 2.732539801205947, "learning_rate": 1.6776356751812262e-05, "loss": 0.8074, "step": 6058 }, { "epoch": 0.8555492798644451, "grad_norm": 3.2611671988352233, "learning_rate": 1.67752355937598e-05, "loss": 0.8809, "step": 6059 }, { "epoch": 0.855690482914431, "grad_norm": 2.9810093378248013, "learning_rate": 1.677411427825379e-05, "loss": 0.8528, "step": 6060 }, { "epoch": 0.8558316859644168, "grad_norm": 3.3318931141833383, "learning_rate": 1.677299280532029e-05, "loss": 0.8783, "step": 6061 }, { "epoch": 0.8559728890144027, "grad_norm": 3.9847521448834566, "learning_rate": 1.677187117498536e-05, "loss": 1.1644, "step": 6062 }, { "epoch": 0.8561140920643886, "grad_norm": 3.577017818148409, "learning_rate": 1.6770749387275067e-05, "loss": 1.1586, "step": 6063 }, { "epoch": 0.8562552951143745, "grad_norm": 2.9874068938635343, "learning_rate": 1.6769627442215485e-05, "loss": 0.7475, "step": 6064 }, { "epoch": 0.8563964981643604, "grad_norm": 9.271714399616094, "learning_rate": 1.6768505339832686e-05, "loss": 1.183, "step": 6065 }, { "epoch": 0.8565377012143462, "grad_norm": 3.3077815242553577, "learning_rate": 1.6767383080152744e-05, "loss": 0.866, "step": 6066 }, { "epoch": 0.8566789042643321, "grad_norm": 3.291177333301306, "learning_rate": 1.6766260663201742e-05, "loss": 1.0186, "step": 6067 }, { "epoch": 0.856820107314318, "grad_norm": 3.5821401809428295, "learning_rate": 1.6765138089005765e-05, "loss": 0.9695, "step": 6068 }, { "epoch": 0.8569613103643039, "grad_norm": 4.049696103271679, "learning_rate": 1.67640153575909e-05, "loss": 0.9434, "step": 6069 }, { "epoch": 0.8571025134142898, "grad_norm": 5.698188451273226, "learning_rate": 1.6762892468983237e-05, "loss": 0.9614, "step": 6070 }, { "epoch": 0.8572437164642757, "grad_norm": 3.3931653026104938, "learning_rate": 1.6761769423208877e-05, "loss": 1.0285, "step": 6071 }, { "epoch": 0.8573849195142615, "grad_norm": 3.3594930758612476, "learning_rate": 1.6760646220293916e-05, "loss": 1.0619, "step": 6072 }, { "epoch": 0.8575261225642474, "grad_norm": 3.47578477216948, "learning_rate": 1.6759522860264457e-05, "loss": 0.9932, "step": 6073 }, { "epoch": 0.8576673256142333, "grad_norm": 3.171957061139327, "learning_rate": 1.6758399343146602e-05, "loss": 0.8507, "step": 6074 }, { "epoch": 0.8578085286642192, "grad_norm": 3.2458969600722343, "learning_rate": 1.6757275668966467e-05, "loss": 1.0089, "step": 6075 }, { "epoch": 0.8579497317142051, "grad_norm": 3.2572139943459164, "learning_rate": 1.6756151837750167e-05, "loss": 0.8151, "step": 6076 }, { "epoch": 0.858090934764191, "grad_norm": 3.603855275924478, "learning_rate": 1.6755027849523812e-05, "loss": 0.9957, "step": 6077 }, { "epoch": 0.8582321378141767, "grad_norm": 3.0389813342513574, "learning_rate": 1.6753903704313527e-05, "loss": 0.888, "step": 6078 }, { "epoch": 0.8583733408641626, "grad_norm": 3.405328259071886, "learning_rate": 1.6752779402145442e-05, "loss": 1.0856, "step": 6079 }, { "epoch": 0.8585145439141485, "grad_norm": 3.9636419871648476, "learning_rate": 1.6751654943045672e-05, "loss": 0.9363, "step": 6080 }, { "epoch": 0.8586557469641344, "grad_norm": 3.1089049886091376, "learning_rate": 1.6750530327040363e-05, "loss": 1.0655, "step": 6081 }, { "epoch": 0.8587969500141203, "grad_norm": 3.9424144043238725, "learning_rate": 1.674940555415564e-05, "loss": 0.9753, "step": 6082 }, { "epoch": 0.8589381530641061, "grad_norm": 3.1623035837238636, "learning_rate": 1.674828062441765e-05, "loss": 1.0934, "step": 6083 }, { "epoch": 0.859079356114092, "grad_norm": 5.1954162679433775, "learning_rate": 1.674715553785253e-05, "loss": 1.0995, "step": 6084 }, { "epoch": 0.8592205591640779, "grad_norm": 3.175457894432726, "learning_rate": 1.6746030294486434e-05, "loss": 0.9036, "step": 6085 }, { "epoch": 0.8593617622140638, "grad_norm": 3.161042377726736, "learning_rate": 1.6744904894345504e-05, "loss": 0.7929, "step": 6086 }, { "epoch": 0.8595029652640497, "grad_norm": 3.2320082661026643, "learning_rate": 1.6743779337455896e-05, "loss": 0.9879, "step": 6087 }, { "epoch": 0.8596441683140356, "grad_norm": 2.9807276603167674, "learning_rate": 1.674265362384377e-05, "loss": 0.9225, "step": 6088 }, { "epoch": 0.8597853713640214, "grad_norm": 3.6519290063731344, "learning_rate": 1.6741527753535285e-05, "loss": 1.0014, "step": 6089 }, { "epoch": 0.8599265744140073, "grad_norm": 3.4737802403651714, "learning_rate": 1.6740401726556608e-05, "loss": 0.8189, "step": 6090 }, { "epoch": 0.8600677774639932, "grad_norm": 4.038730007206835, "learning_rate": 1.6739275542933905e-05, "loss": 1.1654, "step": 6091 }, { "epoch": 0.8602089805139791, "grad_norm": 3.2774203227422554, "learning_rate": 1.6738149202693347e-05, "loss": 1.1676, "step": 6092 }, { "epoch": 0.860350183563965, "grad_norm": 3.613306478703026, "learning_rate": 1.6737022705861113e-05, "loss": 1.0239, "step": 6093 }, { "epoch": 0.8604913866139509, "grad_norm": 3.6493783776799287, "learning_rate": 1.6735896052463384e-05, "loss": 1.1252, "step": 6094 }, { "epoch": 0.8606325896639367, "grad_norm": 3.873179412488859, "learning_rate": 1.6734769242526336e-05, "loss": 1.1744, "step": 6095 }, { "epoch": 0.8607737927139226, "grad_norm": 3.3223407311056743, "learning_rate": 1.673364227607616e-05, "loss": 1.0525, "step": 6096 }, { "epoch": 0.8609149957639085, "grad_norm": 3.0565736397021794, "learning_rate": 1.6732515153139048e-05, "loss": 0.8606, "step": 6097 }, { "epoch": 0.8610561988138944, "grad_norm": 4.134945252275968, "learning_rate": 1.673138787374119e-05, "loss": 1.2448, "step": 6098 }, { "epoch": 0.8611974018638803, "grad_norm": 2.949351626146648, "learning_rate": 1.6730260437908782e-05, "loss": 0.8708, "step": 6099 }, { "epoch": 0.8613386049138662, "grad_norm": 3.5318671757120392, "learning_rate": 1.672913284566803e-05, "loss": 1.0523, "step": 6100 }, { "epoch": 0.861479807963852, "grad_norm": 3.602606330278198, "learning_rate": 1.6728005097045134e-05, "loss": 1.1229, "step": 6101 }, { "epoch": 0.8616210110138379, "grad_norm": 3.849911338314744, "learning_rate": 1.672687719206631e-05, "loss": 0.8684, "step": 6102 }, { "epoch": 0.8617622140638238, "grad_norm": 4.071488764660193, "learning_rate": 1.6725749130757766e-05, "loss": 1.2061, "step": 6103 }, { "epoch": 0.8619034171138097, "grad_norm": 4.675815070823948, "learning_rate": 1.672462091314571e-05, "loss": 1.1088, "step": 6104 }, { "epoch": 0.8620446201637956, "grad_norm": 3.5494945156867135, "learning_rate": 1.672349253925637e-05, "loss": 1.0842, "step": 6105 }, { "epoch": 0.8621858232137815, "grad_norm": 3.9219828419128335, "learning_rate": 1.672236400911597e-05, "loss": 1.0711, "step": 6106 }, { "epoch": 0.8623270262637673, "grad_norm": 3.4306431927102827, "learning_rate": 1.6721235322750735e-05, "loss": 0.9623, "step": 6107 }, { "epoch": 0.8624682293137532, "grad_norm": 3.982466018882758, "learning_rate": 1.672010648018689e-05, "loss": 1.1817, "step": 6108 }, { "epoch": 0.8626094323637391, "grad_norm": 2.9927740312941, "learning_rate": 1.6718977481450675e-05, "loss": 0.8796, "step": 6109 }, { "epoch": 0.862750635413725, "grad_norm": 3.8395106386995823, "learning_rate": 1.6717848326568327e-05, "loss": 0.9777, "step": 6110 }, { "epoch": 0.8628918384637109, "grad_norm": 3.4804166702288253, "learning_rate": 1.671671901556608e-05, "loss": 0.9834, "step": 6111 }, { "epoch": 0.8630330415136966, "grad_norm": 2.9115105955432723, "learning_rate": 1.6715589548470187e-05, "loss": 0.7626, "step": 6112 }, { "epoch": 0.8631742445636825, "grad_norm": 3.810005976076049, "learning_rate": 1.671445992530689e-05, "loss": 1.0885, "step": 6113 }, { "epoch": 0.8633154476136684, "grad_norm": 3.2895221802070136, "learning_rate": 1.6713330146102447e-05, "loss": 1.0593, "step": 6114 }, { "epoch": 0.8634566506636543, "grad_norm": 3.531837170542692, "learning_rate": 1.6712200210883112e-05, "loss": 1.0891, "step": 6115 }, { "epoch": 0.8635978537136402, "grad_norm": 3.189103198412703, "learning_rate": 1.6711070119675138e-05, "loss": 0.9089, "step": 6116 }, { "epoch": 0.8637390567636261, "grad_norm": 3.2312887887411788, "learning_rate": 1.6709939872504794e-05, "loss": 0.9333, "step": 6117 }, { "epoch": 0.8638802598136119, "grad_norm": 2.914353376719712, "learning_rate": 1.6708809469398347e-05, "loss": 0.7367, "step": 6118 }, { "epoch": 0.8640214628635978, "grad_norm": 3.3414042749116337, "learning_rate": 1.6707678910382066e-05, "loss": 0.7874, "step": 6119 }, { "epoch": 0.8641626659135837, "grad_norm": 3.2793259080259576, "learning_rate": 1.6706548195482222e-05, "loss": 1.0546, "step": 6120 }, { "epoch": 0.8643038689635696, "grad_norm": 3.261014368967626, "learning_rate": 1.6705417324725094e-05, "loss": 1.0215, "step": 6121 }, { "epoch": 0.8644450720135555, "grad_norm": 3.489065743662714, "learning_rate": 1.6704286298136966e-05, "loss": 1.2098, "step": 6122 }, { "epoch": 0.8645862750635414, "grad_norm": 3.563452513261642, "learning_rate": 1.6703155115744118e-05, "loss": 1.0723, "step": 6123 }, { "epoch": 0.8647274781135272, "grad_norm": 3.1035699785872226, "learning_rate": 1.670202377757284e-05, "loss": 0.8516, "step": 6124 }, { "epoch": 0.8648686811635131, "grad_norm": 3.874686455997525, "learning_rate": 1.6700892283649426e-05, "loss": 0.9156, "step": 6125 }, { "epoch": 0.865009884213499, "grad_norm": 3.1560848951164404, "learning_rate": 1.6699760634000166e-05, "loss": 0.8749, "step": 6126 }, { "epoch": 0.8651510872634849, "grad_norm": 3.682451365047172, "learning_rate": 1.6698628828651363e-05, "loss": 1.1906, "step": 6127 }, { "epoch": 0.8652922903134708, "grad_norm": 3.7129858653629384, "learning_rate": 1.669749686762932e-05, "loss": 1.1444, "step": 6128 }, { "epoch": 0.8654334933634567, "grad_norm": 3.066719762723464, "learning_rate": 1.6696364750960342e-05, "loss": 0.7812, "step": 6129 }, { "epoch": 0.8655746964134425, "grad_norm": 4.179279510605568, "learning_rate": 1.669523247867074e-05, "loss": 1.1416, "step": 6130 }, { "epoch": 0.8657158994634284, "grad_norm": 3.225081554794639, "learning_rate": 1.669410005078682e-05, "loss": 0.9832, "step": 6131 }, { "epoch": 0.8658571025134143, "grad_norm": 3.560434730120382, "learning_rate": 1.6692967467334915e-05, "loss": 1.0603, "step": 6132 }, { "epoch": 0.8659983055634002, "grad_norm": 3.4413839665158106, "learning_rate": 1.6691834728341332e-05, "loss": 0.9875, "step": 6133 }, { "epoch": 0.8661395086133861, "grad_norm": 2.8964039482171304, "learning_rate": 1.6690701833832398e-05, "loss": 0.7732, "step": 6134 }, { "epoch": 0.866280711663372, "grad_norm": 3.744634570405144, "learning_rate": 1.668956878383445e-05, "loss": 1.125, "step": 6135 }, { "epoch": 0.8664219147133578, "grad_norm": 4.0361579114896395, "learning_rate": 1.668843557837381e-05, "loss": 1.3351, "step": 6136 }, { "epoch": 0.8665631177633437, "grad_norm": 3.0918469729158597, "learning_rate": 1.6687302217476808e-05, "loss": 0.9888, "step": 6137 }, { "epoch": 0.8667043208133296, "grad_norm": 4.38969457858748, "learning_rate": 1.6686168701169797e-05, "loss": 1.2701, "step": 6138 }, { "epoch": 0.8668455238633155, "grad_norm": 2.9671297091334092, "learning_rate": 1.6685035029479114e-05, "loss": 0.9548, "step": 6139 }, { "epoch": 0.8669867269133014, "grad_norm": 3.702093809567638, "learning_rate": 1.66839012024311e-05, "loss": 1.1245, "step": 6140 }, { "epoch": 0.8671279299632872, "grad_norm": 3.0508530686611066, "learning_rate": 1.668276722005211e-05, "loss": 0.8615, "step": 6141 }, { "epoch": 0.8672691330132731, "grad_norm": 3.4664240345204225, "learning_rate": 1.66816330823685e-05, "loss": 1.333, "step": 6142 }, { "epoch": 0.867410336063259, "grad_norm": 3.1170923116270677, "learning_rate": 1.6680498789406618e-05, "loss": 1.0463, "step": 6143 }, { "epoch": 0.8675515391132449, "grad_norm": 2.772677464869212, "learning_rate": 1.667936434119283e-05, "loss": 0.8761, "step": 6144 }, { "epoch": 0.8676927421632308, "grad_norm": 3.829142119377285, "learning_rate": 1.6678229737753498e-05, "loss": 1.1472, "step": 6145 }, { "epoch": 0.8678339452132166, "grad_norm": 3.1825534932746886, "learning_rate": 1.6677094979114993e-05, "loss": 0.9769, "step": 6146 }, { "epoch": 0.8679751482632024, "grad_norm": 3.7709957933903873, "learning_rate": 1.6675960065303684e-05, "loss": 1.0858, "step": 6147 }, { "epoch": 0.8681163513131883, "grad_norm": 3.5298875632061986, "learning_rate": 1.6674824996345947e-05, "loss": 0.9852, "step": 6148 }, { "epoch": 0.8682575543631742, "grad_norm": 3.1141362057383963, "learning_rate": 1.6673689772268157e-05, "loss": 0.8774, "step": 6149 }, { "epoch": 0.8683987574131601, "grad_norm": 3.715323879230092, "learning_rate": 1.66725543930967e-05, "loss": 1.2924, "step": 6150 }, { "epoch": 0.868539960463146, "grad_norm": 3.122059165368689, "learning_rate": 1.6671418858857965e-05, "loss": 1.0271, "step": 6151 }, { "epoch": 0.8686811635131318, "grad_norm": 3.212771913201258, "learning_rate": 1.6670283169578333e-05, "loss": 0.7822, "step": 6152 }, { "epoch": 0.8688223665631177, "grad_norm": 4.0291150677539775, "learning_rate": 1.66691473252842e-05, "loss": 0.9919, "step": 6153 }, { "epoch": 0.8689635696131036, "grad_norm": 3.4402579336887227, "learning_rate": 1.6668011326001962e-05, "loss": 0.99, "step": 6154 }, { "epoch": 0.8691047726630895, "grad_norm": 4.6638851077472685, "learning_rate": 1.6666875171758024e-05, "loss": 1.3781, "step": 6155 }, { "epoch": 0.8692459757130754, "grad_norm": 3.0642228246860013, "learning_rate": 1.6665738862578783e-05, "loss": 0.7877, "step": 6156 }, { "epoch": 0.8693871787630613, "grad_norm": 3.8116689461326434, "learning_rate": 1.6664602398490653e-05, "loss": 1.0016, "step": 6157 }, { "epoch": 0.8695283818130471, "grad_norm": 3.3748572862791124, "learning_rate": 1.6663465779520042e-05, "loss": 0.8441, "step": 6158 }, { "epoch": 0.869669584863033, "grad_norm": 3.677776215052552, "learning_rate": 1.666232900569336e-05, "loss": 0.932, "step": 6159 }, { "epoch": 0.8698107879130189, "grad_norm": 3.2630293030495925, "learning_rate": 1.666119207703703e-05, "loss": 0.9645, "step": 6160 }, { "epoch": 0.8699519909630048, "grad_norm": 3.8471797987768714, "learning_rate": 1.6660054993577478e-05, "loss": 0.9793, "step": 6161 }, { "epoch": 0.8700931940129907, "grad_norm": 3.8806058029924113, "learning_rate": 1.665891775534112e-05, "loss": 1.0751, "step": 6162 }, { "epoch": 0.8702343970629766, "grad_norm": 3.61055876628945, "learning_rate": 1.6657780362354386e-05, "loss": 0.832, "step": 6163 }, { "epoch": 0.8703756001129624, "grad_norm": 3.7411962708995095, "learning_rate": 1.6656642814643716e-05, "loss": 1.0581, "step": 6164 }, { "epoch": 0.8705168031629483, "grad_norm": 4.0424798529708355, "learning_rate": 1.6655505112235545e-05, "loss": 1.2854, "step": 6165 }, { "epoch": 0.8706580062129342, "grad_norm": 3.1263876317703923, "learning_rate": 1.6654367255156303e-05, "loss": 0.9762, "step": 6166 }, { "epoch": 0.8707992092629201, "grad_norm": 3.4313831745773005, "learning_rate": 1.6653229243432442e-05, "loss": 1.2639, "step": 6167 }, { "epoch": 0.870940412312906, "grad_norm": 4.337287401905371, "learning_rate": 1.6652091077090405e-05, "loss": 1.0013, "step": 6168 }, { "epoch": 0.8710816153628919, "grad_norm": 3.8162229848757225, "learning_rate": 1.6650952756156645e-05, "loss": 0.8968, "step": 6169 }, { "epoch": 0.8712228184128777, "grad_norm": 3.116915704494657, "learning_rate": 1.664981428065762e-05, "loss": 0.9348, "step": 6170 }, { "epoch": 0.8713640214628636, "grad_norm": 3.224253361841439, "learning_rate": 1.664867565061978e-05, "loss": 1.0271, "step": 6171 }, { "epoch": 0.8715052245128495, "grad_norm": 3.802448027088647, "learning_rate": 1.6647536866069587e-05, "loss": 0.897, "step": 6172 }, { "epoch": 0.8716464275628354, "grad_norm": 4.811224768398756, "learning_rate": 1.6646397927033507e-05, "loss": 1.3397, "step": 6173 }, { "epoch": 0.8717876306128213, "grad_norm": 3.258466040667012, "learning_rate": 1.6645258833538015e-05, "loss": 0.8724, "step": 6174 }, { "epoch": 0.8719288336628072, "grad_norm": 3.5506554081928305, "learning_rate": 1.664411958560957e-05, "loss": 0.9896, "step": 6175 }, { "epoch": 0.872070036712793, "grad_norm": 3.270455537782497, "learning_rate": 1.6642980183274665e-05, "loss": 0.9597, "step": 6176 }, { "epoch": 0.8722112397627789, "grad_norm": 3.9113333412561153, "learning_rate": 1.664184062655976e-05, "loss": 1.0502, "step": 6177 }, { "epoch": 0.8723524428127648, "grad_norm": 4.529919288588592, "learning_rate": 1.6640700915491354e-05, "loss": 1.2265, "step": 6178 }, { "epoch": 0.8724936458627507, "grad_norm": 3.8357593319019547, "learning_rate": 1.6639561050095926e-05, "loss": 1.0841, "step": 6179 }, { "epoch": 0.8726348489127365, "grad_norm": 3.565858855707741, "learning_rate": 1.6638421030399962e-05, "loss": 1.1105, "step": 6180 }, { "epoch": 0.8727760519627223, "grad_norm": 4.061910119463994, "learning_rate": 1.6637280856429964e-05, "loss": 1.1354, "step": 6181 }, { "epoch": 0.8729172550127082, "grad_norm": 3.516603525418769, "learning_rate": 1.6636140528212427e-05, "loss": 1.0001, "step": 6182 }, { "epoch": 0.8730584580626941, "grad_norm": 3.62786730936123, "learning_rate": 1.6635000045773843e-05, "loss": 1.0323, "step": 6183 }, { "epoch": 0.87319966111268, "grad_norm": 3.302388724996381, "learning_rate": 1.663385940914073e-05, "loss": 0.9775, "step": 6184 }, { "epoch": 0.8733408641626659, "grad_norm": 4.270190674313833, "learning_rate": 1.6632718618339584e-05, "loss": 1.2565, "step": 6185 }, { "epoch": 0.8734820672126518, "grad_norm": 3.2573198540867874, "learning_rate": 1.6631577673396925e-05, "loss": 0.945, "step": 6186 }, { "epoch": 0.8736232702626376, "grad_norm": 3.9717069534940226, "learning_rate": 1.6630436574339266e-05, "loss": 1.181, "step": 6187 }, { "epoch": 0.8737644733126235, "grad_norm": 3.0512203039525967, "learning_rate": 1.6629295321193125e-05, "loss": 0.8565, "step": 6188 }, { "epoch": 0.8739056763626094, "grad_norm": 3.942823204000492, "learning_rate": 1.662815391398502e-05, "loss": 1.035, "step": 6189 }, { "epoch": 0.8740468794125953, "grad_norm": 4.908340785805079, "learning_rate": 1.6627012352741482e-05, "loss": 1.4643, "step": 6190 }, { "epoch": 0.8741880824625812, "grad_norm": 3.28615509718307, "learning_rate": 1.662587063748904e-05, "loss": 0.992, "step": 6191 }, { "epoch": 0.8743292855125671, "grad_norm": 3.6132859459871685, "learning_rate": 1.6624728768254225e-05, "loss": 0.9488, "step": 6192 }, { "epoch": 0.8744704885625529, "grad_norm": 3.0247386337033517, "learning_rate": 1.6623586745063573e-05, "loss": 1.0996, "step": 6193 }, { "epoch": 0.8746116916125388, "grad_norm": 2.8386863019358963, "learning_rate": 1.6622444567943627e-05, "loss": 0.8484, "step": 6194 }, { "epoch": 0.8747528946625247, "grad_norm": 4.232051628092746, "learning_rate": 1.6621302236920928e-05, "loss": 1.3728, "step": 6195 }, { "epoch": 0.8748940977125106, "grad_norm": 3.8467378085212016, "learning_rate": 1.662015975202203e-05, "loss": 0.8706, "step": 6196 }, { "epoch": 0.8750353007624965, "grad_norm": 3.2642407940941203, "learning_rate": 1.6619017113273473e-05, "loss": 0.9245, "step": 6197 }, { "epoch": 0.8751765038124824, "grad_norm": 3.2589085965369518, "learning_rate": 1.6617874320701813e-05, "loss": 1.1047, "step": 6198 }, { "epoch": 0.8753177068624682, "grad_norm": 5.474735334788356, "learning_rate": 1.6616731374333622e-05, "loss": 0.9211, "step": 6199 }, { "epoch": 0.8754589099124541, "grad_norm": 5.154954649488404, "learning_rate": 1.6615588274195445e-05, "loss": 1.1485, "step": 6200 }, { "epoch": 0.87560011296244, "grad_norm": 3.696517917696646, "learning_rate": 1.6614445020313854e-05, "loss": 1.1142, "step": 6201 }, { "epoch": 0.8757413160124259, "grad_norm": 4.0640408154069405, "learning_rate": 1.661330161271542e-05, "loss": 1.1884, "step": 6202 }, { "epoch": 0.8758825190624118, "grad_norm": 2.7530014081140717, "learning_rate": 1.661215805142671e-05, "loss": 0.7921, "step": 6203 }, { "epoch": 0.8760237221123977, "grad_norm": 3.0952352581142852, "learning_rate": 1.6611014336474303e-05, "loss": 0.7491, "step": 6204 }, { "epoch": 0.8761649251623835, "grad_norm": 3.511557754079026, "learning_rate": 1.6609870467884777e-05, "loss": 0.9773, "step": 6205 }, { "epoch": 0.8763061282123694, "grad_norm": 3.371115043111173, "learning_rate": 1.6608726445684715e-05, "loss": 0.9392, "step": 6206 }, { "epoch": 0.8764473312623553, "grad_norm": 4.027351241653331, "learning_rate": 1.6607582269900707e-05, "loss": 1.2513, "step": 6207 }, { "epoch": 0.8765885343123412, "grad_norm": 3.2201505817912834, "learning_rate": 1.6606437940559342e-05, "loss": 0.8706, "step": 6208 }, { "epoch": 0.8767297373623271, "grad_norm": 3.624801513001558, "learning_rate": 1.6605293457687212e-05, "loss": 0.9893, "step": 6209 }, { "epoch": 0.876870940412313, "grad_norm": 3.491949512786377, "learning_rate": 1.6604148821310912e-05, "loss": 1.0759, "step": 6210 }, { "epoch": 0.8770121434622988, "grad_norm": 4.721126249596484, "learning_rate": 1.6603004031457043e-05, "loss": 1.3192, "step": 6211 }, { "epoch": 0.8771533465122847, "grad_norm": 3.0614910327264315, "learning_rate": 1.6601859088152215e-05, "loss": 0.7274, "step": 6212 }, { "epoch": 0.8772945495622706, "grad_norm": 3.958111122229993, "learning_rate": 1.6600713991423036e-05, "loss": 0.8517, "step": 6213 }, { "epoch": 0.8774357526122564, "grad_norm": 3.2474422359233115, "learning_rate": 1.6599568741296112e-05, "loss": 1.0641, "step": 6214 }, { "epoch": 0.8775769556622423, "grad_norm": 3.4350127734173834, "learning_rate": 1.659842333779806e-05, "loss": 1.0228, "step": 6215 }, { "epoch": 0.8777181587122281, "grad_norm": 4.329636253840294, "learning_rate": 1.6597277780955502e-05, "loss": 1.2182, "step": 6216 }, { "epoch": 0.877859361762214, "grad_norm": 2.8523648955721166, "learning_rate": 1.6596132070795054e-05, "loss": 0.7745, "step": 6217 }, { "epoch": 0.8780005648121999, "grad_norm": 4.1008291301997595, "learning_rate": 1.6594986207343343e-05, "loss": 1.1768, "step": 6218 }, { "epoch": 0.8781417678621858, "grad_norm": 3.443255216507718, "learning_rate": 1.6593840190627007e-05, "loss": 0.9041, "step": 6219 }, { "epoch": 0.8782829709121717, "grad_norm": 3.1302332857071873, "learning_rate": 1.6592694020672667e-05, "loss": 0.7826, "step": 6220 }, { "epoch": 0.8784241739621576, "grad_norm": 3.582102981258048, "learning_rate": 1.659154769750697e-05, "loss": 1.1242, "step": 6221 }, { "epoch": 0.8785653770121434, "grad_norm": 4.042865594327094, "learning_rate": 1.659040122115655e-05, "loss": 1.3694, "step": 6222 }, { "epoch": 0.8787065800621293, "grad_norm": 3.5673801196002475, "learning_rate": 1.658925459164805e-05, "loss": 1.0865, "step": 6223 }, { "epoch": 0.8788477831121152, "grad_norm": 5.1780488700183005, "learning_rate": 1.658810780900812e-05, "loss": 1.0968, "step": 6224 }, { "epoch": 0.8789889861621011, "grad_norm": 3.594870583517368, "learning_rate": 1.6586960873263412e-05, "loss": 0.9476, "step": 6225 }, { "epoch": 0.879130189212087, "grad_norm": 3.179824230257961, "learning_rate": 1.6585813784440575e-05, "loss": 0.7881, "step": 6226 }, { "epoch": 0.8792713922620728, "grad_norm": 3.797779309762246, "learning_rate": 1.658466654256627e-05, "loss": 0.9513, "step": 6227 }, { "epoch": 0.8794125953120587, "grad_norm": 3.3224478700011626, "learning_rate": 1.6583519147667157e-05, "loss": 0.9803, "step": 6228 }, { "epoch": 0.8795537983620446, "grad_norm": 3.4719079045151906, "learning_rate": 1.6582371599769908e-05, "loss": 1.0535, "step": 6229 }, { "epoch": 0.8796950014120305, "grad_norm": 3.4991666084095825, "learning_rate": 1.658122389890118e-05, "loss": 1.1369, "step": 6230 }, { "epoch": 0.8798362044620164, "grad_norm": 3.724741789072935, "learning_rate": 1.658007604508765e-05, "loss": 1.0079, "step": 6231 }, { "epoch": 0.8799774075120023, "grad_norm": 4.516147467819665, "learning_rate": 1.6578928038355998e-05, "loss": 1.0425, "step": 6232 }, { "epoch": 0.8801186105619881, "grad_norm": 3.4418366505744973, "learning_rate": 1.65777798787329e-05, "loss": 1.0312, "step": 6233 }, { "epoch": 0.880259813611974, "grad_norm": 3.4156719793020582, "learning_rate": 1.6576631566245037e-05, "loss": 1.0794, "step": 6234 }, { "epoch": 0.8804010166619599, "grad_norm": 3.336970565056934, "learning_rate": 1.6575483100919094e-05, "loss": 0.8873, "step": 6235 }, { "epoch": 0.8805422197119458, "grad_norm": 3.139629455628113, "learning_rate": 1.6574334482781768e-05, "loss": 0.8668, "step": 6236 }, { "epoch": 0.8806834227619317, "grad_norm": 3.6281350729788513, "learning_rate": 1.6573185711859748e-05, "loss": 1.1227, "step": 6237 }, { "epoch": 0.8808246258119176, "grad_norm": 3.537772902473077, "learning_rate": 1.6572036788179728e-05, "loss": 1.006, "step": 6238 }, { "epoch": 0.8809658288619034, "grad_norm": 3.74578119799036, "learning_rate": 1.657088771176841e-05, "loss": 1.1073, "step": 6239 }, { "epoch": 0.8811070319118893, "grad_norm": 3.524889044419798, "learning_rate": 1.65697384826525e-05, "loss": 1.0663, "step": 6240 }, { "epoch": 0.8812482349618752, "grad_norm": 3.0092418835995725, "learning_rate": 1.6568589100858706e-05, "loss": 1.0693, "step": 6241 }, { "epoch": 0.8813894380118611, "grad_norm": 2.9137986035655126, "learning_rate": 1.6567439566413737e-05, "loss": 0.9155, "step": 6242 }, { "epoch": 0.881530641061847, "grad_norm": 3.4154734288751807, "learning_rate": 1.6566289879344314e-05, "loss": 0.9957, "step": 6243 }, { "epoch": 0.8816718441118329, "grad_norm": 3.2052559608773095, "learning_rate": 1.6565140039677142e-05, "loss": 0.8239, "step": 6244 }, { "epoch": 0.8818130471618187, "grad_norm": 3.8599966111681385, "learning_rate": 1.6563990047438956e-05, "loss": 1.1778, "step": 6245 }, { "epoch": 0.8819542502118046, "grad_norm": 4.088854771231255, "learning_rate": 1.6562839902656476e-05, "loss": 1.1967, "step": 6246 }, { "epoch": 0.8820954532617905, "grad_norm": 4.1941405937695135, "learning_rate": 1.656168960535643e-05, "loss": 1.169, "step": 6247 }, { "epoch": 0.8822366563117763, "grad_norm": 3.2051947384646247, "learning_rate": 1.656053915556555e-05, "loss": 0.9834, "step": 6248 }, { "epoch": 0.8823778593617622, "grad_norm": 3.030973388127168, "learning_rate": 1.6559388553310574e-05, "loss": 0.7882, "step": 6249 }, { "epoch": 0.882519062411748, "grad_norm": 4.068506592870806, "learning_rate": 1.6558237798618243e-05, "loss": 1.1806, "step": 6250 }, { "epoch": 0.8826602654617339, "grad_norm": 3.6023619537525855, "learning_rate": 1.6557086891515295e-05, "loss": 1.1076, "step": 6251 }, { "epoch": 0.8828014685117198, "grad_norm": 3.0954901244041415, "learning_rate": 1.655593583202848e-05, "loss": 0.9574, "step": 6252 }, { "epoch": 0.8829426715617057, "grad_norm": 2.72457559893778, "learning_rate": 1.6554784620184546e-05, "loss": 0.766, "step": 6253 }, { "epoch": 0.8830838746116916, "grad_norm": 3.9988340871038295, "learning_rate": 1.6553633256010254e-05, "loss": 1.2421, "step": 6254 }, { "epoch": 0.8832250776616775, "grad_norm": 2.8220170000937888, "learning_rate": 1.655248173953235e-05, "loss": 0.8059, "step": 6255 }, { "epoch": 0.8833662807116633, "grad_norm": 3.7668186768315275, "learning_rate": 1.6551330070777603e-05, "loss": 1.1064, "step": 6256 }, { "epoch": 0.8835074837616492, "grad_norm": 3.664217130695584, "learning_rate": 1.6550178249772773e-05, "loss": 1.0728, "step": 6257 }, { "epoch": 0.8836486868116351, "grad_norm": 3.6203477488565583, "learning_rate": 1.6549026276544627e-05, "loss": 1.0805, "step": 6258 }, { "epoch": 0.883789889861621, "grad_norm": 3.152262362399964, "learning_rate": 1.654787415111994e-05, "loss": 0.9683, "step": 6259 }, { "epoch": 0.8839310929116069, "grad_norm": 3.9068956326584763, "learning_rate": 1.6546721873525488e-05, "loss": 1.0462, "step": 6260 }, { "epoch": 0.8840722959615928, "grad_norm": 3.7651160621958146, "learning_rate": 1.6545569443788047e-05, "loss": 1.0376, "step": 6261 }, { "epoch": 0.8842134990115786, "grad_norm": 3.633450733541613, "learning_rate": 1.65444168619344e-05, "loss": 0.9889, "step": 6262 }, { "epoch": 0.8843547020615645, "grad_norm": 3.126164312983101, "learning_rate": 1.6543264127991326e-05, "loss": 0.9411, "step": 6263 }, { "epoch": 0.8844959051115504, "grad_norm": 3.230417965774497, "learning_rate": 1.6542111241985623e-05, "loss": 0.7864, "step": 6264 }, { "epoch": 0.8846371081615363, "grad_norm": 3.2550130678261016, "learning_rate": 1.6540958203944078e-05, "loss": 0.9913, "step": 6265 }, { "epoch": 0.8847783112115222, "grad_norm": 3.429588042849031, "learning_rate": 1.6539805013893493e-05, "loss": 0.9504, "step": 6266 }, { "epoch": 0.8849195142615081, "grad_norm": 3.4760504327165593, "learning_rate": 1.6538651671860663e-05, "loss": 0.9265, "step": 6267 }, { "epoch": 0.8850607173114939, "grad_norm": 4.009079652428015, "learning_rate": 1.653749817787239e-05, "loss": 1.1381, "step": 6268 }, { "epoch": 0.8852019203614798, "grad_norm": 3.5665924349938902, "learning_rate": 1.653634453195548e-05, "loss": 1.0588, "step": 6269 }, { "epoch": 0.8853431234114657, "grad_norm": 3.757490811110777, "learning_rate": 1.653519073413675e-05, "loss": 1.1374, "step": 6270 }, { "epoch": 0.8854843264614516, "grad_norm": 3.230801478874337, "learning_rate": 1.653403678444301e-05, "loss": 0.9248, "step": 6271 }, { "epoch": 0.8856255295114375, "grad_norm": 3.7232276018239854, "learning_rate": 1.6532882682901076e-05, "loss": 1.14, "step": 6272 }, { "epoch": 0.8857667325614234, "grad_norm": 3.7306297520162564, "learning_rate": 1.6531728429537766e-05, "loss": 0.8855, "step": 6273 }, { "epoch": 0.8859079356114092, "grad_norm": 3.080527210910394, "learning_rate": 1.6530574024379915e-05, "loss": 0.7426, "step": 6274 }, { "epoch": 0.8860491386613951, "grad_norm": 3.7346279813889955, "learning_rate": 1.652941946745434e-05, "loss": 1.0093, "step": 6275 }, { "epoch": 0.886190341711381, "grad_norm": 3.5884784765022526, "learning_rate": 1.6528264758787876e-05, "loss": 1.09, "step": 6276 }, { "epoch": 0.8863315447613669, "grad_norm": 3.635761130255006, "learning_rate": 1.652710989840736e-05, "loss": 1.2248, "step": 6277 }, { "epoch": 0.8864727478113528, "grad_norm": 2.686935252804271, "learning_rate": 1.652595488633963e-05, "loss": 0.8449, "step": 6278 }, { "epoch": 0.8866139508613387, "grad_norm": 3.2603137955830066, "learning_rate": 1.6524799722611524e-05, "loss": 0.9368, "step": 6279 }, { "epoch": 0.8867551539113245, "grad_norm": 3.9035804986769294, "learning_rate": 1.6523644407249893e-05, "loss": 1.1666, "step": 6280 }, { "epoch": 0.8868963569613104, "grad_norm": 3.063690458925986, "learning_rate": 1.652248894028158e-05, "loss": 0.8948, "step": 6281 }, { "epoch": 0.8870375600112962, "grad_norm": 7.6355713880990015, "learning_rate": 1.652133332173344e-05, "loss": 1.0238, "step": 6282 }, { "epoch": 0.8871787630612821, "grad_norm": 3.2125999286457834, "learning_rate": 1.6520177551632333e-05, "loss": 0.9862, "step": 6283 }, { "epoch": 0.887319966111268, "grad_norm": 3.8834751827599714, "learning_rate": 1.6519021630005115e-05, "loss": 1.2326, "step": 6284 }, { "epoch": 0.8874611691612538, "grad_norm": 2.8396215264957454, "learning_rate": 1.651786555687865e-05, "loss": 0.7942, "step": 6285 }, { "epoch": 0.8876023722112397, "grad_norm": 2.8171848274427944, "learning_rate": 1.6516709332279806e-05, "loss": 0.6937, "step": 6286 }, { "epoch": 0.8877435752612256, "grad_norm": 3.5458306517057836, "learning_rate": 1.651555295623545e-05, "loss": 1.0868, "step": 6287 }, { "epoch": 0.8878847783112115, "grad_norm": 4.041983846009704, "learning_rate": 1.6514396428772457e-05, "loss": 1.0189, "step": 6288 }, { "epoch": 0.8880259813611974, "grad_norm": 3.6233959175061727, "learning_rate": 1.6513239749917702e-05, "loss": 1.0256, "step": 6289 }, { "epoch": 0.8881671844111833, "grad_norm": 3.995777922733096, "learning_rate": 1.6512082919698072e-05, "loss": 1.0383, "step": 6290 }, { "epoch": 0.8883083874611691, "grad_norm": 3.046837862698834, "learning_rate": 1.6510925938140444e-05, "loss": 0.9265, "step": 6291 }, { "epoch": 0.888449590511155, "grad_norm": 4.440050379900637, "learning_rate": 1.650976880527171e-05, "loss": 1.0376, "step": 6292 }, { "epoch": 0.8885907935611409, "grad_norm": 2.9934852108745553, "learning_rate": 1.6508611521118762e-05, "loss": 0.9242, "step": 6293 }, { "epoch": 0.8887319966111268, "grad_norm": 3.6436000456338444, "learning_rate": 1.650745408570849e-05, "loss": 1.1386, "step": 6294 }, { "epoch": 0.8888731996611127, "grad_norm": 3.431257868082384, "learning_rate": 1.6506296499067798e-05, "loss": 1.2067, "step": 6295 }, { "epoch": 0.8890144027110986, "grad_norm": 4.428262472203916, "learning_rate": 1.6505138761223586e-05, "loss": 1.2847, "step": 6296 }, { "epoch": 0.8891556057610844, "grad_norm": 3.241582776240133, "learning_rate": 1.6503980872202757e-05, "loss": 0.8599, "step": 6297 }, { "epoch": 0.8892968088110703, "grad_norm": 3.4401444635380707, "learning_rate": 1.650282283203222e-05, "loss": 1.1032, "step": 6298 }, { "epoch": 0.8894380118610562, "grad_norm": 4.50436054943897, "learning_rate": 1.650166464073889e-05, "loss": 1.2196, "step": 6299 }, { "epoch": 0.8895792149110421, "grad_norm": 3.7664110732326077, "learning_rate": 1.6500506298349682e-05, "loss": 1.1037, "step": 6300 }, { "epoch": 0.889720417961028, "grad_norm": 3.1933587658407454, "learning_rate": 1.6499347804891515e-05, "loss": 1.0194, "step": 6301 }, { "epoch": 0.8898616210110138, "grad_norm": 3.279234947720154, "learning_rate": 1.649818916039131e-05, "loss": 1.145, "step": 6302 }, { "epoch": 0.8900028240609997, "grad_norm": 3.9515885490145, "learning_rate": 1.6497030364876e-05, "loss": 1.1405, "step": 6303 }, { "epoch": 0.8901440271109856, "grad_norm": 3.659629283958127, "learning_rate": 1.6495871418372503e-05, "loss": 0.9084, "step": 6304 }, { "epoch": 0.8902852301609715, "grad_norm": 3.6700228729188553, "learning_rate": 1.6494712320907766e-05, "loss": 1.0685, "step": 6305 }, { "epoch": 0.8904264332109574, "grad_norm": 3.1427972040192818, "learning_rate": 1.6493553072508716e-05, "loss": 0.894, "step": 6306 }, { "epoch": 0.8905676362609433, "grad_norm": 2.854101334893658, "learning_rate": 1.6492393673202297e-05, "loss": 0.7566, "step": 6307 }, { "epoch": 0.8907088393109291, "grad_norm": 3.1010356633771394, "learning_rate": 1.6491234123015454e-05, "loss": 0.8636, "step": 6308 }, { "epoch": 0.890850042360915, "grad_norm": 3.4964583465196473, "learning_rate": 1.6490074421975137e-05, "loss": 1.0554, "step": 6309 }, { "epoch": 0.8909912454109009, "grad_norm": 3.9622664320674446, "learning_rate": 1.6488914570108287e-05, "loss": 1.0025, "step": 6310 }, { "epoch": 0.8911324484608868, "grad_norm": 3.7087121384663795, "learning_rate": 1.6487754567441868e-05, "loss": 1.0529, "step": 6311 }, { "epoch": 0.8912736515108727, "grad_norm": 3.5587988751041504, "learning_rate": 1.6486594414002836e-05, "loss": 1.097, "step": 6312 }, { "epoch": 0.8914148545608586, "grad_norm": 3.5219557298608124, "learning_rate": 1.6485434109818146e-05, "loss": 1.0799, "step": 6313 }, { "epoch": 0.8915560576108444, "grad_norm": 3.701205319253776, "learning_rate": 1.6484273654914772e-05, "loss": 1.0741, "step": 6314 }, { "epoch": 0.8916972606608303, "grad_norm": 2.9816190895284223, "learning_rate": 1.6483113049319676e-05, "loss": 0.8864, "step": 6315 }, { "epoch": 0.8918384637108161, "grad_norm": 4.082276936410888, "learning_rate": 1.6481952293059835e-05, "loss": 1.3575, "step": 6316 }, { "epoch": 0.891979666760802, "grad_norm": 3.437347889876703, "learning_rate": 1.6480791386162224e-05, "loss": 1.2365, "step": 6317 }, { "epoch": 0.8921208698107879, "grad_norm": 3.576584225009612, "learning_rate": 1.6479630328653814e-05, "loss": 0.8913, "step": 6318 }, { "epoch": 0.8922620728607737, "grad_norm": 2.7439594880680938, "learning_rate": 1.64784691205616e-05, "loss": 0.781, "step": 6319 }, { "epoch": 0.8924032759107596, "grad_norm": 4.433358041759848, "learning_rate": 1.6477307761912555e-05, "loss": 1.1811, "step": 6320 }, { "epoch": 0.8925444789607455, "grad_norm": 4.13092979689268, "learning_rate": 1.647614625273368e-05, "loss": 1.1199, "step": 6321 }, { "epoch": 0.8926856820107314, "grad_norm": 4.016729430615833, "learning_rate": 1.6474984593051965e-05, "loss": 1.0831, "step": 6322 }, { "epoch": 0.8928268850607173, "grad_norm": 3.346769759328968, "learning_rate": 1.6473822782894398e-05, "loss": 0.9427, "step": 6323 }, { "epoch": 0.8929680881107032, "grad_norm": 3.4067678797248937, "learning_rate": 1.6472660822287987e-05, "loss": 0.7522, "step": 6324 }, { "epoch": 0.893109291160689, "grad_norm": 2.715021915462966, "learning_rate": 1.6471498711259733e-05, "loss": 0.7023, "step": 6325 }, { "epoch": 0.8932504942106749, "grad_norm": 2.8418434408515703, "learning_rate": 1.647033644983665e-05, "loss": 0.827, "step": 6326 }, { "epoch": 0.8933916972606608, "grad_norm": 2.7713515769696717, "learning_rate": 1.6469174038045735e-05, "loss": 0.7298, "step": 6327 }, { "epoch": 0.8935329003106467, "grad_norm": 3.151798026246386, "learning_rate": 1.6468011475914015e-05, "loss": 1.0759, "step": 6328 }, { "epoch": 0.8936741033606326, "grad_norm": 3.3023395772130857, "learning_rate": 1.6466848763468496e-05, "loss": 0.8557, "step": 6329 }, { "epoch": 0.8938153064106185, "grad_norm": 3.9831663290601513, "learning_rate": 1.6465685900736204e-05, "loss": 0.9985, "step": 6330 }, { "epoch": 0.8939565094606043, "grad_norm": 4.528716139246703, "learning_rate": 1.646452288774417e-05, "loss": 0.9884, "step": 6331 }, { "epoch": 0.8940977125105902, "grad_norm": 3.730476661333404, "learning_rate": 1.6463359724519413e-05, "loss": 0.9398, "step": 6332 }, { "epoch": 0.8942389155605761, "grad_norm": 3.510438103015362, "learning_rate": 1.6462196411088968e-05, "loss": 0.8778, "step": 6333 }, { "epoch": 0.894380118610562, "grad_norm": 4.173387206457739, "learning_rate": 1.646103294747987e-05, "loss": 1.1937, "step": 6334 }, { "epoch": 0.8945213216605479, "grad_norm": 3.429067510848518, "learning_rate": 1.6459869333719157e-05, "loss": 0.9763, "step": 6335 }, { "epoch": 0.8946625247105338, "grad_norm": 3.429202642859671, "learning_rate": 1.6458705569833866e-05, "loss": 1.0889, "step": 6336 }, { "epoch": 0.8948037277605196, "grad_norm": 3.7146330251530095, "learning_rate": 1.645754165585105e-05, "loss": 1.0187, "step": 6337 }, { "epoch": 0.8949449308105055, "grad_norm": 2.9399779957588024, "learning_rate": 1.6456377591797754e-05, "loss": 0.8477, "step": 6338 }, { "epoch": 0.8950861338604914, "grad_norm": 4.1504427829263975, "learning_rate": 1.645521337770103e-05, "loss": 0.9921, "step": 6339 }, { "epoch": 0.8952273369104773, "grad_norm": 4.229539108225094, "learning_rate": 1.645404901358794e-05, "loss": 1.1716, "step": 6340 }, { "epoch": 0.8953685399604632, "grad_norm": 3.5251081612203694, "learning_rate": 1.645288449948553e-05, "loss": 1.0646, "step": 6341 }, { "epoch": 0.895509743010449, "grad_norm": 3.4672418999283563, "learning_rate": 1.645171983542088e-05, "loss": 0.9936, "step": 6342 }, { "epoch": 0.8956509460604349, "grad_norm": 3.6989111517391886, "learning_rate": 1.645055502142104e-05, "loss": 1.0971, "step": 6343 }, { "epoch": 0.8957921491104208, "grad_norm": 3.0487158756534347, "learning_rate": 1.644939005751309e-05, "loss": 0.8226, "step": 6344 }, { "epoch": 0.8959333521604067, "grad_norm": 3.706532851992003, "learning_rate": 1.64482249437241e-05, "loss": 1.0046, "step": 6345 }, { "epoch": 0.8960745552103926, "grad_norm": 3.2382411927351065, "learning_rate": 1.644705968008115e-05, "loss": 0.9275, "step": 6346 }, { "epoch": 0.8962157582603785, "grad_norm": 3.718491852024694, "learning_rate": 1.6445894266611313e-05, "loss": 1.1884, "step": 6347 }, { "epoch": 0.8963569613103644, "grad_norm": 3.5715238342309634, "learning_rate": 1.644472870334168e-05, "loss": 0.8239, "step": 6348 }, { "epoch": 0.8964981643603502, "grad_norm": 3.214774016627596, "learning_rate": 1.644356299029933e-05, "loss": 0.8592, "step": 6349 }, { "epoch": 0.896639367410336, "grad_norm": 3.0772126412715934, "learning_rate": 1.6442397127511366e-05, "loss": 0.874, "step": 6350 }, { "epoch": 0.8967805704603219, "grad_norm": 3.494614123521099, "learning_rate": 1.644123111500487e-05, "loss": 1.0229, "step": 6351 }, { "epoch": 0.8969217735103078, "grad_norm": 3.6543780808568127, "learning_rate": 1.644006495280695e-05, "loss": 0.9777, "step": 6352 }, { "epoch": 0.8970629765602937, "grad_norm": 3.275269209723654, "learning_rate": 1.6438898640944695e-05, "loss": 0.9526, "step": 6353 }, { "epoch": 0.8972041796102795, "grad_norm": 4.240065361708412, "learning_rate": 1.6437732179445222e-05, "loss": 1.3762, "step": 6354 }, { "epoch": 0.8973453826602654, "grad_norm": 3.7080799987620074, "learning_rate": 1.6436565568335627e-05, "loss": 1.0319, "step": 6355 }, { "epoch": 0.8974865857102513, "grad_norm": 3.6766847484700036, "learning_rate": 1.6435398807643035e-05, "loss": 0.7831, "step": 6356 }, { "epoch": 0.8976277887602372, "grad_norm": 3.348770560170033, "learning_rate": 1.643423189739455e-05, "loss": 0.9921, "step": 6357 }, { "epoch": 0.8977689918102231, "grad_norm": 3.0691067507615624, "learning_rate": 1.6433064837617294e-05, "loss": 0.8643, "step": 6358 }, { "epoch": 0.897910194860209, "grad_norm": 3.447078341860875, "learning_rate": 1.6431897628338388e-05, "loss": 0.8723, "step": 6359 }, { "epoch": 0.8980513979101948, "grad_norm": 4.739890138777377, "learning_rate": 1.6430730269584963e-05, "loss": 1.0103, "step": 6360 }, { "epoch": 0.8981926009601807, "grad_norm": 3.2959118190204126, "learning_rate": 1.6429562761384142e-05, "loss": 1.035, "step": 6361 }, { "epoch": 0.8983338040101666, "grad_norm": 3.5041806149249295, "learning_rate": 1.642839510376306e-05, "loss": 0.9645, "step": 6362 }, { "epoch": 0.8984750070601525, "grad_norm": 3.7831602354830043, "learning_rate": 1.642722729674885e-05, "loss": 1.1624, "step": 6363 }, { "epoch": 0.8986162101101384, "grad_norm": 5.190905920885287, "learning_rate": 1.6426059340368653e-05, "loss": 0.9601, "step": 6364 }, { "epoch": 0.8987574131601243, "grad_norm": 3.5973357315364494, "learning_rate": 1.642489123464962e-05, "loss": 1.1092, "step": 6365 }, { "epoch": 0.8988986162101101, "grad_norm": 3.37458317385293, "learning_rate": 1.6423722979618883e-05, "loss": 1.0815, "step": 6366 }, { "epoch": 0.899039819260096, "grad_norm": 3.930104267676092, "learning_rate": 1.6422554575303594e-05, "loss": 1.1814, "step": 6367 }, { "epoch": 0.8991810223100819, "grad_norm": 3.149866409520708, "learning_rate": 1.6421386021730915e-05, "loss": 0.9405, "step": 6368 }, { "epoch": 0.8993222253600678, "grad_norm": 3.4876782820594414, "learning_rate": 1.6420217318928e-05, "loss": 1.039, "step": 6369 }, { "epoch": 0.8994634284100537, "grad_norm": 3.103993832696666, "learning_rate": 1.6419048466922004e-05, "loss": 0.8383, "step": 6370 }, { "epoch": 0.8996046314600395, "grad_norm": 3.3437203932875077, "learning_rate": 1.6417879465740094e-05, "loss": 0.9043, "step": 6371 }, { "epoch": 0.8997458345100254, "grad_norm": 3.272905173825727, "learning_rate": 1.6416710315409437e-05, "loss": 0.9486, "step": 6372 }, { "epoch": 0.8998870375600113, "grad_norm": 3.6542381565208717, "learning_rate": 1.6415541015957207e-05, "loss": 1.0517, "step": 6373 }, { "epoch": 0.9000282406099972, "grad_norm": 3.885777518290856, "learning_rate": 1.641437156741057e-05, "loss": 1.2471, "step": 6374 }, { "epoch": 0.9001694436599831, "grad_norm": 4.535231305909152, "learning_rate": 1.641320196979671e-05, "loss": 1.2171, "step": 6375 }, { "epoch": 0.900310646709969, "grad_norm": 2.824397457729353, "learning_rate": 1.6412032223142806e-05, "loss": 0.8448, "step": 6376 }, { "epoch": 0.9004518497599548, "grad_norm": 3.541347898156463, "learning_rate": 1.641086232747604e-05, "loss": 1.1564, "step": 6377 }, { "epoch": 0.9005930528099407, "grad_norm": 3.0358873033964446, "learning_rate": 1.6409692282823604e-05, "loss": 0.9569, "step": 6378 }, { "epoch": 0.9007342558599266, "grad_norm": 4.063892402208464, "learning_rate": 1.6408522089212685e-05, "loss": 1.1203, "step": 6379 }, { "epoch": 0.9008754589099125, "grad_norm": 3.2849444068540845, "learning_rate": 1.6407351746670484e-05, "loss": 0.9515, "step": 6380 }, { "epoch": 0.9010166619598984, "grad_norm": 3.397551892032349, "learning_rate": 1.640618125522419e-05, "loss": 0.9084, "step": 6381 }, { "epoch": 0.9011578650098843, "grad_norm": 4.052224756407713, "learning_rate": 1.6405010614901017e-05, "loss": 1.1574, "step": 6382 }, { "epoch": 0.9012990680598701, "grad_norm": 3.600648772363036, "learning_rate": 1.640383982572816e-05, "loss": 1.1406, "step": 6383 }, { "epoch": 0.901440271109856, "grad_norm": 2.8889817916854654, "learning_rate": 1.6402668887732833e-05, "loss": 0.8786, "step": 6384 }, { "epoch": 0.9015814741598418, "grad_norm": 2.993633294464633, "learning_rate": 1.6401497800942246e-05, "loss": 0.7332, "step": 6385 }, { "epoch": 0.9017226772098277, "grad_norm": 3.7582617790608843, "learning_rate": 1.6400326565383614e-05, "loss": 1.0162, "step": 6386 }, { "epoch": 0.9018638802598136, "grad_norm": 3.896201774926858, "learning_rate": 1.6399155181084156e-05, "loss": 1.2324, "step": 6387 }, { "epoch": 0.9020050833097994, "grad_norm": 4.690828935486476, "learning_rate": 1.6397983648071093e-05, "loss": 1.3361, "step": 6388 }, { "epoch": 0.9021462863597853, "grad_norm": 3.524546553263179, "learning_rate": 1.639681196637166e-05, "loss": 0.9704, "step": 6389 }, { "epoch": 0.9022874894097712, "grad_norm": 3.0796935274743493, "learning_rate": 1.6395640136013073e-05, "loss": 0.8122, "step": 6390 }, { "epoch": 0.9024286924597571, "grad_norm": 3.901693627711233, "learning_rate": 1.6394468157022574e-05, "loss": 1.0836, "step": 6391 }, { "epoch": 0.902569895509743, "grad_norm": 3.0025643110490106, "learning_rate": 1.6393296029427395e-05, "loss": 0.9572, "step": 6392 }, { "epoch": 0.9027110985597289, "grad_norm": 2.7189192621917915, "learning_rate": 1.6392123753254777e-05, "loss": 0.718, "step": 6393 }, { "epoch": 0.9028523016097147, "grad_norm": 3.1977006042337, "learning_rate": 1.6390951328531966e-05, "loss": 0.9446, "step": 6394 }, { "epoch": 0.9029935046597006, "grad_norm": 3.8478735722850828, "learning_rate": 1.6389778755286204e-05, "loss": 1.0463, "step": 6395 }, { "epoch": 0.9031347077096865, "grad_norm": 3.5958454306218717, "learning_rate": 1.6388606033544745e-05, "loss": 1.2908, "step": 6396 }, { "epoch": 0.9032759107596724, "grad_norm": 3.609194486480678, "learning_rate": 1.638743316333484e-05, "loss": 0.9803, "step": 6397 }, { "epoch": 0.9034171138096583, "grad_norm": 2.951844735983867, "learning_rate": 1.6386260144683744e-05, "loss": 0.8944, "step": 6398 }, { "epoch": 0.9035583168596442, "grad_norm": 3.462846387715468, "learning_rate": 1.6385086977618724e-05, "loss": 0.9576, "step": 6399 }, { "epoch": 0.90369951990963, "grad_norm": 3.3113407718054737, "learning_rate": 1.638391366216704e-05, "loss": 0.909, "step": 6400 }, { "epoch": 0.9038407229596159, "grad_norm": 5.592687748915697, "learning_rate": 1.638274019835596e-05, "loss": 0.8976, "step": 6401 }, { "epoch": 0.9039819260096018, "grad_norm": 3.292297921098729, "learning_rate": 1.6381566586212752e-05, "loss": 0.8168, "step": 6402 }, { "epoch": 0.9041231290595877, "grad_norm": 2.995104054977495, "learning_rate": 1.6380392825764693e-05, "loss": 0.9421, "step": 6403 }, { "epoch": 0.9042643321095736, "grad_norm": 3.967572988229277, "learning_rate": 1.637921891703906e-05, "loss": 1.336, "step": 6404 }, { "epoch": 0.9044055351595595, "grad_norm": 3.3793536427564352, "learning_rate": 1.6378044860063135e-05, "loss": 0.9428, "step": 6405 }, { "epoch": 0.9045467382095453, "grad_norm": 3.4864630281757933, "learning_rate": 1.63768706548642e-05, "loss": 1.0383, "step": 6406 }, { "epoch": 0.9046879412595312, "grad_norm": 3.269324864408698, "learning_rate": 1.637569630146955e-05, "loss": 0.9502, "step": 6407 }, { "epoch": 0.9048291443095171, "grad_norm": 3.932156779301153, "learning_rate": 1.6374521799906468e-05, "loss": 1.1213, "step": 6408 }, { "epoch": 0.904970347359503, "grad_norm": 3.488737161696404, "learning_rate": 1.6373347150202252e-05, "loss": 1.0736, "step": 6409 }, { "epoch": 0.9051115504094889, "grad_norm": 3.45691299305735, "learning_rate": 1.63721723523842e-05, "loss": 1.323, "step": 6410 }, { "epoch": 0.9052527534594748, "grad_norm": 4.123122204528794, "learning_rate": 1.6370997406479617e-05, "loss": 1.088, "step": 6411 }, { "epoch": 0.9053939565094606, "grad_norm": 2.932696925657219, "learning_rate": 1.6369822312515805e-05, "loss": 0.9449, "step": 6412 }, { "epoch": 0.9055351595594465, "grad_norm": 3.5924038027489833, "learning_rate": 1.6368647070520073e-05, "loss": 0.9957, "step": 6413 }, { "epoch": 0.9056763626094324, "grad_norm": 3.0452533661273677, "learning_rate": 1.6367471680519734e-05, "loss": 0.7521, "step": 6414 }, { "epoch": 0.9058175656594183, "grad_norm": 3.430979167473585, "learning_rate": 1.63662961425421e-05, "loss": 0.9996, "step": 6415 }, { "epoch": 0.9059587687094042, "grad_norm": 3.2118397964166343, "learning_rate": 1.6365120456614498e-05, "loss": 0.8012, "step": 6416 }, { "epoch": 0.90609997175939, "grad_norm": 3.4687346856841765, "learning_rate": 1.6363944622764242e-05, "loss": 0.9832, "step": 6417 }, { "epoch": 0.9062411748093759, "grad_norm": 3.1850841632678533, "learning_rate": 1.6362768641018662e-05, "loss": 0.8534, "step": 6418 }, { "epoch": 0.9063823778593617, "grad_norm": 3.8570785053852004, "learning_rate": 1.6361592511405087e-05, "loss": 1.1803, "step": 6419 }, { "epoch": 0.9065235809093476, "grad_norm": 3.798911937195144, "learning_rate": 1.636041623395085e-05, "loss": 1.1614, "step": 6420 }, { "epoch": 0.9066647839593335, "grad_norm": 3.2804090491860083, "learning_rate": 1.6359239808683284e-05, "loss": 0.9358, "step": 6421 }, { "epoch": 0.9068059870093194, "grad_norm": 3.242869213817217, "learning_rate": 1.6358063235629733e-05, "loss": 0.958, "step": 6422 }, { "epoch": 0.9069471900593052, "grad_norm": 3.610011364677211, "learning_rate": 1.635688651481754e-05, "loss": 0.9925, "step": 6423 }, { "epoch": 0.9070883931092911, "grad_norm": 3.5019880820239147, "learning_rate": 1.6355709646274048e-05, "loss": 0.937, "step": 6424 }, { "epoch": 0.907229596159277, "grad_norm": 3.374026007544711, "learning_rate": 1.6354532630026608e-05, "loss": 1.0366, "step": 6425 }, { "epoch": 0.9073707992092629, "grad_norm": 3.454867641995901, "learning_rate": 1.6353355466102575e-05, "loss": 1.0522, "step": 6426 }, { "epoch": 0.9075120022592488, "grad_norm": 3.074664317801325, "learning_rate": 1.63521781545293e-05, "loss": 0.7329, "step": 6427 }, { "epoch": 0.9076532053092347, "grad_norm": 3.68905918591236, "learning_rate": 1.6351000695334157e-05, "loss": 1.1899, "step": 6428 }, { "epoch": 0.9077944083592205, "grad_norm": 3.240610260937898, "learning_rate": 1.6349823088544494e-05, "loss": 1.0243, "step": 6429 }, { "epoch": 0.9079356114092064, "grad_norm": 3.782862579173547, "learning_rate": 1.6348645334187686e-05, "loss": 1.1277, "step": 6430 }, { "epoch": 0.9080768144591923, "grad_norm": 3.611453837528714, "learning_rate": 1.6347467432291103e-05, "loss": 1.1065, "step": 6431 }, { "epoch": 0.9082180175091782, "grad_norm": 3.3992537941464525, "learning_rate": 1.6346289382882117e-05, "loss": 1.0409, "step": 6432 }, { "epoch": 0.9083592205591641, "grad_norm": 3.941759431897247, "learning_rate": 1.634511118598811e-05, "loss": 1.0057, "step": 6433 }, { "epoch": 0.90850042360915, "grad_norm": 3.9713725755904443, "learning_rate": 1.6343932841636455e-05, "loss": 1.4145, "step": 6434 }, { "epoch": 0.9086416266591358, "grad_norm": 3.225655718159319, "learning_rate": 1.634275434985454e-05, "loss": 0.8825, "step": 6435 }, { "epoch": 0.9087828297091217, "grad_norm": 2.953734803908207, "learning_rate": 1.6341575710669758e-05, "loss": 0.9377, "step": 6436 }, { "epoch": 0.9089240327591076, "grad_norm": 2.7080328796005197, "learning_rate": 1.6340396924109492e-05, "loss": 0.8174, "step": 6437 }, { "epoch": 0.9090652358090935, "grad_norm": 3.808070280708418, "learning_rate": 1.633921799020114e-05, "loss": 1.1486, "step": 6438 }, { "epoch": 0.9092064388590794, "grad_norm": 3.4553732972963607, "learning_rate": 1.6338038908972102e-05, "loss": 1.0062, "step": 6439 }, { "epoch": 0.9093476419090653, "grad_norm": 3.357020716821186, "learning_rate": 1.6336859680449773e-05, "loss": 0.8977, "step": 6440 }, { "epoch": 0.9094888449590511, "grad_norm": 4.364238974953833, "learning_rate": 1.6335680304661568e-05, "loss": 1.2124, "step": 6441 }, { "epoch": 0.909630048009037, "grad_norm": 4.553917791555002, "learning_rate": 1.633450078163488e-05, "loss": 1.408, "step": 6442 }, { "epoch": 0.9097712510590229, "grad_norm": 3.1867507924473073, "learning_rate": 1.6333321111397137e-05, "loss": 0.9386, "step": 6443 }, { "epoch": 0.9099124541090088, "grad_norm": 2.988064502235517, "learning_rate": 1.6332141293975742e-05, "loss": 0.8516, "step": 6444 }, { "epoch": 0.9100536571589947, "grad_norm": 2.8275460070170513, "learning_rate": 1.633096132939812e-05, "loss": 0.7627, "step": 6445 }, { "epoch": 0.9101948602089805, "grad_norm": 3.3866323421143414, "learning_rate": 1.632978121769169e-05, "loss": 1.1489, "step": 6446 }, { "epoch": 0.9103360632589664, "grad_norm": 3.7166489280905832, "learning_rate": 1.632860095888388e-05, "loss": 1.2273, "step": 6447 }, { "epoch": 0.9104772663089523, "grad_norm": 2.887425020153329, "learning_rate": 1.6327420553002113e-05, "loss": 0.8809, "step": 6448 }, { "epoch": 0.9106184693589382, "grad_norm": 3.7485365862294633, "learning_rate": 1.632624000007383e-05, "loss": 0.9296, "step": 6449 }, { "epoch": 0.9107596724089241, "grad_norm": 3.09669815331168, "learning_rate": 1.632505930012646e-05, "loss": 0.8432, "step": 6450 }, { "epoch": 0.91090087545891, "grad_norm": 3.2999396426940484, "learning_rate": 1.632387845318744e-05, "loss": 0.8602, "step": 6451 }, { "epoch": 0.9110420785088958, "grad_norm": 3.2303337219903034, "learning_rate": 1.632269745928422e-05, "loss": 0.9958, "step": 6452 }, { "epoch": 0.9111832815588816, "grad_norm": 3.723849588378819, "learning_rate": 1.6321516318444235e-05, "loss": 1.0423, "step": 6453 }, { "epoch": 0.9113244846088675, "grad_norm": 3.57385878663657, "learning_rate": 1.632033503069495e-05, "loss": 1.0368, "step": 6454 }, { "epoch": 0.9114656876588534, "grad_norm": 3.492467131366321, "learning_rate": 1.6319153596063803e-05, "loss": 1.0658, "step": 6455 }, { "epoch": 0.9116068907088393, "grad_norm": 3.9088152109391907, "learning_rate": 1.6317972014578252e-05, "loss": 1.1752, "step": 6456 }, { "epoch": 0.9117480937588252, "grad_norm": 3.0977364498140596, "learning_rate": 1.6316790286265764e-05, "loss": 0.8803, "step": 6457 }, { "epoch": 0.911889296808811, "grad_norm": 2.579159624840554, "learning_rate": 1.63156084111538e-05, "loss": 0.7622, "step": 6458 }, { "epoch": 0.9120304998587969, "grad_norm": 3.164945781703793, "learning_rate": 1.6314426389269822e-05, "loss": 0.9472, "step": 6459 }, { "epoch": 0.9121717029087828, "grad_norm": 3.29705428750325, "learning_rate": 1.6313244220641304e-05, "loss": 1.0124, "step": 6460 }, { "epoch": 0.9123129059587687, "grad_norm": 2.8569896134220794, "learning_rate": 1.631206190529571e-05, "loss": 0.6594, "step": 6461 }, { "epoch": 0.9124541090087546, "grad_norm": 3.669410298196866, "learning_rate": 1.631087944326053e-05, "loss": 1.037, "step": 6462 }, { "epoch": 0.9125953120587404, "grad_norm": 3.6779842569744243, "learning_rate": 1.6309696834563236e-05, "loss": 1.053, "step": 6463 }, { "epoch": 0.9127365151087263, "grad_norm": 3.6417646976897022, "learning_rate": 1.630851407923131e-05, "loss": 1.0478, "step": 6464 }, { "epoch": 0.9128777181587122, "grad_norm": 3.5583779349343794, "learning_rate": 1.630733117729224e-05, "loss": 0.937, "step": 6465 }, { "epoch": 0.9130189212086981, "grad_norm": 3.395730746753288, "learning_rate": 1.6306148128773522e-05, "loss": 0.8451, "step": 6466 }, { "epoch": 0.913160124258684, "grad_norm": 2.9769673979264826, "learning_rate": 1.630496493370264e-05, "loss": 0.8601, "step": 6467 }, { "epoch": 0.9133013273086699, "grad_norm": 4.6383332448709655, "learning_rate": 1.6303781592107102e-05, "loss": 0.9726, "step": 6468 }, { "epoch": 0.9134425303586557, "grad_norm": 3.4450015016292137, "learning_rate": 1.63025981040144e-05, "loss": 0.785, "step": 6469 }, { "epoch": 0.9135837334086416, "grad_norm": 2.688717599412168, "learning_rate": 1.6301414469452037e-05, "loss": 0.6726, "step": 6470 }, { "epoch": 0.9137249364586275, "grad_norm": 3.881157902900692, "learning_rate": 1.6300230688447528e-05, "loss": 1.0401, "step": 6471 }, { "epoch": 0.9138661395086134, "grad_norm": 3.4931998160486564, "learning_rate": 1.6299046761028373e-05, "loss": 0.9349, "step": 6472 }, { "epoch": 0.9140073425585993, "grad_norm": 3.729040506184297, "learning_rate": 1.6297862687222097e-05, "loss": 1.135, "step": 6473 }, { "epoch": 0.9141485456085852, "grad_norm": 2.8563495841444784, "learning_rate": 1.629667846705621e-05, "loss": 0.9413, "step": 6474 }, { "epoch": 0.914289748658571, "grad_norm": 4.498214571334865, "learning_rate": 1.629549410055823e-05, "loss": 1.3015, "step": 6475 }, { "epoch": 0.9144309517085569, "grad_norm": 2.9692374649824487, "learning_rate": 1.6294309587755693e-05, "loss": 0.7861, "step": 6476 }, { "epoch": 0.9145721547585428, "grad_norm": 2.8417696624316298, "learning_rate": 1.6293124928676112e-05, "loss": 0.842, "step": 6477 }, { "epoch": 0.9147133578085287, "grad_norm": 3.4988101096698196, "learning_rate": 1.6291940123347033e-05, "loss": 1.1867, "step": 6478 }, { "epoch": 0.9148545608585146, "grad_norm": 4.143335881245334, "learning_rate": 1.629075517179598e-05, "loss": 1.0987, "step": 6479 }, { "epoch": 0.9149957639085005, "grad_norm": 2.8784451802518185, "learning_rate": 1.6289570074050492e-05, "loss": 0.8046, "step": 6480 }, { "epoch": 0.9151369669584863, "grad_norm": 3.544265192779974, "learning_rate": 1.6288384830138114e-05, "loss": 1.1424, "step": 6481 }, { "epoch": 0.9152781700084722, "grad_norm": 3.9949656730225502, "learning_rate": 1.628719944008639e-05, "loss": 1.111, "step": 6482 }, { "epoch": 0.9154193730584581, "grad_norm": 3.62174297133545, "learning_rate": 1.628601390392286e-05, "loss": 1.2106, "step": 6483 }, { "epoch": 0.915560576108444, "grad_norm": 2.856197374657836, "learning_rate": 1.6284828221675085e-05, "loss": 0.8514, "step": 6484 }, { "epoch": 0.9157017791584299, "grad_norm": 3.131734457577334, "learning_rate": 1.6283642393370618e-05, "loss": 0.8376, "step": 6485 }, { "epoch": 0.9158429822084158, "grad_norm": 2.5955052020340856, "learning_rate": 1.6282456419037013e-05, "loss": 0.6893, "step": 6486 }, { "epoch": 0.9159841852584015, "grad_norm": 3.6320334985753027, "learning_rate": 1.6281270298701836e-05, "loss": 0.9257, "step": 6487 }, { "epoch": 0.9161253883083874, "grad_norm": 3.0782148265217257, "learning_rate": 1.628008403239265e-05, "loss": 0.983, "step": 6488 }, { "epoch": 0.9162665913583733, "grad_norm": 3.466755041653721, "learning_rate": 1.627889762013702e-05, "loss": 0.9602, "step": 6489 }, { "epoch": 0.9164077944083592, "grad_norm": 3.998983868221303, "learning_rate": 1.6277711061962525e-05, "loss": 1.0819, "step": 6490 }, { "epoch": 0.9165489974583451, "grad_norm": 3.5704280116184925, "learning_rate": 1.627652435789673e-05, "loss": 1.2447, "step": 6491 }, { "epoch": 0.9166902005083309, "grad_norm": 3.3586997527248745, "learning_rate": 1.6275337507967228e-05, "loss": 1.1726, "step": 6492 }, { "epoch": 0.9168314035583168, "grad_norm": 3.188302054712984, "learning_rate": 1.6274150512201586e-05, "loss": 0.84, "step": 6493 }, { "epoch": 0.9169726066083027, "grad_norm": 3.1356474658379256, "learning_rate": 1.6272963370627398e-05, "loss": 0.9393, "step": 6494 }, { "epoch": 0.9171138096582886, "grad_norm": 2.9653126666027445, "learning_rate": 1.627177608327225e-05, "loss": 0.8941, "step": 6495 }, { "epoch": 0.9172550127082745, "grad_norm": 3.2943340292705687, "learning_rate": 1.6270588650163737e-05, "loss": 1.0362, "step": 6496 }, { "epoch": 0.9173962157582604, "grad_norm": 3.563154733280506, "learning_rate": 1.6269401071329447e-05, "loss": 0.9116, "step": 6497 }, { "epoch": 0.9175374188082462, "grad_norm": 3.1930088120817866, "learning_rate": 1.626821334679699e-05, "loss": 1.0641, "step": 6498 }, { "epoch": 0.9176786218582321, "grad_norm": 3.4269208237411544, "learning_rate": 1.6267025476593957e-05, "loss": 1.0319, "step": 6499 }, { "epoch": 0.917819824908218, "grad_norm": 3.734524923394806, "learning_rate": 1.626583746074796e-05, "loss": 1.2924, "step": 6500 }, { "epoch": 0.9179610279582039, "grad_norm": 3.1224376496902964, "learning_rate": 1.6264649299286604e-05, "loss": 0.9259, "step": 6501 }, { "epoch": 0.9181022310081898, "grad_norm": 4.071241522938372, "learning_rate": 1.6263460992237507e-05, "loss": 0.8515, "step": 6502 }, { "epoch": 0.9182434340581757, "grad_norm": 4.059756988466037, "learning_rate": 1.6262272539628277e-05, "loss": 0.9501, "step": 6503 }, { "epoch": 0.9183846371081615, "grad_norm": 3.205173473942416, "learning_rate": 1.6261083941486543e-05, "loss": 1.0663, "step": 6504 }, { "epoch": 0.9185258401581474, "grad_norm": 3.1996604623118126, "learning_rate": 1.625989519783992e-05, "loss": 0.9513, "step": 6505 }, { "epoch": 0.9186670432081333, "grad_norm": 3.473012426704352, "learning_rate": 1.6258706308716035e-05, "loss": 0.9723, "step": 6506 }, { "epoch": 0.9188082462581192, "grad_norm": 3.04782440693302, "learning_rate": 1.625751727414252e-05, "loss": 0.9731, "step": 6507 }, { "epoch": 0.9189494493081051, "grad_norm": 3.0747473294265513, "learning_rate": 1.6256328094147003e-05, "loss": 0.919, "step": 6508 }, { "epoch": 0.919090652358091, "grad_norm": 3.3822927863583554, "learning_rate": 1.6255138768757125e-05, "loss": 0.9418, "step": 6509 }, { "epoch": 0.9192318554080768, "grad_norm": 3.5730048663200353, "learning_rate": 1.6253949298000527e-05, "loss": 1.1946, "step": 6510 }, { "epoch": 0.9193730584580627, "grad_norm": 3.3631679261710787, "learning_rate": 1.6252759681904842e-05, "loss": 1.0108, "step": 6511 }, { "epoch": 0.9195142615080486, "grad_norm": 3.6846368827906053, "learning_rate": 1.6251569920497725e-05, "loss": 1.1637, "step": 6512 }, { "epoch": 0.9196554645580345, "grad_norm": 3.2062803068476686, "learning_rate": 1.6250380013806825e-05, "loss": 1.0155, "step": 6513 }, { "epoch": 0.9197966676080204, "grad_norm": 3.450059360845324, "learning_rate": 1.624918996185979e-05, "loss": 1.0584, "step": 6514 }, { "epoch": 0.9199378706580063, "grad_norm": 3.1505281130072125, "learning_rate": 1.624799976468428e-05, "loss": 0.9195, "step": 6515 }, { "epoch": 0.9200790737079921, "grad_norm": 3.179172106833367, "learning_rate": 1.6246809422307954e-05, "loss": 0.9899, "step": 6516 }, { "epoch": 0.920220276757978, "grad_norm": 4.407861571885109, "learning_rate": 1.6245618934758474e-05, "loss": 1.1602, "step": 6517 }, { "epoch": 0.9203614798079639, "grad_norm": 3.031374950062368, "learning_rate": 1.6244428302063506e-05, "loss": 0.9449, "step": 6518 }, { "epoch": 0.9205026828579498, "grad_norm": 4.520511461060888, "learning_rate": 1.6243237524250726e-05, "loss": 1.1381, "step": 6519 }, { "epoch": 0.9206438859079357, "grad_norm": 2.6540141904241437, "learning_rate": 1.6242046601347796e-05, "loss": 0.7752, "step": 6520 }, { "epoch": 0.9207850889579214, "grad_norm": 3.7924728146525797, "learning_rate": 1.6240855533382403e-05, "loss": 1.0883, "step": 6521 }, { "epoch": 0.9209262920079073, "grad_norm": 3.4749409643240963, "learning_rate": 1.623966432038222e-05, "loss": 0.8391, "step": 6522 }, { "epoch": 0.9210674950578932, "grad_norm": 3.090354350555065, "learning_rate": 1.6238472962374935e-05, "loss": 0.8217, "step": 6523 }, { "epoch": 0.9212086981078791, "grad_norm": 3.132292832661475, "learning_rate": 1.6237281459388233e-05, "loss": 0.9028, "step": 6524 }, { "epoch": 0.921349901157865, "grad_norm": 3.041310840224807, "learning_rate": 1.62360898114498e-05, "loss": 0.8923, "step": 6525 }, { "epoch": 0.9214911042078509, "grad_norm": 3.7704624380968337, "learning_rate": 1.6234898018587336e-05, "loss": 1.0205, "step": 6526 }, { "epoch": 0.9216323072578367, "grad_norm": 3.3463124645082925, "learning_rate": 1.6233706080828536e-05, "loss": 1.1514, "step": 6527 }, { "epoch": 0.9217735103078226, "grad_norm": 3.823108935158952, "learning_rate": 1.6232513998201094e-05, "loss": 1.1626, "step": 6528 }, { "epoch": 0.9219147133578085, "grad_norm": 3.247472082851392, "learning_rate": 1.6231321770732723e-05, "loss": 0.9973, "step": 6529 }, { "epoch": 0.9220559164077944, "grad_norm": 3.1485110413108854, "learning_rate": 1.6230129398451124e-05, "loss": 1.0056, "step": 6530 }, { "epoch": 0.9221971194577803, "grad_norm": 3.613716171026468, "learning_rate": 1.6228936881384004e-05, "loss": 0.999, "step": 6531 }, { "epoch": 0.9223383225077662, "grad_norm": 3.0014247942425327, "learning_rate": 1.6227744219559086e-05, "loss": 0.9268, "step": 6532 }, { "epoch": 0.922479525557752, "grad_norm": 3.4157491085468004, "learning_rate": 1.622655141300408e-05, "loss": 1.021, "step": 6533 }, { "epoch": 0.9226207286077379, "grad_norm": 3.1163121875221345, "learning_rate": 1.622535846174671e-05, "loss": 0.9583, "step": 6534 }, { "epoch": 0.9227619316577238, "grad_norm": 3.2765055269526098, "learning_rate": 1.6224165365814696e-05, "loss": 1.0569, "step": 6535 }, { "epoch": 0.9229031347077097, "grad_norm": 3.1145723217312815, "learning_rate": 1.6222972125235766e-05, "loss": 0.9179, "step": 6536 }, { "epoch": 0.9230443377576956, "grad_norm": 3.8362118046865925, "learning_rate": 1.6221778740037654e-05, "loss": 1.1982, "step": 6537 }, { "epoch": 0.9231855408076814, "grad_norm": 3.106405013315922, "learning_rate": 1.6220585210248093e-05, "loss": 0.8733, "step": 6538 }, { "epoch": 0.9233267438576673, "grad_norm": 3.097846497794573, "learning_rate": 1.6219391535894813e-05, "loss": 0.9241, "step": 6539 }, { "epoch": 0.9234679469076532, "grad_norm": 2.797389330606165, "learning_rate": 1.6218197717005562e-05, "loss": 0.7392, "step": 6540 }, { "epoch": 0.9236091499576391, "grad_norm": 3.7784967448358446, "learning_rate": 1.6217003753608082e-05, "loss": 1.0826, "step": 6541 }, { "epoch": 0.923750353007625, "grad_norm": 3.6470869353337445, "learning_rate": 1.6215809645730115e-05, "loss": 1.0618, "step": 6542 }, { "epoch": 0.9238915560576109, "grad_norm": 3.7944147655600178, "learning_rate": 1.621461539339942e-05, "loss": 1.0126, "step": 6543 }, { "epoch": 0.9240327591075967, "grad_norm": 3.0046921863210025, "learning_rate": 1.621342099664375e-05, "loss": 0.9825, "step": 6544 }, { "epoch": 0.9241739621575826, "grad_norm": 4.058260534922192, "learning_rate": 1.6212226455490854e-05, "loss": 1.0173, "step": 6545 }, { "epoch": 0.9243151652075685, "grad_norm": 3.682830785820891, "learning_rate": 1.6211031769968503e-05, "loss": 1.1978, "step": 6546 }, { "epoch": 0.9244563682575544, "grad_norm": 2.85664317665372, "learning_rate": 1.6209836940104454e-05, "loss": 0.8729, "step": 6547 }, { "epoch": 0.9245975713075403, "grad_norm": 3.3040525020861815, "learning_rate": 1.6208641965926474e-05, "loss": 1.0727, "step": 6548 }, { "epoch": 0.9247387743575262, "grad_norm": 3.6136875716159746, "learning_rate": 1.6207446847462338e-05, "loss": 0.8909, "step": 6549 }, { "epoch": 0.924879977407512, "grad_norm": 2.7700756853847577, "learning_rate": 1.6206251584739817e-05, "loss": 0.7598, "step": 6550 }, { "epoch": 0.9250211804574979, "grad_norm": 3.145393815891417, "learning_rate": 1.6205056177786694e-05, "loss": 0.7477, "step": 6551 }, { "epoch": 0.9251623835074838, "grad_norm": 3.7099153690182023, "learning_rate": 1.620386062663074e-05, "loss": 1.3379, "step": 6552 }, { "epoch": 0.9253035865574697, "grad_norm": 2.46848462197872, "learning_rate": 1.6202664931299747e-05, "loss": 0.7509, "step": 6553 }, { "epoch": 0.9254447896074556, "grad_norm": 4.174216918755577, "learning_rate": 1.6201469091821498e-05, "loss": 1.3007, "step": 6554 }, { "epoch": 0.9255859926574413, "grad_norm": 3.4526130704189724, "learning_rate": 1.6200273108223784e-05, "loss": 1.0193, "step": 6555 }, { "epoch": 0.9257271957074272, "grad_norm": 3.848591305024863, "learning_rate": 1.6199076980534406e-05, "loss": 1.2292, "step": 6556 }, { "epoch": 0.9258683987574131, "grad_norm": 3.247033543524722, "learning_rate": 1.6197880708781153e-05, "loss": 0.8927, "step": 6557 }, { "epoch": 0.926009601807399, "grad_norm": 3.465304199821099, "learning_rate": 1.6196684292991827e-05, "loss": 0.8489, "step": 6558 }, { "epoch": 0.9261508048573849, "grad_norm": 3.2732944513517737, "learning_rate": 1.619548773319424e-05, "loss": 0.9409, "step": 6559 }, { "epoch": 0.9262920079073708, "grad_norm": 3.4228844891062935, "learning_rate": 1.6194291029416188e-05, "loss": 1.0694, "step": 6560 }, { "epoch": 0.9264332109573566, "grad_norm": 4.350219465506222, "learning_rate": 1.6193094181685487e-05, "loss": 1.2617, "step": 6561 }, { "epoch": 0.9265744140073425, "grad_norm": 2.840311862374248, "learning_rate": 1.6191897190029956e-05, "loss": 0.9025, "step": 6562 }, { "epoch": 0.9267156170573284, "grad_norm": 4.173153730264222, "learning_rate": 1.6190700054477402e-05, "loss": 0.9725, "step": 6563 }, { "epoch": 0.9268568201073143, "grad_norm": 3.5415823690999377, "learning_rate": 1.6189502775055662e-05, "loss": 1.0944, "step": 6564 }, { "epoch": 0.9269980231573002, "grad_norm": 3.3256080983271277, "learning_rate": 1.6188305351792545e-05, "loss": 1.0635, "step": 6565 }, { "epoch": 0.9271392262072861, "grad_norm": 3.6999797565389034, "learning_rate": 1.618710778471588e-05, "loss": 1.1142, "step": 6566 }, { "epoch": 0.9272804292572719, "grad_norm": 3.9718094721407073, "learning_rate": 1.6185910073853512e-05, "loss": 1.0722, "step": 6567 }, { "epoch": 0.9274216323072578, "grad_norm": 2.6431148011264454, "learning_rate": 1.618471221923326e-05, "loss": 0.6533, "step": 6568 }, { "epoch": 0.9275628353572437, "grad_norm": 2.899684122298023, "learning_rate": 1.6183514220882967e-05, "loss": 0.7918, "step": 6569 }, { "epoch": 0.9277040384072296, "grad_norm": 3.5686752323456026, "learning_rate": 1.6182316078830473e-05, "loss": 1.0149, "step": 6570 }, { "epoch": 0.9278452414572155, "grad_norm": 3.4501077651045313, "learning_rate": 1.6181117793103623e-05, "loss": 0.9554, "step": 6571 }, { "epoch": 0.9279864445072014, "grad_norm": 3.1359683910892366, "learning_rate": 1.617991936373027e-05, "loss": 1.0575, "step": 6572 }, { "epoch": 0.9281276475571872, "grad_norm": 3.8091972286309383, "learning_rate": 1.6178720790738253e-05, "loss": 1.1078, "step": 6573 }, { "epoch": 0.9282688506071731, "grad_norm": 3.4005356649117537, "learning_rate": 1.6177522074155436e-05, "loss": 0.8075, "step": 6574 }, { "epoch": 0.928410053657159, "grad_norm": 3.3953090304717635, "learning_rate": 1.6176323214009673e-05, "loss": 0.9872, "step": 6575 }, { "epoch": 0.9285512567071449, "grad_norm": 3.145929491216068, "learning_rate": 1.617512421032883e-05, "loss": 0.7226, "step": 6576 }, { "epoch": 0.9286924597571308, "grad_norm": 3.592347105746373, "learning_rate": 1.6173925063140763e-05, "loss": 1.0242, "step": 6577 }, { "epoch": 0.9288336628071167, "grad_norm": 5.026814788563661, "learning_rate": 1.6172725772473343e-05, "loss": 1.2462, "step": 6578 }, { "epoch": 0.9289748658571025, "grad_norm": 3.428738721048319, "learning_rate": 1.6171526338354447e-05, "loss": 1.0621, "step": 6579 }, { "epoch": 0.9291160689070884, "grad_norm": 4.152960011796144, "learning_rate": 1.617032676081194e-05, "loss": 0.9564, "step": 6580 }, { "epoch": 0.9292572719570743, "grad_norm": 3.188149118146041, "learning_rate": 1.6169127039873705e-05, "loss": 0.898, "step": 6581 }, { "epoch": 0.9293984750070602, "grad_norm": 3.5968951619181566, "learning_rate": 1.616792717556762e-05, "loss": 1.1058, "step": 6582 }, { "epoch": 0.9295396780570461, "grad_norm": 2.9634276321167383, "learning_rate": 1.6166727167921574e-05, "loss": 0.7907, "step": 6583 }, { "epoch": 0.929680881107032, "grad_norm": 3.3421397472548584, "learning_rate": 1.616552701696345e-05, "loss": 1.1259, "step": 6584 }, { "epoch": 0.9298220841570178, "grad_norm": 4.063807513564883, "learning_rate": 1.6164326722721143e-05, "loss": 0.928, "step": 6585 }, { "epoch": 0.9299632872070037, "grad_norm": 2.824623220074162, "learning_rate": 1.616312628522254e-05, "loss": 0.887, "step": 6586 }, { "epoch": 0.9301044902569896, "grad_norm": 3.7609218569945027, "learning_rate": 1.616192570449555e-05, "loss": 1.0199, "step": 6587 }, { "epoch": 0.9302456933069755, "grad_norm": 3.094255537332547, "learning_rate": 1.6160724980568066e-05, "loss": 0.7786, "step": 6588 }, { "epoch": 0.9303868963569613, "grad_norm": 3.3790308587174, "learning_rate": 1.6159524113467994e-05, "loss": 0.9376, "step": 6589 }, { "epoch": 0.9305280994069471, "grad_norm": 3.6992299814511838, "learning_rate": 1.615832310322324e-05, "loss": 0.8449, "step": 6590 }, { "epoch": 0.930669302456933, "grad_norm": 3.0140259491578045, "learning_rate": 1.6157121949861716e-05, "loss": 0.8915, "step": 6591 }, { "epoch": 0.9308105055069189, "grad_norm": 4.082198101389141, "learning_rate": 1.615592065341134e-05, "loss": 1.0802, "step": 6592 }, { "epoch": 0.9309517085569048, "grad_norm": 4.0294258299787336, "learning_rate": 1.6154719213900026e-05, "loss": 1.2819, "step": 6593 }, { "epoch": 0.9310929116068907, "grad_norm": 3.2170476712423595, "learning_rate": 1.6153517631355696e-05, "loss": 1.0005, "step": 6594 }, { "epoch": 0.9312341146568766, "grad_norm": 3.9953870540984426, "learning_rate": 1.615231590580627e-05, "loss": 1.5294, "step": 6595 }, { "epoch": 0.9313753177068624, "grad_norm": 3.2808214715101602, "learning_rate": 1.6151114037279682e-05, "loss": 0.8664, "step": 6596 }, { "epoch": 0.9315165207568483, "grad_norm": 3.018473763698476, "learning_rate": 1.6149912025803858e-05, "loss": 0.9352, "step": 6597 }, { "epoch": 0.9316577238068342, "grad_norm": 3.2547615575746933, "learning_rate": 1.614870987140674e-05, "loss": 0.846, "step": 6598 }, { "epoch": 0.9317989268568201, "grad_norm": 3.2381716267024725, "learning_rate": 1.6147507574116255e-05, "loss": 1.0794, "step": 6599 }, { "epoch": 0.931940129906806, "grad_norm": 3.1232228305841723, "learning_rate": 1.614630513396035e-05, "loss": 0.7491, "step": 6600 }, { "epoch": 0.9320813329567919, "grad_norm": 3.3246207921196107, "learning_rate": 1.6145102550966968e-05, "loss": 0.9123, "step": 6601 }, { "epoch": 0.9322225360067777, "grad_norm": 4.079148799336749, "learning_rate": 1.6143899825164058e-05, "loss": 1.056, "step": 6602 }, { "epoch": 0.9323637390567636, "grad_norm": 3.599630589151924, "learning_rate": 1.614269695657957e-05, "loss": 0.9798, "step": 6603 }, { "epoch": 0.9325049421067495, "grad_norm": 3.077999710278226, "learning_rate": 1.6141493945241453e-05, "loss": 0.8897, "step": 6604 }, { "epoch": 0.9326461451567354, "grad_norm": 2.741628885574526, "learning_rate": 1.614029079117767e-05, "loss": 0.8615, "step": 6605 }, { "epoch": 0.9327873482067213, "grad_norm": 3.287507221927196, "learning_rate": 1.6139087494416184e-05, "loss": 0.9631, "step": 6606 }, { "epoch": 0.9329285512567072, "grad_norm": 2.794249827723645, "learning_rate": 1.6137884054984957e-05, "loss": 0.7355, "step": 6607 }, { "epoch": 0.933069754306693, "grad_norm": 3.5959268876182113, "learning_rate": 1.613668047291195e-05, "loss": 0.9246, "step": 6608 }, { "epoch": 0.9332109573566789, "grad_norm": 3.0472050206252, "learning_rate": 1.6135476748225144e-05, "loss": 0.8533, "step": 6609 }, { "epoch": 0.9333521604066648, "grad_norm": 3.442388327176033, "learning_rate": 1.6134272880952506e-05, "loss": 0.8853, "step": 6610 }, { "epoch": 0.9334933634566507, "grad_norm": 3.972918683569106, "learning_rate": 1.6133068871122014e-05, "loss": 0.9759, "step": 6611 }, { "epoch": 0.9336345665066366, "grad_norm": 3.703257759697238, "learning_rate": 1.613186471876165e-05, "loss": 1.1206, "step": 6612 }, { "epoch": 0.9337757695566224, "grad_norm": 3.9603337825891396, "learning_rate": 1.6130660423899402e-05, "loss": 1.1118, "step": 6613 }, { "epoch": 0.9339169726066083, "grad_norm": 3.3795723024989948, "learning_rate": 1.612945598656325e-05, "loss": 1.1892, "step": 6614 }, { "epoch": 0.9340581756565942, "grad_norm": 3.9905483845370537, "learning_rate": 1.6128251406781192e-05, "loss": 1.2366, "step": 6615 }, { "epoch": 0.9341993787065801, "grad_norm": 3.329887909936065, "learning_rate": 1.6127046684581212e-05, "loss": 0.9167, "step": 6616 }, { "epoch": 0.934340581756566, "grad_norm": 2.8474628022446113, "learning_rate": 1.6125841819991318e-05, "loss": 0.7879, "step": 6617 }, { "epoch": 0.9344817848065519, "grad_norm": 3.81014116857258, "learning_rate": 1.6124636813039502e-05, "loss": 1.3412, "step": 6618 }, { "epoch": 0.9346229878565377, "grad_norm": 3.7165276637482454, "learning_rate": 1.6123431663753774e-05, "loss": 1.1202, "step": 6619 }, { "epoch": 0.9347641909065236, "grad_norm": 3.287506410672592, "learning_rate": 1.6122226372162137e-05, "loss": 0.9609, "step": 6620 }, { "epoch": 0.9349053939565095, "grad_norm": 3.221201278525491, "learning_rate": 1.6121020938292604e-05, "loss": 1.0229, "step": 6621 }, { "epoch": 0.9350465970064954, "grad_norm": 3.4827038972567044, "learning_rate": 1.6119815362173188e-05, "loss": 1.0889, "step": 6622 }, { "epoch": 0.9351878000564812, "grad_norm": 2.98283698591927, "learning_rate": 1.6118609643831905e-05, "loss": 0.8248, "step": 6623 }, { "epoch": 0.935329003106467, "grad_norm": 3.7315759284376133, "learning_rate": 1.6117403783296778e-05, "loss": 1.1653, "step": 6624 }, { "epoch": 0.9354702061564529, "grad_norm": 2.486813716941703, "learning_rate": 1.611619778059583e-05, "loss": 0.7721, "step": 6625 }, { "epoch": 0.9356114092064388, "grad_norm": 3.9327041639059077, "learning_rate": 1.6114991635757085e-05, "loss": 1.1281, "step": 6626 }, { "epoch": 0.9357526122564247, "grad_norm": 3.8592349273098367, "learning_rate": 1.611378534880857e-05, "loss": 0.9629, "step": 6627 }, { "epoch": 0.9358938153064106, "grad_norm": 3.346570976486084, "learning_rate": 1.611257891977833e-05, "loss": 1.0868, "step": 6628 }, { "epoch": 0.9360350183563965, "grad_norm": 3.110660704947061, "learning_rate": 1.6111372348694397e-05, "loss": 0.8131, "step": 6629 }, { "epoch": 0.9361762214063823, "grad_norm": 3.0689548252960726, "learning_rate": 1.6110165635584807e-05, "loss": 0.9233, "step": 6630 }, { "epoch": 0.9363174244563682, "grad_norm": 3.3533527982205085, "learning_rate": 1.6108958780477607e-05, "loss": 1.0977, "step": 6631 }, { "epoch": 0.9364586275063541, "grad_norm": 3.038257348862183, "learning_rate": 1.6107751783400845e-05, "loss": 0.7936, "step": 6632 }, { "epoch": 0.93659983055634, "grad_norm": 3.2941898820665467, "learning_rate": 1.6106544644382567e-05, "loss": 0.9352, "step": 6633 }, { "epoch": 0.9367410336063259, "grad_norm": 3.484758876676196, "learning_rate": 1.610533736345083e-05, "loss": 1.0089, "step": 6634 }, { "epoch": 0.9368822366563118, "grad_norm": 3.4283958984201943, "learning_rate": 1.610412994063369e-05, "loss": 0.9341, "step": 6635 }, { "epoch": 0.9370234397062976, "grad_norm": 4.385211688915032, "learning_rate": 1.6102922375959204e-05, "loss": 1.0819, "step": 6636 }, { "epoch": 0.9371646427562835, "grad_norm": 3.6863060714029827, "learning_rate": 1.6101714669455438e-05, "loss": 1.0554, "step": 6637 }, { "epoch": 0.9373058458062694, "grad_norm": 4.0089603959322035, "learning_rate": 1.6100506821150455e-05, "loss": 1.2873, "step": 6638 }, { "epoch": 0.9374470488562553, "grad_norm": 4.1208848745432665, "learning_rate": 1.6099298831072334e-05, "loss": 1.1831, "step": 6639 }, { "epoch": 0.9375882519062412, "grad_norm": 2.8475719310319993, "learning_rate": 1.6098090699249144e-05, "loss": 0.7505, "step": 6640 }, { "epoch": 0.9377294549562271, "grad_norm": 4.080486384741495, "learning_rate": 1.6096882425708953e-05, "loss": 1.2425, "step": 6641 }, { "epoch": 0.9378706580062129, "grad_norm": 3.8872234066593196, "learning_rate": 1.609567401047985e-05, "loss": 0.8701, "step": 6642 }, { "epoch": 0.9380118610561988, "grad_norm": 3.4176709692163083, "learning_rate": 1.6094465453589915e-05, "loss": 0.9469, "step": 6643 }, { "epoch": 0.9381530641061847, "grad_norm": 2.9601686480136684, "learning_rate": 1.6093256755067236e-05, "loss": 0.8684, "step": 6644 }, { "epoch": 0.9382942671561706, "grad_norm": 3.3082696632840434, "learning_rate": 1.60920479149399e-05, "loss": 1.0592, "step": 6645 }, { "epoch": 0.9384354702061565, "grad_norm": 2.6254444362242153, "learning_rate": 1.6090838933236004e-05, "loss": 0.7735, "step": 6646 }, { "epoch": 0.9385766732561424, "grad_norm": 3.0839950407523267, "learning_rate": 1.608962980998364e-05, "loss": 0.9762, "step": 6647 }, { "epoch": 0.9387178763061282, "grad_norm": 3.459557276954536, "learning_rate": 1.608842054521091e-05, "loss": 0.9345, "step": 6648 }, { "epoch": 0.9388590793561141, "grad_norm": 3.5899919801696525, "learning_rate": 1.6087211138945917e-05, "loss": 0.9923, "step": 6649 }, { "epoch": 0.9390002824061, "grad_norm": 3.950889396040656, "learning_rate": 1.6086001591216764e-05, "loss": 1.1762, "step": 6650 }, { "epoch": 0.9391414854560859, "grad_norm": 3.7910557716480087, "learning_rate": 1.6084791902051563e-05, "loss": 1.0029, "step": 6651 }, { "epoch": 0.9392826885060718, "grad_norm": 3.0713211144037684, "learning_rate": 1.6083582071478424e-05, "loss": 0.9519, "step": 6652 }, { "epoch": 0.9394238915560577, "grad_norm": 3.52584341383269, "learning_rate": 1.6082372099525464e-05, "loss": 1.0988, "step": 6653 }, { "epoch": 0.9395650946060435, "grad_norm": 2.9522503340859534, "learning_rate": 1.6081161986220807e-05, "loss": 0.8544, "step": 6654 }, { "epoch": 0.9397062976560294, "grad_norm": 3.226681077472121, "learning_rate": 1.6079951731592573e-05, "loss": 0.935, "step": 6655 }, { "epoch": 0.9398475007060153, "grad_norm": 3.0553486203415146, "learning_rate": 1.6078741335668882e-05, "loss": 0.7655, "step": 6656 }, { "epoch": 0.9399887037560011, "grad_norm": 3.324879468485464, "learning_rate": 1.607753079847787e-05, "loss": 0.8258, "step": 6657 }, { "epoch": 0.940129906805987, "grad_norm": 3.3381348180705266, "learning_rate": 1.6076320120047667e-05, "loss": 0.977, "step": 6658 }, { "epoch": 0.9402711098559728, "grad_norm": 3.8040267048022436, "learning_rate": 1.6075109300406407e-05, "loss": 1.3205, "step": 6659 }, { "epoch": 0.9404123129059587, "grad_norm": 3.1607199518088858, "learning_rate": 1.607389833958223e-05, "loss": 1.026, "step": 6660 }, { "epoch": 0.9405535159559446, "grad_norm": 3.2102565232140483, "learning_rate": 1.6072687237603283e-05, "loss": 0.8745, "step": 6661 }, { "epoch": 0.9406947190059305, "grad_norm": 3.3262210983727445, "learning_rate": 1.6071475994497702e-05, "loss": 0.8807, "step": 6662 }, { "epoch": 0.9408359220559164, "grad_norm": 3.1618972028901493, "learning_rate": 1.6070264610293645e-05, "loss": 0.9394, "step": 6663 }, { "epoch": 0.9409771251059023, "grad_norm": 3.512876482071222, "learning_rate": 1.6069053085019258e-05, "loss": 1.2797, "step": 6664 }, { "epoch": 0.9411183281558881, "grad_norm": 3.290273807703738, "learning_rate": 1.6067841418702702e-05, "loss": 1.057, "step": 6665 }, { "epoch": 0.941259531205874, "grad_norm": 3.308185624541523, "learning_rate": 1.6066629611372127e-05, "loss": 0.9493, "step": 6666 }, { "epoch": 0.9414007342558599, "grad_norm": 3.2422475772554624, "learning_rate": 1.60654176630557e-05, "loss": 0.898, "step": 6667 }, { "epoch": 0.9415419373058458, "grad_norm": 3.4866492215997145, "learning_rate": 1.6064205573781587e-05, "loss": 1.05, "step": 6668 }, { "epoch": 0.9416831403558317, "grad_norm": 2.8983844958828526, "learning_rate": 1.606299334357796e-05, "loss": 0.9658, "step": 6669 }, { "epoch": 0.9418243434058176, "grad_norm": 3.4328595192868456, "learning_rate": 1.6061780972472978e-05, "loss": 1.0506, "step": 6670 }, { "epoch": 0.9419655464558034, "grad_norm": 3.0889144449357104, "learning_rate": 1.6060568460494828e-05, "loss": 1.02, "step": 6671 }, { "epoch": 0.9421067495057893, "grad_norm": 3.03361620794687, "learning_rate": 1.6059355807671683e-05, "loss": 0.8275, "step": 6672 }, { "epoch": 0.9422479525557752, "grad_norm": 2.884333698059003, "learning_rate": 1.605814301403173e-05, "loss": 0.7323, "step": 6673 }, { "epoch": 0.9423891556057611, "grad_norm": 3.1836482184551316, "learning_rate": 1.6056930079603144e-05, "loss": 0.7934, "step": 6674 }, { "epoch": 0.942530358655747, "grad_norm": 3.4125152825407725, "learning_rate": 1.6055717004414125e-05, "loss": 0.8653, "step": 6675 }, { "epoch": 0.9426715617057329, "grad_norm": 2.910643880237899, "learning_rate": 1.6054503788492852e-05, "loss": 0.9476, "step": 6676 }, { "epoch": 0.9428127647557187, "grad_norm": 3.4209158245494056, "learning_rate": 1.6053290431867528e-05, "loss": 0.9221, "step": 6677 }, { "epoch": 0.9429539678057046, "grad_norm": 3.6658283944741523, "learning_rate": 1.605207693456635e-05, "loss": 1.1199, "step": 6678 }, { "epoch": 0.9430951708556905, "grad_norm": 3.2051746205158427, "learning_rate": 1.605086329661752e-05, "loss": 0.8933, "step": 6679 }, { "epoch": 0.9432363739056764, "grad_norm": 3.3420356761290653, "learning_rate": 1.6049649518049234e-05, "loss": 1.044, "step": 6680 }, { "epoch": 0.9433775769556623, "grad_norm": 3.2460219685075087, "learning_rate": 1.6048435598889708e-05, "loss": 0.7345, "step": 6681 }, { "epoch": 0.9435187800056481, "grad_norm": 3.5539531333143897, "learning_rate": 1.6047221539167152e-05, "loss": 1.0141, "step": 6682 }, { "epoch": 0.943659983055634, "grad_norm": 3.392029939472976, "learning_rate": 1.604600733890978e-05, "loss": 1.0401, "step": 6683 }, { "epoch": 0.9438011861056199, "grad_norm": 3.6183830643802763, "learning_rate": 1.6044792998145804e-05, "loss": 1.0269, "step": 6684 }, { "epoch": 0.9439423891556058, "grad_norm": 3.7388635242574146, "learning_rate": 1.6043578516903452e-05, "loss": 1.0434, "step": 6685 }, { "epoch": 0.9440835922055917, "grad_norm": 2.765974676715772, "learning_rate": 1.6042363895210948e-05, "loss": 0.7987, "step": 6686 }, { "epoch": 0.9442247952555776, "grad_norm": 3.228900249498772, "learning_rate": 1.6041149133096515e-05, "loss": 0.995, "step": 6687 }, { "epoch": 0.9443659983055634, "grad_norm": 3.8561738013689646, "learning_rate": 1.6039934230588384e-05, "loss": 1.168, "step": 6688 }, { "epoch": 0.9445072013555493, "grad_norm": 3.5186093249951895, "learning_rate": 1.6038719187714788e-05, "loss": 0.9663, "step": 6689 }, { "epoch": 0.9446484044055352, "grad_norm": 3.5293143488948933, "learning_rate": 1.6037504004503967e-05, "loss": 1.1972, "step": 6690 }, { "epoch": 0.944789607455521, "grad_norm": 3.222830060959618, "learning_rate": 1.6036288680984164e-05, "loss": 1.0092, "step": 6691 }, { "epoch": 0.9449308105055069, "grad_norm": 3.3479942661906525, "learning_rate": 1.6035073217183613e-05, "loss": 0.7898, "step": 6692 }, { "epoch": 0.9450720135554928, "grad_norm": 2.706742446356195, "learning_rate": 1.6033857613130574e-05, "loss": 0.7797, "step": 6693 }, { "epoch": 0.9452132166054786, "grad_norm": 2.967255021079711, "learning_rate": 1.6032641868853283e-05, "loss": 0.8365, "step": 6694 }, { "epoch": 0.9453544196554645, "grad_norm": 3.0158795341226474, "learning_rate": 1.6031425984380006e-05, "loss": 0.9347, "step": 6695 }, { "epoch": 0.9454956227054504, "grad_norm": 3.418218520605631, "learning_rate": 1.6030209959738988e-05, "loss": 0.9387, "step": 6696 }, { "epoch": 0.9456368257554363, "grad_norm": 3.6190250247360907, "learning_rate": 1.60289937949585e-05, "loss": 1.0718, "step": 6697 }, { "epoch": 0.9457780288054222, "grad_norm": 3.417781106338642, "learning_rate": 1.6027777490066798e-05, "loss": 0.8786, "step": 6698 }, { "epoch": 0.945919231855408, "grad_norm": 3.226895744446707, "learning_rate": 1.602656104509215e-05, "loss": 0.8636, "step": 6699 }, { "epoch": 0.9460604349053939, "grad_norm": 3.0532366793867434, "learning_rate": 1.6025344460062826e-05, "loss": 0.8661, "step": 6700 }, { "epoch": 0.9462016379553798, "grad_norm": 3.5896744323056304, "learning_rate": 1.60241277350071e-05, "loss": 0.9442, "step": 6701 }, { "epoch": 0.9463428410053657, "grad_norm": 3.5276638642262217, "learning_rate": 1.6022910869953245e-05, "loss": 0.9921, "step": 6702 }, { "epoch": 0.9464840440553516, "grad_norm": 3.754744285092437, "learning_rate": 1.6021693864929548e-05, "loss": 1.206, "step": 6703 }, { "epoch": 0.9466252471053375, "grad_norm": 3.199563372734988, "learning_rate": 1.602047671996428e-05, "loss": 0.7907, "step": 6704 }, { "epoch": 0.9467664501553233, "grad_norm": 3.0184576091123363, "learning_rate": 1.6019259435085733e-05, "loss": 0.8096, "step": 6705 }, { "epoch": 0.9469076532053092, "grad_norm": 3.161366852180515, "learning_rate": 1.6018042010322197e-05, "loss": 1.1012, "step": 6706 }, { "epoch": 0.9470488562552951, "grad_norm": 3.6676417078460988, "learning_rate": 1.6016824445701965e-05, "loss": 1.031, "step": 6707 }, { "epoch": 0.947190059305281, "grad_norm": 3.417058895814996, "learning_rate": 1.6015606741253334e-05, "loss": 0.9663, "step": 6708 }, { "epoch": 0.9473312623552669, "grad_norm": 2.9297808756704122, "learning_rate": 1.6014388897004595e-05, "loss": 0.8103, "step": 6709 }, { "epoch": 0.9474724654052528, "grad_norm": 2.8256065991246504, "learning_rate": 1.601317091298406e-05, "loss": 0.8442, "step": 6710 }, { "epoch": 0.9476136684552386, "grad_norm": 3.035086442580244, "learning_rate": 1.6011952789220025e-05, "loss": 0.8863, "step": 6711 }, { "epoch": 0.9477548715052245, "grad_norm": 3.2467716081350417, "learning_rate": 1.601073452574081e-05, "loss": 1.1156, "step": 6712 }, { "epoch": 0.9478960745552104, "grad_norm": 2.9998715551611137, "learning_rate": 1.6009516122574717e-05, "loss": 0.7638, "step": 6713 }, { "epoch": 0.9480372776051963, "grad_norm": 2.9927676190915706, "learning_rate": 1.6008297579750063e-05, "loss": 0.8013, "step": 6714 }, { "epoch": 0.9481784806551822, "grad_norm": 3.7753727829347907, "learning_rate": 1.600707889729517e-05, "loss": 0.9594, "step": 6715 }, { "epoch": 0.9483196837051681, "grad_norm": 3.0565816641728967, "learning_rate": 1.6005860075238358e-05, "loss": 1.0416, "step": 6716 }, { "epoch": 0.9484608867551539, "grad_norm": 3.365895551812851, "learning_rate": 1.600464111360795e-05, "loss": 1.1126, "step": 6717 }, { "epoch": 0.9486020898051398, "grad_norm": 3.2037498624525673, "learning_rate": 1.6003422012432275e-05, "loss": 1.0029, "step": 6718 }, { "epoch": 0.9487432928551257, "grad_norm": 2.78030582086645, "learning_rate": 1.6002202771739666e-05, "loss": 0.8412, "step": 6719 }, { "epoch": 0.9488844959051116, "grad_norm": 3.519099739407791, "learning_rate": 1.6000983391558457e-05, "loss": 0.9792, "step": 6720 }, { "epoch": 0.9490256989550975, "grad_norm": 3.4547811176230856, "learning_rate": 1.5999763871916987e-05, "loss": 1.0257, "step": 6721 }, { "epoch": 0.9491669020050834, "grad_norm": 3.928588709125975, "learning_rate": 1.5998544212843597e-05, "loss": 1.1313, "step": 6722 }, { "epoch": 0.9493081050550692, "grad_norm": 3.5481236859440477, "learning_rate": 1.5997324414366626e-05, "loss": 1.0001, "step": 6723 }, { "epoch": 0.9494493081050551, "grad_norm": 3.5957404600538183, "learning_rate": 1.5996104476514426e-05, "loss": 1.1066, "step": 6724 }, { "epoch": 0.9495905111550409, "grad_norm": 3.3883070319642385, "learning_rate": 1.5994884399315348e-05, "loss": 1.1193, "step": 6725 }, { "epoch": 0.9497317142050268, "grad_norm": 3.4567736060702896, "learning_rate": 1.5993664182797747e-05, "loss": 1.1202, "step": 6726 }, { "epoch": 0.9498729172550127, "grad_norm": 3.6762692009232993, "learning_rate": 1.599244382698998e-05, "loss": 1.2075, "step": 6727 }, { "epoch": 0.9500141203049985, "grad_norm": 3.675974358282678, "learning_rate": 1.59912233319204e-05, "loss": 1.0842, "step": 6728 }, { "epoch": 0.9501553233549844, "grad_norm": 3.4022117365793787, "learning_rate": 1.5990002697617386e-05, "loss": 0.8944, "step": 6729 }, { "epoch": 0.9502965264049703, "grad_norm": 3.33084817505258, "learning_rate": 1.5988781924109293e-05, "loss": 1.0385, "step": 6730 }, { "epoch": 0.9504377294549562, "grad_norm": 3.1293236362281576, "learning_rate": 1.598756101142449e-05, "loss": 0.8546, "step": 6731 }, { "epoch": 0.9505789325049421, "grad_norm": 3.11331648312038, "learning_rate": 1.598633995959136e-05, "loss": 0.8249, "step": 6732 }, { "epoch": 0.950720135554928, "grad_norm": 3.152349150049387, "learning_rate": 1.5985118768638276e-05, "loss": 0.9512, "step": 6733 }, { "epoch": 0.9508613386049138, "grad_norm": 2.7945221659294814, "learning_rate": 1.5983897438593612e-05, "loss": 0.853, "step": 6734 }, { "epoch": 0.9510025416548997, "grad_norm": 3.8858451242695167, "learning_rate": 1.5982675969485756e-05, "loss": 1.0983, "step": 6735 }, { "epoch": 0.9511437447048856, "grad_norm": 2.8233957316869205, "learning_rate": 1.5981454361343097e-05, "loss": 0.7475, "step": 6736 }, { "epoch": 0.9512849477548715, "grad_norm": 3.2609576157697053, "learning_rate": 1.5980232614194023e-05, "loss": 0.8103, "step": 6737 }, { "epoch": 0.9514261508048574, "grad_norm": 3.6054255120768683, "learning_rate": 1.597901072806692e-05, "loss": 1.1149, "step": 6738 }, { "epoch": 0.9515673538548433, "grad_norm": 3.028609404345994, "learning_rate": 1.5977788702990192e-05, "loss": 0.8763, "step": 6739 }, { "epoch": 0.9517085569048291, "grad_norm": 3.2838051152049257, "learning_rate": 1.5976566538992237e-05, "loss": 0.9289, "step": 6740 }, { "epoch": 0.951849759954815, "grad_norm": 3.67397767400594, "learning_rate": 1.597534423610146e-05, "loss": 0.8777, "step": 6741 }, { "epoch": 0.9519909630048009, "grad_norm": 3.7955202689089167, "learning_rate": 1.597412179434626e-05, "loss": 1.2206, "step": 6742 }, { "epoch": 0.9521321660547868, "grad_norm": 3.23797944649157, "learning_rate": 1.5972899213755047e-05, "loss": 0.9903, "step": 6743 }, { "epoch": 0.9522733691047727, "grad_norm": 3.5213991069236985, "learning_rate": 1.5971676494356237e-05, "loss": 0.7797, "step": 6744 }, { "epoch": 0.9524145721547586, "grad_norm": 3.2323595962664284, "learning_rate": 1.5970453636178248e-05, "loss": 1.017, "step": 6745 }, { "epoch": 0.9525557752047444, "grad_norm": 3.944320190742375, "learning_rate": 1.5969230639249492e-05, "loss": 1.001, "step": 6746 }, { "epoch": 0.9526969782547303, "grad_norm": 2.962039286834128, "learning_rate": 1.5968007503598397e-05, "loss": 0.8269, "step": 6747 }, { "epoch": 0.9528381813047162, "grad_norm": 3.08786057862195, "learning_rate": 1.596678422925338e-05, "loss": 0.978, "step": 6748 }, { "epoch": 0.9529793843547021, "grad_norm": 2.8344784056513173, "learning_rate": 1.596556081624288e-05, "loss": 0.8715, "step": 6749 }, { "epoch": 0.953120587404688, "grad_norm": 3.534679241935083, "learning_rate": 1.596433726459532e-05, "loss": 0.9334, "step": 6750 }, { "epoch": 0.9532617904546739, "grad_norm": 3.5766762057609385, "learning_rate": 1.596311357433914e-05, "loss": 1.1952, "step": 6751 }, { "epoch": 0.9534029935046597, "grad_norm": 2.867566151961349, "learning_rate": 1.5961889745502767e-05, "loss": 0.8382, "step": 6752 }, { "epoch": 0.9535441965546456, "grad_norm": 4.406039812060257, "learning_rate": 1.596066577811466e-05, "loss": 1.151, "step": 6753 }, { "epoch": 0.9536853996046315, "grad_norm": 3.5751234630709696, "learning_rate": 1.5959441672203254e-05, "loss": 0.9983, "step": 6754 }, { "epoch": 0.9538266026546174, "grad_norm": 3.2278906890611005, "learning_rate": 1.5958217427796994e-05, "loss": 0.8101, "step": 6755 }, { "epoch": 0.9539678057046033, "grad_norm": 2.9200246756230728, "learning_rate": 1.5956993044924334e-05, "loss": 0.8703, "step": 6756 }, { "epoch": 0.9541090087545891, "grad_norm": 3.595892262961553, "learning_rate": 1.595576852361373e-05, "loss": 0.9129, "step": 6757 }, { "epoch": 0.954250211804575, "grad_norm": 3.1449596351379996, "learning_rate": 1.5954543863893638e-05, "loss": 0.8221, "step": 6758 }, { "epoch": 0.9543914148545608, "grad_norm": 3.4367082936662827, "learning_rate": 1.5953319065792516e-05, "loss": 1.1315, "step": 6759 }, { "epoch": 0.9545326179045467, "grad_norm": 3.2051265225121366, "learning_rate": 1.5952094129338834e-05, "loss": 0.9898, "step": 6760 }, { "epoch": 0.9546738209545326, "grad_norm": 3.5080836225380705, "learning_rate": 1.595086905456105e-05, "loss": 1.1654, "step": 6761 }, { "epoch": 0.9548150240045185, "grad_norm": 3.864677702889774, "learning_rate": 1.594964384148764e-05, "loss": 1.1831, "step": 6762 }, { "epoch": 0.9549562270545043, "grad_norm": 3.2950318257495756, "learning_rate": 1.594841849014708e-05, "loss": 1.008, "step": 6763 }, { "epoch": 0.9550974301044902, "grad_norm": 3.266416286832824, "learning_rate": 1.5947193000567844e-05, "loss": 0.9798, "step": 6764 }, { "epoch": 0.9552386331544761, "grad_norm": 3.307313507320486, "learning_rate": 1.5945967372778406e-05, "loss": 1.1058, "step": 6765 }, { "epoch": 0.955379836204462, "grad_norm": 3.29561805948912, "learning_rate": 1.5944741606807257e-05, "loss": 1.048, "step": 6766 }, { "epoch": 0.9555210392544479, "grad_norm": 3.205673217210416, "learning_rate": 1.594351570268288e-05, "loss": 1.048, "step": 6767 }, { "epoch": 0.9556622423044338, "grad_norm": 3.4806223230940496, "learning_rate": 1.5942289660433766e-05, "loss": 0.9942, "step": 6768 }, { "epoch": 0.9558034453544196, "grad_norm": 3.324422296548328, "learning_rate": 1.5941063480088406e-05, "loss": 0.9264, "step": 6769 }, { "epoch": 0.9559446484044055, "grad_norm": 3.25724625024677, "learning_rate": 1.5939837161675297e-05, "loss": 0.916, "step": 6770 }, { "epoch": 0.9560858514543914, "grad_norm": 3.2390138304741565, "learning_rate": 1.5938610705222936e-05, "loss": 0.9942, "step": 6771 }, { "epoch": 0.9562270545043773, "grad_norm": 3.435879977823398, "learning_rate": 1.5937384110759824e-05, "loss": 1.0334, "step": 6772 }, { "epoch": 0.9563682575543632, "grad_norm": 2.4984399307293517, "learning_rate": 1.5936157378314473e-05, "loss": 0.7904, "step": 6773 }, { "epoch": 0.956509460604349, "grad_norm": 2.781947933495412, "learning_rate": 1.5934930507915386e-05, "loss": 0.7486, "step": 6774 }, { "epoch": 0.9566506636543349, "grad_norm": 3.1584283741458976, "learning_rate": 1.5933703499591082e-05, "loss": 0.9675, "step": 6775 }, { "epoch": 0.9567918667043208, "grad_norm": 3.0449649968016734, "learning_rate": 1.5932476353370068e-05, "loss": 0.9858, "step": 6776 }, { "epoch": 0.9569330697543067, "grad_norm": 2.9607852389209657, "learning_rate": 1.5931249069280866e-05, "loss": 0.7486, "step": 6777 }, { "epoch": 0.9570742728042926, "grad_norm": 3.6066535603275143, "learning_rate": 1.5930021647351997e-05, "loss": 1.2387, "step": 6778 }, { "epoch": 0.9572154758542785, "grad_norm": 4.13675172208724, "learning_rate": 1.5928794087611988e-05, "loss": 1.1815, "step": 6779 }, { "epoch": 0.9573566789042643, "grad_norm": 2.847477817893974, "learning_rate": 1.5927566390089362e-05, "loss": 0.8239, "step": 6780 }, { "epoch": 0.9574978819542502, "grad_norm": 3.360918265216355, "learning_rate": 1.5926338554812653e-05, "loss": 0.7735, "step": 6781 }, { "epoch": 0.9576390850042361, "grad_norm": 3.5124256924633612, "learning_rate": 1.5925110581810396e-05, "loss": 1.1325, "step": 6782 }, { "epoch": 0.957780288054222, "grad_norm": 3.5172408555891796, "learning_rate": 1.592388247111113e-05, "loss": 1.0073, "step": 6783 }, { "epoch": 0.9579214911042079, "grad_norm": 4.034763736468467, "learning_rate": 1.592265422274339e-05, "loss": 1.087, "step": 6784 }, { "epoch": 0.9580626941541938, "grad_norm": 3.7955639946441897, "learning_rate": 1.5921425836735725e-05, "loss": 1.1642, "step": 6785 }, { "epoch": 0.9582038972041796, "grad_norm": 3.1893957637619863, "learning_rate": 1.5920197313116682e-05, "loss": 0.8976, "step": 6786 }, { "epoch": 0.9583451002541655, "grad_norm": 3.877590000041149, "learning_rate": 1.591896865191481e-05, "loss": 0.9861, "step": 6787 }, { "epoch": 0.9584863033041514, "grad_norm": 3.7872837437015408, "learning_rate": 1.591773985315866e-05, "loss": 0.9501, "step": 6788 }, { "epoch": 0.9586275063541373, "grad_norm": 2.8069601379982436, "learning_rate": 1.5916510916876794e-05, "loss": 0.7218, "step": 6789 }, { "epoch": 0.9587687094041232, "grad_norm": 4.518970237798156, "learning_rate": 1.591528184309777e-05, "loss": 1.5069, "step": 6790 }, { "epoch": 0.9589099124541091, "grad_norm": 3.404808765362731, "learning_rate": 1.591405263185015e-05, "loss": 1.1348, "step": 6791 }, { "epoch": 0.9590511155040949, "grad_norm": 3.2324040040428614, "learning_rate": 1.59128232831625e-05, "loss": 0.9001, "step": 6792 }, { "epoch": 0.9591923185540807, "grad_norm": 3.587905717034151, "learning_rate": 1.591159379706339e-05, "loss": 1.123, "step": 6793 }, { "epoch": 0.9593335216040666, "grad_norm": 2.9311800938656, "learning_rate": 1.5910364173581395e-05, "loss": 0.7579, "step": 6794 }, { "epoch": 0.9594747246540525, "grad_norm": 3.4258164922055507, "learning_rate": 1.5909134412745087e-05, "loss": 0.995, "step": 6795 }, { "epoch": 0.9596159277040384, "grad_norm": 3.2619321189690305, "learning_rate": 1.5907904514583047e-05, "loss": 0.8399, "step": 6796 }, { "epoch": 0.9597571307540242, "grad_norm": 3.236394450637207, "learning_rate": 1.590667447912386e-05, "loss": 0.9869, "step": 6797 }, { "epoch": 0.9598983338040101, "grad_norm": 3.177077362516382, "learning_rate": 1.590544430639611e-05, "loss": 0.9513, "step": 6798 }, { "epoch": 0.960039536853996, "grad_norm": 4.065562860549573, "learning_rate": 1.590421399642838e-05, "loss": 1.275, "step": 6799 }, { "epoch": 0.9601807399039819, "grad_norm": 3.2497034351080276, "learning_rate": 1.5902983549249272e-05, "loss": 1.1588, "step": 6800 }, { "epoch": 0.9603219429539678, "grad_norm": 3.3055289396953778, "learning_rate": 1.5901752964887373e-05, "loss": 0.7902, "step": 6801 }, { "epoch": 0.9604631460039537, "grad_norm": 3.4158928668264985, "learning_rate": 1.5900522243371283e-05, "loss": 0.9128, "step": 6802 }, { "epoch": 0.9606043490539395, "grad_norm": 3.4712116606356336, "learning_rate": 1.5899291384729606e-05, "loss": 1.2385, "step": 6803 }, { "epoch": 0.9607455521039254, "grad_norm": 3.6750775894209404, "learning_rate": 1.589806038899094e-05, "loss": 1.0007, "step": 6804 }, { "epoch": 0.9608867551539113, "grad_norm": 3.057119133281129, "learning_rate": 1.5896829256183905e-05, "loss": 0.8873, "step": 6805 }, { "epoch": 0.9610279582038972, "grad_norm": 3.136489052840755, "learning_rate": 1.58955979863371e-05, "loss": 0.8882, "step": 6806 }, { "epoch": 0.9611691612538831, "grad_norm": 3.6939306596756376, "learning_rate": 1.5894366579479144e-05, "loss": 0.9684, "step": 6807 }, { "epoch": 0.961310364303869, "grad_norm": 2.9173679167197206, "learning_rate": 1.5893135035638658e-05, "loss": 0.9104, "step": 6808 }, { "epoch": 0.9614515673538548, "grad_norm": 2.8311280096775353, "learning_rate": 1.5891903354844258e-05, "loss": 0.8049, "step": 6809 }, { "epoch": 0.9615927704038407, "grad_norm": 3.573897937300494, "learning_rate": 1.5890671537124565e-05, "loss": 0.9463, "step": 6810 }, { "epoch": 0.9617339734538266, "grad_norm": 3.0231796177610146, "learning_rate": 1.588943958250821e-05, "loss": 0.7729, "step": 6811 }, { "epoch": 0.9618751765038125, "grad_norm": 3.0563598004731216, "learning_rate": 1.5888207491023824e-05, "loss": 0.9399, "step": 6812 }, { "epoch": 0.9620163795537984, "grad_norm": 3.915954008629436, "learning_rate": 1.588697526270004e-05, "loss": 1.0972, "step": 6813 }, { "epoch": 0.9621575826037843, "grad_norm": 3.571242425262027, "learning_rate": 1.5885742897565494e-05, "loss": 0.9581, "step": 6814 }, { "epoch": 0.9622987856537701, "grad_norm": 3.425426153330954, "learning_rate": 1.5884510395648823e-05, "loss": 1.0471, "step": 6815 }, { "epoch": 0.962439988703756, "grad_norm": 2.854683644645683, "learning_rate": 1.588327775697867e-05, "loss": 0.792, "step": 6816 }, { "epoch": 0.9625811917537419, "grad_norm": 4.078296539366986, "learning_rate": 1.5882044981583685e-05, "loss": 1.141, "step": 6817 }, { "epoch": 0.9627223948037278, "grad_norm": 3.8403016607712845, "learning_rate": 1.5880812069492516e-05, "loss": 0.9468, "step": 6818 }, { "epoch": 0.9628635978537137, "grad_norm": 3.7463268626977646, "learning_rate": 1.5879579020733814e-05, "loss": 1.0548, "step": 6819 }, { "epoch": 0.9630048009036996, "grad_norm": 2.8867767484420708, "learning_rate": 1.5878345835336232e-05, "loss": 0.8262, "step": 6820 }, { "epoch": 0.9631460039536854, "grad_norm": 3.9045870175460133, "learning_rate": 1.587711251332843e-05, "loss": 1.2583, "step": 6821 }, { "epoch": 0.9632872070036713, "grad_norm": 3.310821321982624, "learning_rate": 1.5875879054739075e-05, "loss": 0.9406, "step": 6822 }, { "epoch": 0.9634284100536572, "grad_norm": 3.948061844915971, "learning_rate": 1.587464545959683e-05, "loss": 0.9872, "step": 6823 }, { "epoch": 0.9635696131036431, "grad_norm": 3.3474727001525486, "learning_rate": 1.587341172793036e-05, "loss": 1.1394, "step": 6824 }, { "epoch": 0.963710816153629, "grad_norm": 2.9843997917899645, "learning_rate": 1.5872177859768336e-05, "loss": 0.8646, "step": 6825 }, { "epoch": 0.9638520192036149, "grad_norm": 4.043080962413109, "learning_rate": 1.5870943855139437e-05, "loss": 1.2428, "step": 6826 }, { "epoch": 0.9639932222536006, "grad_norm": 2.927053804436132, "learning_rate": 1.5869709714072335e-05, "loss": 1.0471, "step": 6827 }, { "epoch": 0.9641344253035865, "grad_norm": 3.5492530662742485, "learning_rate": 1.5868475436595713e-05, "loss": 1.1749, "step": 6828 }, { "epoch": 0.9642756283535724, "grad_norm": 3.102729490915012, "learning_rate": 1.5867241022738262e-05, "loss": 0.9967, "step": 6829 }, { "epoch": 0.9644168314035583, "grad_norm": 3.516904227721121, "learning_rate": 1.586600647252866e-05, "loss": 1.0932, "step": 6830 }, { "epoch": 0.9645580344535442, "grad_norm": 3.3609921418556965, "learning_rate": 1.5864771785995602e-05, "loss": 1.0374, "step": 6831 }, { "epoch": 0.96469923750353, "grad_norm": 3.276700813425036, "learning_rate": 1.586353696316778e-05, "loss": 1.0624, "step": 6832 }, { "epoch": 0.9648404405535159, "grad_norm": 3.222746563496837, "learning_rate": 1.586230200407389e-05, "loss": 1.0355, "step": 6833 }, { "epoch": 0.9649816436035018, "grad_norm": 3.5511795942251068, "learning_rate": 1.5861066908742638e-05, "loss": 1.0672, "step": 6834 }, { "epoch": 0.9651228466534877, "grad_norm": 2.9941926665216476, "learning_rate": 1.5859831677202718e-05, "loss": 0.8671, "step": 6835 }, { "epoch": 0.9652640497034736, "grad_norm": 3.070440140597219, "learning_rate": 1.585859630948284e-05, "loss": 0.9123, "step": 6836 }, { "epoch": 0.9654052527534595, "grad_norm": 3.7192103244389427, "learning_rate": 1.5857360805611717e-05, "loss": 0.9213, "step": 6837 }, { "epoch": 0.9655464558034453, "grad_norm": 3.523345687234521, "learning_rate": 1.5856125165618056e-05, "loss": 1.1936, "step": 6838 }, { "epoch": 0.9656876588534312, "grad_norm": 3.4636091709232892, "learning_rate": 1.5854889389530577e-05, "loss": 1.2216, "step": 6839 }, { "epoch": 0.9658288619034171, "grad_norm": 3.5887460992891738, "learning_rate": 1.5853653477377996e-05, "loss": 0.9244, "step": 6840 }, { "epoch": 0.965970064953403, "grad_norm": 2.721520328532032, "learning_rate": 1.5852417429189037e-05, "loss": 0.7991, "step": 6841 }, { "epoch": 0.9661112680033889, "grad_norm": 3.4393972019609196, "learning_rate": 1.585118124499242e-05, "loss": 0.9428, "step": 6842 }, { "epoch": 0.9662524710533748, "grad_norm": 3.432416544453715, "learning_rate": 1.5849944924816883e-05, "loss": 1.1066, "step": 6843 }, { "epoch": 0.9663936741033606, "grad_norm": 3.0051061265447045, "learning_rate": 1.584870846869115e-05, "loss": 0.8612, "step": 6844 }, { "epoch": 0.9665348771533465, "grad_norm": 3.1454586567533847, "learning_rate": 1.584747187664396e-05, "loss": 0.8654, "step": 6845 }, { "epoch": 0.9666760802033324, "grad_norm": 3.735789297060759, "learning_rate": 1.5846235148704047e-05, "loss": 1.1413, "step": 6846 }, { "epoch": 0.9668172832533183, "grad_norm": 3.8938661289639125, "learning_rate": 1.5844998284900155e-05, "loss": 1.1151, "step": 6847 }, { "epoch": 0.9669584863033042, "grad_norm": 3.7629071977821638, "learning_rate": 1.5843761285261027e-05, "loss": 0.9197, "step": 6848 }, { "epoch": 0.96709968935329, "grad_norm": 4.374299755015566, "learning_rate": 1.584252414981541e-05, "loss": 1.1907, "step": 6849 }, { "epoch": 0.9672408924032759, "grad_norm": 3.203809547659136, "learning_rate": 1.5841286878592055e-05, "loss": 0.9175, "step": 6850 }, { "epoch": 0.9673820954532618, "grad_norm": 3.3445786517312053, "learning_rate": 1.5840049471619717e-05, "loss": 1.1313, "step": 6851 }, { "epoch": 0.9675232985032477, "grad_norm": 3.391874673777384, "learning_rate": 1.583881192892715e-05, "loss": 1.0373, "step": 6852 }, { "epoch": 0.9676645015532336, "grad_norm": 3.0386219188269252, "learning_rate": 1.5837574250543118e-05, "loss": 0.7768, "step": 6853 }, { "epoch": 0.9678057046032195, "grad_norm": 3.0068322827410707, "learning_rate": 1.5836336436496377e-05, "loss": 0.8668, "step": 6854 }, { "epoch": 0.9679469076532053, "grad_norm": 3.4130528716695028, "learning_rate": 1.5835098486815698e-05, "loss": 0.8466, "step": 6855 }, { "epoch": 0.9680881107031912, "grad_norm": 3.150392581721561, "learning_rate": 1.5833860401529855e-05, "loss": 1.0577, "step": 6856 }, { "epoch": 0.9682293137531771, "grad_norm": 2.7528416175145547, "learning_rate": 1.5832622180667613e-05, "loss": 1.024, "step": 6857 }, { "epoch": 0.968370516803163, "grad_norm": 3.0843098285562354, "learning_rate": 1.5831383824257748e-05, "loss": 0.9073, "step": 6858 }, { "epoch": 0.9685117198531489, "grad_norm": 3.5290779794876537, "learning_rate": 1.5830145332329043e-05, "loss": 1.1477, "step": 6859 }, { "epoch": 0.9686529229031348, "grad_norm": 3.510242526965286, "learning_rate": 1.582890670491028e-05, "loss": 1.1602, "step": 6860 }, { "epoch": 0.9687941259531205, "grad_norm": 3.228156173843682, "learning_rate": 1.5827667942030244e-05, "loss": 1.1081, "step": 6861 }, { "epoch": 0.9689353290031064, "grad_norm": 2.93213187446821, "learning_rate": 1.5826429043717716e-05, "loss": 0.9327, "step": 6862 }, { "epoch": 0.9690765320530923, "grad_norm": 2.8671632917765932, "learning_rate": 1.5825190010001496e-05, "loss": 0.9143, "step": 6863 }, { "epoch": 0.9692177351030782, "grad_norm": 3.873518327711928, "learning_rate": 1.5823950840910376e-05, "loss": 1.1302, "step": 6864 }, { "epoch": 0.9693589381530641, "grad_norm": 3.131185158429214, "learning_rate": 1.5822711536473156e-05, "loss": 0.7891, "step": 6865 }, { "epoch": 0.96950014120305, "grad_norm": 3.884595612913844, "learning_rate": 1.582147209671863e-05, "loss": 1.1184, "step": 6866 }, { "epoch": 0.9696413442530358, "grad_norm": 3.327291461393861, "learning_rate": 1.582023252167561e-05, "loss": 1.0937, "step": 6867 }, { "epoch": 0.9697825473030217, "grad_norm": 4.067796244243117, "learning_rate": 1.5818992811372898e-05, "loss": 1.0095, "step": 6868 }, { "epoch": 0.9699237503530076, "grad_norm": 3.5734283154589366, "learning_rate": 1.5817752965839308e-05, "loss": 1.0098, "step": 6869 }, { "epoch": 0.9700649534029935, "grad_norm": 3.584176672852647, "learning_rate": 1.581651298510365e-05, "loss": 0.9498, "step": 6870 }, { "epoch": 0.9702061564529794, "grad_norm": 3.2518716175459983, "learning_rate": 1.581527286919474e-05, "loss": 1.0061, "step": 6871 }, { "epoch": 0.9703473595029652, "grad_norm": 3.4856346375892553, "learning_rate": 1.58140326181414e-05, "loss": 0.7918, "step": 6872 }, { "epoch": 0.9704885625529511, "grad_norm": 2.6094398776856638, "learning_rate": 1.581279223197246e-05, "loss": 0.6555, "step": 6873 }, { "epoch": 0.970629765602937, "grad_norm": 3.3146461727426906, "learning_rate": 1.5811551710716732e-05, "loss": 0.8259, "step": 6874 }, { "epoch": 0.9707709686529229, "grad_norm": 3.4566785361733077, "learning_rate": 1.5810311054403056e-05, "loss": 1.0797, "step": 6875 }, { "epoch": 0.9709121717029088, "grad_norm": 3.7048654585669736, "learning_rate": 1.5809070263060256e-05, "loss": 0.8461, "step": 6876 }, { "epoch": 0.9710533747528947, "grad_norm": 2.8460653855458053, "learning_rate": 1.5807829336717176e-05, "loss": 0.8323, "step": 6877 }, { "epoch": 0.9711945778028805, "grad_norm": 4.093219741670033, "learning_rate": 1.580658827540265e-05, "loss": 1.0815, "step": 6878 }, { "epoch": 0.9713357808528664, "grad_norm": 3.8501741569804246, "learning_rate": 1.580534707914552e-05, "loss": 1.2278, "step": 6879 }, { "epoch": 0.9714769839028523, "grad_norm": 3.9819037809808386, "learning_rate": 1.5804105747974626e-05, "loss": 1.0844, "step": 6880 }, { "epoch": 0.9716181869528382, "grad_norm": 3.9075175206641486, "learning_rate": 1.5802864281918832e-05, "loss": 1.0516, "step": 6881 }, { "epoch": 0.9717593900028241, "grad_norm": 3.5049321380720655, "learning_rate": 1.5801622681006966e-05, "loss": 0.9866, "step": 6882 }, { "epoch": 0.97190059305281, "grad_norm": 3.046121035944033, "learning_rate": 1.5800380945267902e-05, "loss": 0.8331, "step": 6883 }, { "epoch": 0.9720417961027958, "grad_norm": 3.35198584994267, "learning_rate": 1.5799139074730487e-05, "loss": 0.9949, "step": 6884 }, { "epoch": 0.9721829991527817, "grad_norm": 3.5445071027830206, "learning_rate": 1.5797897069423584e-05, "loss": 0.8613, "step": 6885 }, { "epoch": 0.9723242022027676, "grad_norm": 3.472183312156491, "learning_rate": 1.5796654929376057e-05, "loss": 1.0795, "step": 6886 }, { "epoch": 0.9724654052527535, "grad_norm": 3.787288286908652, "learning_rate": 1.5795412654616776e-05, "loss": 0.8871, "step": 6887 }, { "epoch": 0.9726066083027394, "grad_norm": 2.6620783569640687, "learning_rate": 1.5794170245174605e-05, "loss": 0.7564, "step": 6888 }, { "epoch": 0.9727478113527253, "grad_norm": 3.6164220689390367, "learning_rate": 1.579292770107842e-05, "loss": 0.9598, "step": 6889 }, { "epoch": 0.9728890144027111, "grad_norm": 3.234976480525693, "learning_rate": 1.5791685022357098e-05, "loss": 1.0008, "step": 6890 }, { "epoch": 0.973030217452697, "grad_norm": 3.1478943685983323, "learning_rate": 1.5790442209039517e-05, "loss": 0.9154, "step": 6891 }, { "epoch": 0.9731714205026829, "grad_norm": 3.979593816630565, "learning_rate": 1.5789199261154557e-05, "loss": 0.9352, "step": 6892 }, { "epoch": 0.9733126235526688, "grad_norm": 3.086726929904248, "learning_rate": 1.578795617873111e-05, "loss": 0.9797, "step": 6893 }, { "epoch": 0.9734538266026547, "grad_norm": 3.547628256933968, "learning_rate": 1.578671296179806e-05, "loss": 1.0601, "step": 6894 }, { "epoch": 0.9735950296526404, "grad_norm": 3.7934147359910755, "learning_rate": 1.57854696103843e-05, "loss": 1.1474, "step": 6895 }, { "epoch": 0.9737362327026263, "grad_norm": 4.291327581643057, "learning_rate": 1.5784226124518724e-05, "loss": 1.4029, "step": 6896 }, { "epoch": 0.9738774357526122, "grad_norm": 3.033833682302448, "learning_rate": 1.578298250423023e-05, "loss": 0.8849, "step": 6897 }, { "epoch": 0.9740186388025981, "grad_norm": 3.2378779310510093, "learning_rate": 1.5781738749547724e-05, "loss": 0.9698, "step": 6898 }, { "epoch": 0.974159841852584, "grad_norm": 3.725550847691739, "learning_rate": 1.5780494860500103e-05, "loss": 1.0099, "step": 6899 }, { "epoch": 0.9743010449025699, "grad_norm": 3.158443764881195, "learning_rate": 1.5779250837116275e-05, "loss": 0.8815, "step": 6900 }, { "epoch": 0.9744422479525557, "grad_norm": 2.808354497167918, "learning_rate": 1.577800667942516e-05, "loss": 0.8346, "step": 6901 }, { "epoch": 0.9745834510025416, "grad_norm": 3.6570803382766566, "learning_rate": 1.577676238745566e-05, "loss": 1.11, "step": 6902 }, { "epoch": 0.9747246540525275, "grad_norm": 3.605730746823552, "learning_rate": 1.57755179612367e-05, "loss": 1.1869, "step": 6903 }, { "epoch": 0.9748658571025134, "grad_norm": 4.262153500361827, "learning_rate": 1.5774273400797195e-05, "loss": 1.1906, "step": 6904 }, { "epoch": 0.9750070601524993, "grad_norm": 2.7671942956806292, "learning_rate": 1.5773028706166072e-05, "loss": 0.7458, "step": 6905 }, { "epoch": 0.9751482632024852, "grad_norm": 3.6153926077309233, "learning_rate": 1.5771783877372252e-05, "loss": 1.1004, "step": 6906 }, { "epoch": 0.975289466252471, "grad_norm": 3.7013225797162317, "learning_rate": 1.5770538914444672e-05, "loss": 1.1629, "step": 6907 }, { "epoch": 0.9754306693024569, "grad_norm": 3.9692878853993054, "learning_rate": 1.576929381741226e-05, "loss": 0.9929, "step": 6908 }, { "epoch": 0.9755718723524428, "grad_norm": 3.335377330363598, "learning_rate": 1.576804858630394e-05, "loss": 1.0708, "step": 6909 }, { "epoch": 0.9757130754024287, "grad_norm": 3.3016386486113207, "learning_rate": 1.5766803221148676e-05, "loss": 0.9419, "step": 6910 }, { "epoch": 0.9758542784524146, "grad_norm": 3.6043610029243336, "learning_rate": 1.576555772197539e-05, "loss": 1.2182, "step": 6911 }, { "epoch": 0.9759954815024005, "grad_norm": 2.8666036338856244, "learning_rate": 1.576431208881303e-05, "loss": 0.9397, "step": 6912 }, { "epoch": 0.9761366845523863, "grad_norm": 3.69751471671717, "learning_rate": 1.576306632169055e-05, "loss": 1.0234, "step": 6913 }, { "epoch": 0.9762778876023722, "grad_norm": 3.0733924380982525, "learning_rate": 1.5761820420636894e-05, "loss": 0.8671, "step": 6914 }, { "epoch": 0.9764190906523581, "grad_norm": 3.7748641828352287, "learning_rate": 1.576057438568102e-05, "loss": 0.9105, "step": 6915 }, { "epoch": 0.976560293702344, "grad_norm": 4.0278514125909535, "learning_rate": 1.5759328216851886e-05, "loss": 1.1893, "step": 6916 }, { "epoch": 0.9767014967523299, "grad_norm": 3.7982278528909563, "learning_rate": 1.5758081914178457e-05, "loss": 1.223, "step": 6917 }, { "epoch": 0.9768426998023158, "grad_norm": 3.34537858554109, "learning_rate": 1.5756835477689683e-05, "loss": 1.1261, "step": 6918 }, { "epoch": 0.9769839028523016, "grad_norm": 3.246170363653838, "learning_rate": 1.5755588907414544e-05, "loss": 0.806, "step": 6919 }, { "epoch": 0.9771251059022875, "grad_norm": 3.713685600210636, "learning_rate": 1.5754342203382003e-05, "loss": 1.0807, "step": 6920 }, { "epoch": 0.9772663089522734, "grad_norm": 3.066571006635337, "learning_rate": 1.5753095365621033e-05, "loss": 0.9107, "step": 6921 }, { "epoch": 0.9774075120022593, "grad_norm": 3.5619322967913076, "learning_rate": 1.575184839416061e-05, "loss": 1.1108, "step": 6922 }, { "epoch": 0.9775487150522452, "grad_norm": 3.783484182435127, "learning_rate": 1.5750601289029716e-05, "loss": 0.9791, "step": 6923 }, { "epoch": 0.977689918102231, "grad_norm": 3.347506141866573, "learning_rate": 1.5749354050257334e-05, "loss": 1.0116, "step": 6924 }, { "epoch": 0.9778311211522169, "grad_norm": 2.951419145491745, "learning_rate": 1.5748106677872447e-05, "loss": 0.8336, "step": 6925 }, { "epoch": 0.9779723242022028, "grad_norm": 3.8479643720786068, "learning_rate": 1.574685917190404e-05, "loss": 1.001, "step": 6926 }, { "epoch": 0.9781135272521887, "grad_norm": 3.2761712161137986, "learning_rate": 1.574561153238111e-05, "loss": 1.0955, "step": 6927 }, { "epoch": 0.9782547303021746, "grad_norm": 3.215192452005424, "learning_rate": 1.5744363759332647e-05, "loss": 1.0562, "step": 6928 }, { "epoch": 0.9783959333521604, "grad_norm": 2.7404894657469754, "learning_rate": 1.574311585278765e-05, "loss": 0.7568, "step": 6929 }, { "epoch": 0.9785371364021462, "grad_norm": 3.031030347938334, "learning_rate": 1.5741867812775125e-05, "loss": 0.9893, "step": 6930 }, { "epoch": 0.9786783394521321, "grad_norm": 3.1543805295442846, "learning_rate": 1.574061963932407e-05, "loss": 0.9141, "step": 6931 }, { "epoch": 0.978819542502118, "grad_norm": 2.879539667564236, "learning_rate": 1.5739371332463496e-05, "loss": 0.7845, "step": 6932 }, { "epoch": 0.9789607455521039, "grad_norm": 3.2888939995831104, "learning_rate": 1.5738122892222407e-05, "loss": 0.9535, "step": 6933 }, { "epoch": 0.9791019486020898, "grad_norm": 3.3660376509967906, "learning_rate": 1.573687431862982e-05, "loss": 0.9463, "step": 6934 }, { "epoch": 0.9792431516520756, "grad_norm": 3.3763077317102357, "learning_rate": 1.5735625611714754e-05, "loss": 0.9301, "step": 6935 }, { "epoch": 0.9793843547020615, "grad_norm": 3.534849893334151, "learning_rate": 1.5734376771506222e-05, "loss": 1.0963, "step": 6936 }, { "epoch": 0.9795255577520474, "grad_norm": 3.5510548809389353, "learning_rate": 1.5733127798033257e-05, "loss": 1.0692, "step": 6937 }, { "epoch": 0.9796667608020333, "grad_norm": 2.9643008601291796, "learning_rate": 1.5731878691324874e-05, "loss": 0.7428, "step": 6938 }, { "epoch": 0.9798079638520192, "grad_norm": 3.239925961012417, "learning_rate": 1.5730629451410103e-05, "loss": 0.9767, "step": 6939 }, { "epoch": 0.9799491669020051, "grad_norm": 3.223027482757864, "learning_rate": 1.5729380078317982e-05, "loss": 1.1101, "step": 6940 }, { "epoch": 0.980090369951991, "grad_norm": 3.5910506772478366, "learning_rate": 1.5728130572077543e-05, "loss": 0.9898, "step": 6941 }, { "epoch": 0.9802315730019768, "grad_norm": 3.5382573150416254, "learning_rate": 1.572688093271782e-05, "loss": 1.1979, "step": 6942 }, { "epoch": 0.9803727760519627, "grad_norm": 3.2634444563160283, "learning_rate": 1.5725631160267858e-05, "loss": 1.0517, "step": 6943 }, { "epoch": 0.9805139791019486, "grad_norm": 3.380523977344248, "learning_rate": 1.57243812547567e-05, "loss": 0.9333, "step": 6944 }, { "epoch": 0.9806551821519345, "grad_norm": 3.904459545111257, "learning_rate": 1.57231312162134e-05, "loss": 1.1011, "step": 6945 }, { "epoch": 0.9807963852019204, "grad_norm": 2.843037788944992, "learning_rate": 1.5721881044666996e-05, "loss": 0.9079, "step": 6946 }, { "epoch": 0.9809375882519062, "grad_norm": 3.4792988498133735, "learning_rate": 1.572063074014655e-05, "loss": 1.0359, "step": 6947 }, { "epoch": 0.9810787913018921, "grad_norm": 2.9680471599663694, "learning_rate": 1.5719380302681114e-05, "loss": 0.8634, "step": 6948 }, { "epoch": 0.981219994351878, "grad_norm": 3.695512963764563, "learning_rate": 1.5718129732299753e-05, "loss": 1.1771, "step": 6949 }, { "epoch": 0.9813611974018639, "grad_norm": 3.00318618308261, "learning_rate": 1.5716879029031523e-05, "loss": 0.9638, "step": 6950 }, { "epoch": 0.9815024004518498, "grad_norm": 3.3874566214544086, "learning_rate": 1.5715628192905495e-05, "loss": 0.9435, "step": 6951 }, { "epoch": 0.9816436035018357, "grad_norm": 3.423338292236942, "learning_rate": 1.5714377223950734e-05, "loss": 0.8845, "step": 6952 }, { "epoch": 0.9817848065518215, "grad_norm": 2.5500230239586874, "learning_rate": 1.571312612219632e-05, "loss": 0.5997, "step": 6953 }, { "epoch": 0.9819260096018074, "grad_norm": 4.404992993318122, "learning_rate": 1.5711874887671318e-05, "loss": 1.3522, "step": 6954 }, { "epoch": 0.9820672126517933, "grad_norm": 3.255053771014023, "learning_rate": 1.571062352040481e-05, "loss": 0.8757, "step": 6955 }, { "epoch": 0.9822084157017792, "grad_norm": 4.188498337049012, "learning_rate": 1.5709372020425877e-05, "loss": 1.1596, "step": 6956 }, { "epoch": 0.9823496187517651, "grad_norm": 3.157554487681271, "learning_rate": 1.5708120387763607e-05, "loss": 0.8428, "step": 6957 }, { "epoch": 0.982490821801751, "grad_norm": 3.228535383690472, "learning_rate": 1.5706868622447084e-05, "loss": 0.9451, "step": 6958 }, { "epoch": 0.9826320248517368, "grad_norm": 4.422321718570655, "learning_rate": 1.5705616724505394e-05, "loss": 1.0163, "step": 6959 }, { "epoch": 0.9827732279017227, "grad_norm": 3.680373155701201, "learning_rate": 1.570436469396764e-05, "loss": 1.2727, "step": 6960 }, { "epoch": 0.9829144309517086, "grad_norm": 3.597854083406116, "learning_rate": 1.5703112530862912e-05, "loss": 1.0064, "step": 6961 }, { "epoch": 0.9830556340016945, "grad_norm": 2.8992529116195374, "learning_rate": 1.5701860235220314e-05, "loss": 0.7728, "step": 6962 }, { "epoch": 0.9831968370516803, "grad_norm": 3.208125986474056, "learning_rate": 1.5700607807068946e-05, "loss": 0.9497, "step": 6963 }, { "epoch": 0.9833380401016661, "grad_norm": 3.705476981783934, "learning_rate": 1.569935524643791e-05, "loss": 1.2896, "step": 6964 }, { "epoch": 0.983479243151652, "grad_norm": 3.469104658118669, "learning_rate": 1.5698102553356325e-05, "loss": 1.0155, "step": 6965 }, { "epoch": 0.9836204462016379, "grad_norm": 3.5618830968649102, "learning_rate": 1.5696849727853297e-05, "loss": 0.9966, "step": 6966 }, { "epoch": 0.9837616492516238, "grad_norm": 3.250448878096537, "learning_rate": 1.569559676995794e-05, "loss": 1.0674, "step": 6967 }, { "epoch": 0.9839028523016097, "grad_norm": 3.6351630743531866, "learning_rate": 1.5694343679699377e-05, "loss": 0.932, "step": 6968 }, { "epoch": 0.9840440553515956, "grad_norm": 3.7322724348988916, "learning_rate": 1.569309045710672e-05, "loss": 1.2539, "step": 6969 }, { "epoch": 0.9841852584015814, "grad_norm": 3.077452036850276, "learning_rate": 1.5691837102209103e-05, "loss": 0.8782, "step": 6970 }, { "epoch": 0.9843264614515673, "grad_norm": 4.088119037012379, "learning_rate": 1.569058361503565e-05, "loss": 1.106, "step": 6971 }, { "epoch": 0.9844676645015532, "grad_norm": 3.8713703787299076, "learning_rate": 1.568932999561549e-05, "loss": 1.1572, "step": 6972 }, { "epoch": 0.9846088675515391, "grad_norm": 3.628531331112453, "learning_rate": 1.5688076243977758e-05, "loss": 1.0644, "step": 6973 }, { "epoch": 0.984750070601525, "grad_norm": 3.0259289312552244, "learning_rate": 1.568682236015159e-05, "loss": 0.9877, "step": 6974 }, { "epoch": 0.9848912736515109, "grad_norm": 3.421105743113141, "learning_rate": 1.568556834416613e-05, "loss": 0.8944, "step": 6975 }, { "epoch": 0.9850324767014967, "grad_norm": 3.0690637735114557, "learning_rate": 1.5684314196050516e-05, "loss": 0.8954, "step": 6976 }, { "epoch": 0.9851736797514826, "grad_norm": 3.533321172873058, "learning_rate": 1.5683059915833893e-05, "loss": 1.0902, "step": 6977 }, { "epoch": 0.9853148828014685, "grad_norm": 3.451738621238093, "learning_rate": 1.568180550354541e-05, "loss": 0.9889, "step": 6978 }, { "epoch": 0.9854560858514544, "grad_norm": 3.689876372140364, "learning_rate": 1.5680550959214228e-05, "loss": 1.1631, "step": 6979 }, { "epoch": 0.9855972889014403, "grad_norm": 3.7522893547777083, "learning_rate": 1.567929628286949e-05, "loss": 1.0714, "step": 6980 }, { "epoch": 0.9857384919514262, "grad_norm": 3.469037734312863, "learning_rate": 1.5678041474540356e-05, "loss": 1.1226, "step": 6981 }, { "epoch": 0.985879695001412, "grad_norm": 3.2522972241197867, "learning_rate": 1.5676786534255993e-05, "loss": 0.7746, "step": 6982 }, { "epoch": 0.9860208980513979, "grad_norm": 3.608721466873715, "learning_rate": 1.5675531462045567e-05, "loss": 1.1465, "step": 6983 }, { "epoch": 0.9861621011013838, "grad_norm": 3.499731549687233, "learning_rate": 1.5674276257938234e-05, "loss": 1.0859, "step": 6984 }, { "epoch": 0.9863033041513697, "grad_norm": 4.813250376864065, "learning_rate": 1.5673020921963174e-05, "loss": 1.2355, "step": 6985 }, { "epoch": 0.9864445072013556, "grad_norm": 3.4343063979784643, "learning_rate": 1.5671765454149558e-05, "loss": 1.1618, "step": 6986 }, { "epoch": 0.9865857102513415, "grad_norm": 3.680731680392903, "learning_rate": 1.5670509854526566e-05, "loss": 1.078, "step": 6987 }, { "epoch": 0.9867269133013273, "grad_norm": 3.748786507501026, "learning_rate": 1.566925412312337e-05, "loss": 1.1682, "step": 6988 }, { "epoch": 0.9868681163513132, "grad_norm": 3.377734580982437, "learning_rate": 1.5667998259969154e-05, "loss": 0.971, "step": 6989 }, { "epoch": 0.9870093194012991, "grad_norm": 3.4895733730412752, "learning_rate": 1.566674226509311e-05, "loss": 1.1186, "step": 6990 }, { "epoch": 0.987150522451285, "grad_norm": 3.9237819825322156, "learning_rate": 1.5665486138524425e-05, "loss": 1.1409, "step": 6991 }, { "epoch": 0.9872917255012709, "grad_norm": 3.713623998650466, "learning_rate": 1.5664229880292286e-05, "loss": 1.0355, "step": 6992 }, { "epoch": 0.9874329285512568, "grad_norm": 3.607571372416664, "learning_rate": 1.566297349042589e-05, "loss": 1.1739, "step": 6993 }, { "epoch": 0.9875741316012426, "grad_norm": 3.4152831711214544, "learning_rate": 1.5661716968954436e-05, "loss": 1.0416, "step": 6994 }, { "epoch": 0.9877153346512285, "grad_norm": 3.0364888505542136, "learning_rate": 1.5660460315907125e-05, "loss": 0.8819, "step": 6995 }, { "epoch": 0.9878565377012144, "grad_norm": 2.8189967619070364, "learning_rate": 1.5659203531313162e-05, "loss": 0.6696, "step": 6996 }, { "epoch": 0.9879977407512002, "grad_norm": 2.6824644103683766, "learning_rate": 1.5657946615201755e-05, "loss": 0.7952, "step": 6997 }, { "epoch": 0.988138943801186, "grad_norm": 3.9999698076649297, "learning_rate": 1.565668956760211e-05, "loss": 1.1374, "step": 6998 }, { "epoch": 0.9882801468511719, "grad_norm": 2.815202808771742, "learning_rate": 1.565543238854344e-05, "loss": 0.7855, "step": 6999 }, { "epoch": 0.9884213499011578, "grad_norm": 3.119927566390854, "learning_rate": 1.5654175078054965e-05, "loss": 0.7941, "step": 7000 }, { "epoch": 0.9885625529511437, "grad_norm": 3.710496191695528, "learning_rate": 1.5652917636165905e-05, "loss": 1.0291, "step": 7001 }, { "epoch": 0.9887037560011296, "grad_norm": 3.5445064781669657, "learning_rate": 1.5651660062905476e-05, "loss": 1.0, "step": 7002 }, { "epoch": 0.9888449590511155, "grad_norm": 3.0508636236193296, "learning_rate": 1.5650402358302913e-05, "loss": 0.7994, "step": 7003 }, { "epoch": 0.9889861621011014, "grad_norm": 3.0206325015955295, "learning_rate": 1.5649144522387437e-05, "loss": 0.9788, "step": 7004 }, { "epoch": 0.9891273651510872, "grad_norm": 3.257815829686731, "learning_rate": 1.5647886555188282e-05, "loss": 1.1091, "step": 7005 }, { "epoch": 0.9892685682010731, "grad_norm": 4.0364687772674825, "learning_rate": 1.564662845673468e-05, "loss": 1.3046, "step": 7006 }, { "epoch": 0.989409771251059, "grad_norm": 3.430130517321909, "learning_rate": 1.5645370227055874e-05, "loss": 1.0219, "step": 7007 }, { "epoch": 0.9895509743010449, "grad_norm": 3.5588576823990614, "learning_rate": 1.56441118661811e-05, "loss": 1.1569, "step": 7008 }, { "epoch": 0.9896921773510308, "grad_norm": 3.1063170184303655, "learning_rate": 1.564285337413961e-05, "loss": 0.8242, "step": 7009 }, { "epoch": 0.9898333804010166, "grad_norm": 3.0517766556805617, "learning_rate": 1.564159475096064e-05, "loss": 0.859, "step": 7010 }, { "epoch": 0.9899745834510025, "grad_norm": 3.977257597100643, "learning_rate": 1.5640335996673446e-05, "loss": 1.1925, "step": 7011 }, { "epoch": 0.9901157865009884, "grad_norm": 3.2798279653197553, "learning_rate": 1.5639077111307276e-05, "loss": 0.8232, "step": 7012 }, { "epoch": 0.9902569895509743, "grad_norm": 3.346384994046706, "learning_rate": 1.563781809489139e-05, "loss": 1.0752, "step": 7013 }, { "epoch": 0.9903981926009602, "grad_norm": 2.9977320693960756, "learning_rate": 1.563655894745505e-05, "loss": 0.8363, "step": 7014 }, { "epoch": 0.9905393956509461, "grad_norm": 3.255853153086719, "learning_rate": 1.563529966902751e-05, "loss": 1.0111, "step": 7015 }, { "epoch": 0.990680598700932, "grad_norm": 3.1205128480531688, "learning_rate": 1.5634040259638044e-05, "loss": 0.7567, "step": 7016 }, { "epoch": 0.9908218017509178, "grad_norm": 3.582942074407956, "learning_rate": 1.5632780719315912e-05, "loss": 1.224, "step": 7017 }, { "epoch": 0.9909630048009037, "grad_norm": 3.556001786382917, "learning_rate": 1.563152104809039e-05, "loss": 0.951, "step": 7018 }, { "epoch": 0.9911042078508896, "grad_norm": 3.2147592862399317, "learning_rate": 1.563026124599075e-05, "loss": 0.9708, "step": 7019 }, { "epoch": 0.9912454109008755, "grad_norm": 3.4783365359073595, "learning_rate": 1.562900131304627e-05, "loss": 0.9113, "step": 7020 }, { "epoch": 0.9913866139508614, "grad_norm": 3.4651610598413387, "learning_rate": 1.5627741249286233e-05, "loss": 0.928, "step": 7021 }, { "epoch": 0.9915278170008472, "grad_norm": 3.303366968723144, "learning_rate": 1.5626481054739916e-05, "loss": 1.0275, "step": 7022 }, { "epoch": 0.9916690200508331, "grad_norm": 4.047000915110003, "learning_rate": 1.562522072943661e-05, "loss": 0.9735, "step": 7023 }, { "epoch": 0.991810223100819, "grad_norm": 3.117661129910116, "learning_rate": 1.5623960273405605e-05, "loss": 0.9313, "step": 7024 }, { "epoch": 0.9919514261508049, "grad_norm": 3.0924144270926632, "learning_rate": 1.562269968667619e-05, "loss": 0.9477, "step": 7025 }, { "epoch": 0.9920926292007908, "grad_norm": 3.5471854211645657, "learning_rate": 1.5621438969277667e-05, "loss": 1.1245, "step": 7026 }, { "epoch": 0.9922338322507767, "grad_norm": 3.328051526576892, "learning_rate": 1.5620178121239325e-05, "loss": 0.9079, "step": 7027 }, { "epoch": 0.9923750353007625, "grad_norm": 3.3618530498812427, "learning_rate": 1.5618917142590472e-05, "loss": 1.0184, "step": 7028 }, { "epoch": 0.9925162383507484, "grad_norm": 2.995249132331739, "learning_rate": 1.5617656033360414e-05, "loss": 0.9172, "step": 7029 }, { "epoch": 0.9926574414007343, "grad_norm": 3.5022546646379156, "learning_rate": 1.5616394793578455e-05, "loss": 0.9893, "step": 7030 }, { "epoch": 0.9927986444507201, "grad_norm": 3.06374782003365, "learning_rate": 1.5615133423273906e-05, "loss": 0.8965, "step": 7031 }, { "epoch": 0.992939847500706, "grad_norm": 3.6321936916219038, "learning_rate": 1.5613871922476082e-05, "loss": 1.1367, "step": 7032 }, { "epoch": 0.9930810505506918, "grad_norm": 3.793161549894474, "learning_rate": 1.5612610291214296e-05, "loss": 1.2445, "step": 7033 }, { "epoch": 0.9932222536006777, "grad_norm": 3.277534278856119, "learning_rate": 1.5611348529517872e-05, "loss": 0.9351, "step": 7034 }, { "epoch": 0.9933634566506636, "grad_norm": 3.518118534321915, "learning_rate": 1.561008663741613e-05, "loss": 1.0028, "step": 7035 }, { "epoch": 0.9935046597006495, "grad_norm": 3.35057390033774, "learning_rate": 1.56088246149384e-05, "loss": 1.0941, "step": 7036 }, { "epoch": 0.9936458627506354, "grad_norm": 3.380690123914075, "learning_rate": 1.560756246211401e-05, "loss": 0.9519, "step": 7037 }, { "epoch": 0.9937870658006213, "grad_norm": 2.8764134346023327, "learning_rate": 1.560630017897229e-05, "loss": 0.9132, "step": 7038 }, { "epoch": 0.9939282688506071, "grad_norm": 3.469913487101549, "learning_rate": 1.560503776554257e-05, "loss": 1.2105, "step": 7039 }, { "epoch": 0.994069471900593, "grad_norm": 3.8035032284545878, "learning_rate": 1.5603775221854195e-05, "loss": 1.0751, "step": 7040 }, { "epoch": 0.9942106749505789, "grad_norm": 3.011659005685611, "learning_rate": 1.5602512547936504e-05, "loss": 0.9795, "step": 7041 }, { "epoch": 0.9943518780005648, "grad_norm": 3.322639403798117, "learning_rate": 1.560124974381884e-05, "loss": 0.9458, "step": 7042 }, { "epoch": 0.9944930810505507, "grad_norm": 3.292749054251956, "learning_rate": 1.559998680953055e-05, "loss": 1.0769, "step": 7043 }, { "epoch": 0.9946342841005366, "grad_norm": 2.983155213528207, "learning_rate": 1.559872374510099e-05, "loss": 0.8762, "step": 7044 }, { "epoch": 0.9947754871505224, "grad_norm": 4.127678051151956, "learning_rate": 1.5597460550559508e-05, "loss": 1.1266, "step": 7045 }, { "epoch": 0.9949166902005083, "grad_norm": 3.374956110435403, "learning_rate": 1.5596197225935458e-05, "loss": 0.8584, "step": 7046 }, { "epoch": 0.9950578932504942, "grad_norm": 3.200598872907082, "learning_rate": 1.55949337712582e-05, "loss": 0.9614, "step": 7047 }, { "epoch": 0.9951990963004801, "grad_norm": 3.539221452036798, "learning_rate": 1.5593670186557096e-05, "loss": 1.0793, "step": 7048 }, { "epoch": 0.995340299350466, "grad_norm": 2.8747625666202863, "learning_rate": 1.5592406471861516e-05, "loss": 0.7875, "step": 7049 }, { "epoch": 0.9954815024004519, "grad_norm": 3.7785221697406897, "learning_rate": 1.5591142627200825e-05, "loss": 1.1175, "step": 7050 }, { "epoch": 0.9956227054504377, "grad_norm": 3.3536102215692853, "learning_rate": 1.5589878652604392e-05, "loss": 1.061, "step": 7051 }, { "epoch": 0.9957639085004236, "grad_norm": 3.333537881416331, "learning_rate": 1.5588614548101593e-05, "loss": 0.8706, "step": 7052 }, { "epoch": 0.9959051115504095, "grad_norm": 3.260950893171303, "learning_rate": 1.5587350313721806e-05, "loss": 0.9259, "step": 7053 }, { "epoch": 0.9960463146003954, "grad_norm": 2.9409695260660826, "learning_rate": 1.558608594949441e-05, "loss": 0.9021, "step": 7054 }, { "epoch": 0.9961875176503813, "grad_norm": 3.190645696272113, "learning_rate": 1.558482145544879e-05, "loss": 0.9771, "step": 7055 }, { "epoch": 0.9963287207003672, "grad_norm": 3.3954615071214866, "learning_rate": 1.5583556831614333e-05, "loss": 1.0387, "step": 7056 }, { "epoch": 0.996469923750353, "grad_norm": 3.0652519708635606, "learning_rate": 1.5582292078020425e-05, "loss": 0.8763, "step": 7057 }, { "epoch": 0.9966111268003389, "grad_norm": 3.935264533859818, "learning_rate": 1.5581027194696458e-05, "loss": 1.24, "step": 7058 }, { "epoch": 0.9967523298503248, "grad_norm": 3.21581062488035, "learning_rate": 1.5579762181671832e-05, "loss": 1.0062, "step": 7059 }, { "epoch": 0.9968935329003107, "grad_norm": 3.362957833964704, "learning_rate": 1.557849703897594e-05, "loss": 0.9979, "step": 7060 }, { "epoch": 0.9970347359502966, "grad_norm": 4.292140604164678, "learning_rate": 1.557723176663819e-05, "loss": 1.1308, "step": 7061 }, { "epoch": 0.9971759390002825, "grad_norm": 3.185660636420962, "learning_rate": 1.557596636468798e-05, "loss": 0.9128, "step": 7062 }, { "epoch": 0.9973171420502683, "grad_norm": 3.591203109867533, "learning_rate": 1.5574700833154717e-05, "loss": 0.822, "step": 7063 }, { "epoch": 0.9974583451002542, "grad_norm": 3.6661527945391743, "learning_rate": 1.557343517206782e-05, "loss": 1.0747, "step": 7064 }, { "epoch": 0.99759954815024, "grad_norm": 4.363442797423987, "learning_rate": 1.5572169381456692e-05, "loss": 1.2384, "step": 7065 }, { "epoch": 0.9977407512002259, "grad_norm": 3.1332204666480528, "learning_rate": 1.5570903461350754e-05, "loss": 0.9118, "step": 7066 }, { "epoch": 0.9978819542502118, "grad_norm": 3.4591507532155643, "learning_rate": 1.5569637411779428e-05, "loss": 0.792, "step": 7067 }, { "epoch": 0.9980231573001976, "grad_norm": 3.358025928020668, "learning_rate": 1.5568371232772134e-05, "loss": 1.0065, "step": 7068 }, { "epoch": 0.9981643603501835, "grad_norm": 2.9706410828511425, "learning_rate": 1.5567104924358297e-05, "loss": 0.8425, "step": 7069 }, { "epoch": 0.9983055634001694, "grad_norm": 3.11709050814431, "learning_rate": 1.5565838486567343e-05, "loss": 0.8252, "step": 7070 }, { "epoch": 0.9984467664501553, "grad_norm": 3.44957872462301, "learning_rate": 1.5564571919428708e-05, "loss": 0.9985, "step": 7071 }, { "epoch": 0.9985879695001412, "grad_norm": 3.835627913308889, "learning_rate": 1.5563305222971826e-05, "loss": 1.2034, "step": 7072 }, { "epoch": 0.998729172550127, "grad_norm": 3.5639893779221574, "learning_rate": 1.5562038397226135e-05, "loss": 1.0206, "step": 7073 }, { "epoch": 0.9988703756001129, "grad_norm": 4.018968833148265, "learning_rate": 1.556077144222107e-05, "loss": 1.2566, "step": 7074 }, { "epoch": 0.9990115786500988, "grad_norm": 3.4173695035190272, "learning_rate": 1.5559504357986078e-05, "loss": 1.0191, "step": 7075 }, { "epoch": 0.9991527817000847, "grad_norm": 3.134018704373015, "learning_rate": 1.5558237144550608e-05, "loss": 1.0316, "step": 7076 }, { "epoch": 0.9992939847500706, "grad_norm": 3.1429545745613896, "learning_rate": 1.5556969801944105e-05, "loss": 0.9545, "step": 7077 }, { "epoch": 0.9994351878000565, "grad_norm": 3.1698403453593715, "learning_rate": 1.5555702330196024e-05, "loss": 0.9229, "step": 7078 }, { "epoch": 0.9995763908500424, "grad_norm": 3.063720326530578, "learning_rate": 1.555443472933582e-05, "loss": 0.9082, "step": 7079 }, { "epoch": 0.9997175939000282, "grad_norm": 3.2578613986798346, "learning_rate": 1.5553166999392954e-05, "loss": 1.0078, "step": 7080 }, { "epoch": 0.9998587969500141, "grad_norm": 2.8449006222963606, "learning_rate": 1.5551899140396883e-05, "loss": 0.7435, "step": 7081 }, { "epoch": 1.0, "grad_norm": 3.35298399538151, "learning_rate": 1.5550631152377075e-05, "loss": 1.0117, "step": 7082 }, { "epoch": 1.0001412030499859, "grad_norm": 2.7703924359903715, "learning_rate": 1.5549363035362995e-05, "loss": 0.7188, "step": 7083 }, { "epoch": 1.0002824060999718, "grad_norm": 2.5030652566205127, "learning_rate": 1.5548094789384113e-05, "loss": 0.5239, "step": 7084 }, { "epoch": 1.0004236091499576, "grad_norm": 2.463557242040833, "learning_rate": 1.554682641446991e-05, "loss": 0.5542, "step": 7085 }, { "epoch": 1.0005648121999435, "grad_norm": 3.125220239034055, "learning_rate": 1.554555791064985e-05, "loss": 0.766, "step": 7086 }, { "epoch": 1.0007060152499294, "grad_norm": 2.79247665278582, "learning_rate": 1.554428927795342e-05, "loss": 0.5893, "step": 7087 }, { "epoch": 1.0008472182999153, "grad_norm": 2.62365243433691, "learning_rate": 1.5543020516410104e-05, "loss": 0.6436, "step": 7088 }, { "epoch": 1.0009884213499012, "grad_norm": 2.970400234624762, "learning_rate": 1.5541751626049387e-05, "loss": 0.6496, "step": 7089 }, { "epoch": 1.001129624399887, "grad_norm": 2.6191402739140432, "learning_rate": 1.5540482606900752e-05, "loss": 0.4953, "step": 7090 }, { "epoch": 1.001270827449873, "grad_norm": 3.2537168382908503, "learning_rate": 1.5539213458993697e-05, "loss": 0.5547, "step": 7091 }, { "epoch": 1.0014120304998588, "grad_norm": 3.2061580467006197, "learning_rate": 1.553794418235771e-05, "loss": 0.6921, "step": 7092 }, { "epoch": 1.0015532335498447, "grad_norm": 3.3374077923941305, "learning_rate": 1.5536674777022295e-05, "loss": 0.7212, "step": 7093 }, { "epoch": 1.0016944365998306, "grad_norm": 3.2258764650999083, "learning_rate": 1.5535405243016945e-05, "loss": 0.5642, "step": 7094 }, { "epoch": 1.0018356396498165, "grad_norm": 2.7229051676067093, "learning_rate": 1.553413558037117e-05, "loss": 0.5861, "step": 7095 }, { "epoch": 1.0019768426998024, "grad_norm": 3.006919115153893, "learning_rate": 1.5532865789114477e-05, "loss": 0.6352, "step": 7096 }, { "epoch": 1.0021180457497882, "grad_norm": 3.562415190628279, "learning_rate": 1.5531595869276366e-05, "loss": 0.6815, "step": 7097 }, { "epoch": 1.0022592487997741, "grad_norm": 3.4815408536255816, "learning_rate": 1.5530325820886366e-05, "loss": 0.6794, "step": 7098 }, { "epoch": 1.00240045184976, "grad_norm": 3.742737946473658, "learning_rate": 1.5529055643973973e-05, "loss": 0.758, "step": 7099 }, { "epoch": 1.002541654899746, "grad_norm": 2.9996750284984217, "learning_rate": 1.5527785338568718e-05, "loss": 0.5891, "step": 7100 }, { "epoch": 1.0026828579497318, "grad_norm": 2.6143613971084156, "learning_rate": 1.552651490470012e-05, "loss": 0.5652, "step": 7101 }, { "epoch": 1.0028240609997177, "grad_norm": 3.0670215465162762, "learning_rate": 1.55252443423977e-05, "loss": 0.6603, "step": 7102 }, { "epoch": 1.0029652640497035, "grad_norm": 2.9572139631907843, "learning_rate": 1.552397365169099e-05, "loss": 0.5554, "step": 7103 }, { "epoch": 1.0031064670996894, "grad_norm": 2.931363176574739, "learning_rate": 1.5522702832609516e-05, "loss": 0.5618, "step": 7104 }, { "epoch": 1.0032476701496753, "grad_norm": 3.437590991890577, "learning_rate": 1.552143188518281e-05, "loss": 0.644, "step": 7105 }, { "epoch": 1.0033888731996612, "grad_norm": 3.194197917648109, "learning_rate": 1.552016080944042e-05, "loss": 0.609, "step": 7106 }, { "epoch": 1.003530076249647, "grad_norm": 2.9300701710619785, "learning_rate": 1.551888960541187e-05, "loss": 0.5751, "step": 7107 }, { "epoch": 1.003671279299633, "grad_norm": 3.0928977471480312, "learning_rate": 1.5517618273126714e-05, "loss": 0.7016, "step": 7108 }, { "epoch": 1.0038124823496188, "grad_norm": 3.685278482667479, "learning_rate": 1.5516346812614487e-05, "loss": 0.7401, "step": 7109 }, { "epoch": 1.0039536853996047, "grad_norm": 4.614679323716039, "learning_rate": 1.5515075223904745e-05, "loss": 0.8113, "step": 7110 }, { "epoch": 1.0040948884495906, "grad_norm": 3.2323602390210886, "learning_rate": 1.551380350702704e-05, "loss": 0.5941, "step": 7111 }, { "epoch": 1.0042360914995765, "grad_norm": 3.379148909601683, "learning_rate": 1.5512531662010918e-05, "loss": 0.752, "step": 7112 }, { "epoch": 1.0043772945495624, "grad_norm": 2.870867398825745, "learning_rate": 1.5511259688885943e-05, "loss": 0.5199, "step": 7113 }, { "epoch": 1.0045184975995483, "grad_norm": 3.3080872652837097, "learning_rate": 1.550998758768167e-05, "loss": 0.6551, "step": 7114 }, { "epoch": 1.0046597006495341, "grad_norm": 3.425594084549221, "learning_rate": 1.5508715358427667e-05, "loss": 0.7151, "step": 7115 }, { "epoch": 1.00480090369952, "grad_norm": 3.544107577569558, "learning_rate": 1.5507443001153497e-05, "loss": 0.6064, "step": 7116 }, { "epoch": 1.0049421067495057, "grad_norm": 3.035923927722001, "learning_rate": 1.5506170515888732e-05, "loss": 0.5797, "step": 7117 }, { "epoch": 1.0050833097994916, "grad_norm": 2.753522473876703, "learning_rate": 1.550489790266294e-05, "loss": 0.4597, "step": 7118 }, { "epoch": 1.0052245128494774, "grad_norm": 2.933612648461088, "learning_rate": 1.5503625161505703e-05, "loss": 0.5458, "step": 7119 }, { "epoch": 1.0053657158994633, "grad_norm": 3.595955419597733, "learning_rate": 1.550235229244659e-05, "loss": 0.5885, "step": 7120 }, { "epoch": 1.0055069189494492, "grad_norm": 2.9356881680492677, "learning_rate": 1.5501079295515188e-05, "loss": 0.4813, "step": 7121 }, { "epoch": 1.005648121999435, "grad_norm": 2.678086026101822, "learning_rate": 1.5499806170741073e-05, "loss": 0.4927, "step": 7122 }, { "epoch": 1.005789325049421, "grad_norm": 4.020260113678444, "learning_rate": 1.5498532918153847e-05, "loss": 0.7213, "step": 7123 }, { "epoch": 1.0059305280994069, "grad_norm": 3.259703376243231, "learning_rate": 1.5497259537783084e-05, "loss": 0.5475, "step": 7124 }, { "epoch": 1.0060717311493927, "grad_norm": 4.10785281086478, "learning_rate": 1.5495986029658385e-05, "loss": 0.7659, "step": 7125 }, { "epoch": 1.0062129341993786, "grad_norm": 3.661764437442794, "learning_rate": 1.5494712393809343e-05, "loss": 0.7327, "step": 7126 }, { "epoch": 1.0063541372493645, "grad_norm": 3.799365787502453, "learning_rate": 1.549343863026556e-05, "loss": 0.6346, "step": 7127 }, { "epoch": 1.0064953402993504, "grad_norm": 3.1384282819025056, "learning_rate": 1.5492164739056635e-05, "loss": 0.5589, "step": 7128 }, { "epoch": 1.0066365433493363, "grad_norm": 3.3999813184410006, "learning_rate": 1.5490890720212176e-05, "loss": 0.6209, "step": 7129 }, { "epoch": 1.0067777463993222, "grad_norm": 3.084468791538457, "learning_rate": 1.5489616573761784e-05, "loss": 0.5641, "step": 7130 }, { "epoch": 1.006918949449308, "grad_norm": 3.5695182673157837, "learning_rate": 1.5488342299735077e-05, "loss": 0.6313, "step": 7131 }, { "epoch": 1.007060152499294, "grad_norm": 3.2729248703124534, "learning_rate": 1.548706789816166e-05, "loss": 0.6199, "step": 7132 }, { "epoch": 1.0072013555492798, "grad_norm": 4.467379926360003, "learning_rate": 1.548579336907116e-05, "loss": 0.7252, "step": 7133 }, { "epoch": 1.0073425585992657, "grad_norm": 3.214694857152753, "learning_rate": 1.5484518712493188e-05, "loss": 0.5526, "step": 7134 }, { "epoch": 1.0074837616492516, "grad_norm": 4.136183980846995, "learning_rate": 1.548324392845737e-05, "loss": 0.7773, "step": 7135 }, { "epoch": 1.0076249646992375, "grad_norm": 2.89759458905711, "learning_rate": 1.5481969016993335e-05, "loss": 0.5302, "step": 7136 }, { "epoch": 1.0077661677492233, "grad_norm": 3.217114383951742, "learning_rate": 1.5480693978130706e-05, "loss": 0.6863, "step": 7137 }, { "epoch": 1.0079073707992092, "grad_norm": 3.0974145177204973, "learning_rate": 1.547941881189911e-05, "loss": 0.5675, "step": 7138 }, { "epoch": 1.008048573849195, "grad_norm": 3.2087558642186074, "learning_rate": 1.5478143518328193e-05, "loss": 0.5871, "step": 7139 }, { "epoch": 1.008189776899181, "grad_norm": 3.287248272842225, "learning_rate": 1.5476868097447586e-05, "loss": 0.6136, "step": 7140 }, { "epoch": 1.0083309799491669, "grad_norm": 3.259776838327227, "learning_rate": 1.547559254928693e-05, "loss": 0.5697, "step": 7141 }, { "epoch": 1.0084721829991528, "grad_norm": 4.102370229241689, "learning_rate": 1.5474316873875864e-05, "loss": 0.7838, "step": 7142 }, { "epoch": 1.0086133860491386, "grad_norm": 2.8569125522854226, "learning_rate": 1.547304107124404e-05, "loss": 0.4813, "step": 7143 }, { "epoch": 1.0087545890991245, "grad_norm": 2.5652592701540424, "learning_rate": 1.547176514142111e-05, "loss": 0.4166, "step": 7144 }, { "epoch": 1.0088957921491104, "grad_norm": 3.462431804523475, "learning_rate": 1.5470489084436717e-05, "loss": 0.5403, "step": 7145 }, { "epoch": 1.0090369951990963, "grad_norm": 2.9980376814615535, "learning_rate": 1.546921290032052e-05, "loss": 0.5405, "step": 7146 }, { "epoch": 1.0091781982490822, "grad_norm": 2.7360981449880786, "learning_rate": 1.546793658910218e-05, "loss": 0.481, "step": 7147 }, { "epoch": 1.009319401299068, "grad_norm": 3.5193793960579236, "learning_rate": 1.546666015081135e-05, "loss": 0.5286, "step": 7148 }, { "epoch": 1.009460604349054, "grad_norm": 2.9696808594793103, "learning_rate": 1.54653835854777e-05, "loss": 0.5117, "step": 7149 }, { "epoch": 1.0096018073990398, "grad_norm": 3.4997153037921587, "learning_rate": 1.5464106893130896e-05, "loss": 0.5732, "step": 7150 }, { "epoch": 1.0097430104490257, "grad_norm": 3.341477600524824, "learning_rate": 1.546283007380061e-05, "loss": 0.6164, "step": 7151 }, { "epoch": 1.0098842134990116, "grad_norm": 3.76128168957887, "learning_rate": 1.546155312751651e-05, "loss": 0.7862, "step": 7152 }, { "epoch": 1.0100254165489975, "grad_norm": 3.1644100175201846, "learning_rate": 1.5460276054308276e-05, "loss": 0.4801, "step": 7153 }, { "epoch": 1.0101666195989834, "grad_norm": 3.9111152938157567, "learning_rate": 1.5458998854205585e-05, "loss": 0.5954, "step": 7154 }, { "epoch": 1.0103078226489692, "grad_norm": 3.524662838161592, "learning_rate": 1.5457721527238118e-05, "loss": 0.632, "step": 7155 }, { "epoch": 1.0104490256989551, "grad_norm": 3.536214003037081, "learning_rate": 1.5456444073435558e-05, "loss": 0.6758, "step": 7156 }, { "epoch": 1.010590228748941, "grad_norm": 2.7940536998714016, "learning_rate": 1.5455166492827595e-05, "loss": 0.4743, "step": 7157 }, { "epoch": 1.0107314317989269, "grad_norm": 4.292083860781415, "learning_rate": 1.5453888785443916e-05, "loss": 0.6942, "step": 7158 }, { "epoch": 1.0108726348489128, "grad_norm": 3.2850495285187518, "learning_rate": 1.545261095131422e-05, "loss": 0.5796, "step": 7159 }, { "epoch": 1.0110138378988986, "grad_norm": 2.8636545546911387, "learning_rate": 1.5451332990468202e-05, "loss": 0.5915, "step": 7160 }, { "epoch": 1.0111550409488845, "grad_norm": 2.9936729852413277, "learning_rate": 1.5450054902935557e-05, "loss": 0.4969, "step": 7161 }, { "epoch": 1.0112962439988704, "grad_norm": 3.2457835319418686, "learning_rate": 1.544877668874599e-05, "loss": 0.5392, "step": 7162 }, { "epoch": 1.0114374470488563, "grad_norm": 3.4225143144801433, "learning_rate": 1.5447498347929207e-05, "loss": 0.5603, "step": 7163 }, { "epoch": 1.0115786500988422, "grad_norm": 4.159376686751067, "learning_rate": 1.5446219880514913e-05, "loss": 0.6187, "step": 7164 }, { "epoch": 1.011719853148828, "grad_norm": 2.858373468245726, "learning_rate": 1.5444941286532822e-05, "loss": 0.5637, "step": 7165 }, { "epoch": 1.011861056198814, "grad_norm": 3.4141400901650454, "learning_rate": 1.5443662566012645e-05, "loss": 0.6489, "step": 7166 }, { "epoch": 1.0120022592487998, "grad_norm": 3.117809639145722, "learning_rate": 1.5442383718984103e-05, "loss": 0.5875, "step": 7167 }, { "epoch": 1.0121434622987857, "grad_norm": 3.198548540607094, "learning_rate": 1.5441104745476913e-05, "loss": 0.6765, "step": 7168 }, { "epoch": 1.0122846653487716, "grad_norm": 3.9195734291345077, "learning_rate": 1.54398256455208e-05, "loss": 0.6523, "step": 7169 }, { "epoch": 1.0124258683987575, "grad_norm": 4.480506027388131, "learning_rate": 1.543854641914549e-05, "loss": 0.8599, "step": 7170 }, { "epoch": 1.0125670714487434, "grad_norm": 3.188886217596178, "learning_rate": 1.5437267066380707e-05, "loss": 0.5689, "step": 7171 }, { "epoch": 1.0127082744987292, "grad_norm": 4.089190761618454, "learning_rate": 1.5435987587256183e-05, "loss": 0.6878, "step": 7172 }, { "epoch": 1.0128494775487151, "grad_norm": 2.63684320387964, "learning_rate": 1.5434707981801658e-05, "loss": 0.473, "step": 7173 }, { "epoch": 1.012990680598701, "grad_norm": 4.241732185816439, "learning_rate": 1.543342825004687e-05, "loss": 0.55, "step": 7174 }, { "epoch": 1.013131883648687, "grad_norm": 4.152430106655621, "learning_rate": 1.543214839202155e-05, "loss": 0.8218, "step": 7175 }, { "epoch": 1.0132730866986728, "grad_norm": 4.078382509198113, "learning_rate": 1.543086840775545e-05, "loss": 0.6177, "step": 7176 }, { "epoch": 1.0134142897486587, "grad_norm": 4.1965713488735625, "learning_rate": 1.5429588297278315e-05, "loss": 0.7043, "step": 7177 }, { "epoch": 1.0135554927986445, "grad_norm": 4.154017591111017, "learning_rate": 1.5428308060619893e-05, "loss": 0.6683, "step": 7178 }, { "epoch": 1.0136966958486304, "grad_norm": 2.835390021926995, "learning_rate": 1.542702769780993e-05, "loss": 0.5195, "step": 7179 }, { "epoch": 1.0138378988986163, "grad_norm": 2.7114279801533607, "learning_rate": 1.5425747208878195e-05, "loss": 0.6356, "step": 7180 }, { "epoch": 1.0139791019486022, "grad_norm": 2.9185334066299573, "learning_rate": 1.542446659385443e-05, "loss": 0.5524, "step": 7181 }, { "epoch": 1.014120304998588, "grad_norm": 2.9038994343500852, "learning_rate": 1.542318585276841e-05, "loss": 0.5155, "step": 7182 }, { "epoch": 1.014261508048574, "grad_norm": 2.8204767960182187, "learning_rate": 1.5421904985649892e-05, "loss": 0.5502, "step": 7183 }, { "epoch": 1.0144027110985596, "grad_norm": 3.4982689180468682, "learning_rate": 1.5420623992528643e-05, "loss": 0.6886, "step": 7184 }, { "epoch": 1.0145439141485455, "grad_norm": 2.913587096173661, "learning_rate": 1.5419342873434434e-05, "loss": 0.5358, "step": 7185 }, { "epoch": 1.0146851171985314, "grad_norm": 2.837906748701576, "learning_rate": 1.5418061628397037e-05, "loss": 0.4769, "step": 7186 }, { "epoch": 1.0148263202485173, "grad_norm": 3.2292989920337933, "learning_rate": 1.5416780257446228e-05, "loss": 0.5166, "step": 7187 }, { "epoch": 1.0149675232985031, "grad_norm": 3.257126757116606, "learning_rate": 1.541549876061178e-05, "loss": 0.7386, "step": 7188 }, { "epoch": 1.015108726348489, "grad_norm": 3.7739124521570604, "learning_rate": 1.5414217137923485e-05, "loss": 0.6164, "step": 7189 }, { "epoch": 1.015249929398475, "grad_norm": 3.324963879021115, "learning_rate": 1.5412935389411124e-05, "loss": 0.54, "step": 7190 }, { "epoch": 1.0153911324484608, "grad_norm": 3.1652886655740944, "learning_rate": 1.5411653515104478e-05, "loss": 0.6177, "step": 7191 }, { "epoch": 1.0155323354984467, "grad_norm": 3.2735112766824805, "learning_rate": 1.5410371515033343e-05, "loss": 0.5501, "step": 7192 }, { "epoch": 1.0156735385484326, "grad_norm": 3.2336830257198943, "learning_rate": 1.540908938922751e-05, "loss": 0.7034, "step": 7193 }, { "epoch": 1.0158147415984184, "grad_norm": 4.125005116817651, "learning_rate": 1.5407807137716774e-05, "loss": 0.74, "step": 7194 }, { "epoch": 1.0159559446484043, "grad_norm": 3.2621598285542133, "learning_rate": 1.540652476053094e-05, "loss": 0.5576, "step": 7195 }, { "epoch": 1.0160971476983902, "grad_norm": 3.270942217060861, "learning_rate": 1.54052422576998e-05, "loss": 0.6965, "step": 7196 }, { "epoch": 1.016238350748376, "grad_norm": 3.906316014897361, "learning_rate": 1.5403959629253168e-05, "loss": 0.6746, "step": 7197 }, { "epoch": 1.016379553798362, "grad_norm": 3.1450405430365076, "learning_rate": 1.5402676875220847e-05, "loss": 0.6263, "step": 7198 }, { "epoch": 1.0165207568483479, "grad_norm": 3.4371348945556703, "learning_rate": 1.540139399563265e-05, "loss": 0.5828, "step": 7199 }, { "epoch": 1.0166619598983337, "grad_norm": 3.380551872934823, "learning_rate": 1.5400110990518386e-05, "loss": 0.6005, "step": 7200 }, { "epoch": 1.0168031629483196, "grad_norm": 4.59773651229296, "learning_rate": 1.5398827859907878e-05, "loss": 0.6946, "step": 7201 }, { "epoch": 1.0169443659983055, "grad_norm": 3.22909432774919, "learning_rate": 1.539754460383094e-05, "loss": 0.5719, "step": 7202 }, { "epoch": 1.0170855690482914, "grad_norm": 3.273546621398694, "learning_rate": 1.5396261222317397e-05, "loss": 0.5007, "step": 7203 }, { "epoch": 1.0172267720982773, "grad_norm": 2.986950310765524, "learning_rate": 1.5394977715397073e-05, "loss": 0.4956, "step": 7204 }, { "epoch": 1.0173679751482632, "grad_norm": 3.371032263679071, "learning_rate": 1.53936940830998e-05, "loss": 0.6042, "step": 7205 }, { "epoch": 1.017509178198249, "grad_norm": 3.279595037467894, "learning_rate": 1.5392410325455397e-05, "loss": 0.4924, "step": 7206 }, { "epoch": 1.017650381248235, "grad_norm": 3.1948154854260027, "learning_rate": 1.5391126442493715e-05, "loss": 0.5885, "step": 7207 }, { "epoch": 1.0177915842982208, "grad_norm": 4.448682306313843, "learning_rate": 1.538984243424458e-05, "loss": 0.9346, "step": 7208 }, { "epoch": 1.0179327873482067, "grad_norm": 3.406259540168245, "learning_rate": 1.538855830073784e-05, "loss": 0.6966, "step": 7209 }, { "epoch": 1.0180739903981926, "grad_norm": 3.567194703612795, "learning_rate": 1.5387274042003327e-05, "loss": 0.5877, "step": 7210 }, { "epoch": 1.0182151934481785, "grad_norm": 3.5516149059926363, "learning_rate": 1.538598965807089e-05, "loss": 0.5406, "step": 7211 }, { "epoch": 1.0183563964981643, "grad_norm": 4.206829421788801, "learning_rate": 1.5384705148970384e-05, "loss": 0.8361, "step": 7212 }, { "epoch": 1.0184975995481502, "grad_norm": 3.571647875827525, "learning_rate": 1.5383420514731653e-05, "loss": 0.6364, "step": 7213 }, { "epoch": 1.018638802598136, "grad_norm": 3.140850042554657, "learning_rate": 1.5382135755384554e-05, "loss": 0.5521, "step": 7214 }, { "epoch": 1.018780005648122, "grad_norm": 2.650281639448247, "learning_rate": 1.5380850870958945e-05, "loss": 0.505, "step": 7215 }, { "epoch": 1.0189212086981079, "grad_norm": 4.009481058679087, "learning_rate": 1.537956586148469e-05, "loss": 0.6736, "step": 7216 }, { "epoch": 1.0190624117480938, "grad_norm": 4.978755602432865, "learning_rate": 1.5378280726991638e-05, "loss": 0.6229, "step": 7217 }, { "epoch": 1.0192036147980796, "grad_norm": 3.703231219726251, "learning_rate": 1.5376995467509673e-05, "loss": 0.6891, "step": 7218 }, { "epoch": 1.0193448178480655, "grad_norm": 3.174865875069421, "learning_rate": 1.5375710083068653e-05, "loss": 0.5142, "step": 7219 }, { "epoch": 1.0194860208980514, "grad_norm": 3.1950152032541554, "learning_rate": 1.5374424573698453e-05, "loss": 0.4462, "step": 7220 }, { "epoch": 1.0196272239480373, "grad_norm": 3.362487248111589, "learning_rate": 1.5373138939428945e-05, "loss": 0.4778, "step": 7221 }, { "epoch": 1.0197684269980232, "grad_norm": 3.519852340488302, "learning_rate": 1.537185318029001e-05, "loss": 0.5585, "step": 7222 }, { "epoch": 1.019909630048009, "grad_norm": 4.173641963727146, "learning_rate": 1.5370567296311523e-05, "loss": 0.7462, "step": 7223 }, { "epoch": 1.020050833097995, "grad_norm": 4.086431851669458, "learning_rate": 1.5369281287523376e-05, "loss": 0.6723, "step": 7224 }, { "epoch": 1.0201920361479808, "grad_norm": 3.7946270213518147, "learning_rate": 1.5367995153955447e-05, "loss": 0.5936, "step": 7225 }, { "epoch": 1.0203332391979667, "grad_norm": 4.048652782544324, "learning_rate": 1.536670889563763e-05, "loss": 0.7304, "step": 7226 }, { "epoch": 1.0204744422479526, "grad_norm": 3.4333234946165523, "learning_rate": 1.536542251259982e-05, "loss": 0.5696, "step": 7227 }, { "epoch": 1.0206156452979385, "grad_norm": 3.2227697470408145, "learning_rate": 1.5364136004871906e-05, "loss": 0.5911, "step": 7228 }, { "epoch": 1.0207568483479244, "grad_norm": 4.563373354093314, "learning_rate": 1.5362849372483788e-05, "loss": 0.5358, "step": 7229 }, { "epoch": 1.0208980513979102, "grad_norm": 4.292529823963489, "learning_rate": 1.5361562615465366e-05, "loss": 0.7066, "step": 7230 }, { "epoch": 1.0210392544478961, "grad_norm": 3.274679871027537, "learning_rate": 1.536027573384654e-05, "loss": 0.5416, "step": 7231 }, { "epoch": 1.021180457497882, "grad_norm": 3.423583157734008, "learning_rate": 1.5358988727657227e-05, "loss": 0.6501, "step": 7232 }, { "epoch": 1.0213216605478679, "grad_norm": 3.4807870684669884, "learning_rate": 1.535770159692733e-05, "loss": 0.6156, "step": 7233 }, { "epoch": 1.0214628635978538, "grad_norm": 3.2780120296300304, "learning_rate": 1.5356414341686758e-05, "loss": 0.6189, "step": 7234 }, { "epoch": 1.0216040666478396, "grad_norm": 4.3423227163829345, "learning_rate": 1.535512696196543e-05, "loss": 0.639, "step": 7235 }, { "epoch": 1.0217452696978255, "grad_norm": 3.6362748793726305, "learning_rate": 1.535383945779327e-05, "loss": 0.5801, "step": 7236 }, { "epoch": 1.0218864727478114, "grad_norm": 4.277821769871563, "learning_rate": 1.5352551829200185e-05, "loss": 0.7147, "step": 7237 }, { "epoch": 1.0220276757977973, "grad_norm": 3.587534190319147, "learning_rate": 1.5351264076216114e-05, "loss": 0.6588, "step": 7238 }, { "epoch": 1.0221688788477832, "grad_norm": 3.8270448928798455, "learning_rate": 1.5349976198870974e-05, "loss": 0.6664, "step": 7239 }, { "epoch": 1.022310081897769, "grad_norm": 3.728377692383585, "learning_rate": 1.5348688197194696e-05, "loss": 0.5461, "step": 7240 }, { "epoch": 1.022451284947755, "grad_norm": 3.5497920726089176, "learning_rate": 1.5347400071217217e-05, "loss": 0.6393, "step": 7241 }, { "epoch": 1.0225924879977408, "grad_norm": 3.2256845115960955, "learning_rate": 1.534611182096847e-05, "loss": 0.5814, "step": 7242 }, { "epoch": 1.0227336910477267, "grad_norm": 3.7531103641381476, "learning_rate": 1.534482344647839e-05, "loss": 0.5902, "step": 7243 }, { "epoch": 1.0228748940977126, "grad_norm": 4.223739103111618, "learning_rate": 1.5343534947776924e-05, "loss": 0.6904, "step": 7244 }, { "epoch": 1.0230160971476985, "grad_norm": 3.225214758808627, "learning_rate": 1.534224632489401e-05, "loss": 0.6206, "step": 7245 }, { "epoch": 1.0231573001976844, "grad_norm": 3.1678050809306115, "learning_rate": 1.5340957577859605e-05, "loss": 0.5058, "step": 7246 }, { "epoch": 1.0232985032476702, "grad_norm": 3.058297180136965, "learning_rate": 1.5339668706703648e-05, "loss": 0.5962, "step": 7247 }, { "epoch": 1.0234397062976561, "grad_norm": 4.836654556181267, "learning_rate": 1.5338379711456096e-05, "loss": 0.9196, "step": 7248 }, { "epoch": 1.023580909347642, "grad_norm": 3.9013312387310672, "learning_rate": 1.5337090592146905e-05, "loss": 0.7425, "step": 7249 }, { "epoch": 1.023722112397628, "grad_norm": 3.7705050288015842, "learning_rate": 1.5335801348806036e-05, "loss": 0.6462, "step": 7250 }, { "epoch": 1.0238633154476138, "grad_norm": 3.7974687868876504, "learning_rate": 1.5334511981463446e-05, "loss": 0.655, "step": 7251 }, { "epoch": 1.0240045184975997, "grad_norm": 3.7551405416541037, "learning_rate": 1.53332224901491e-05, "loss": 0.7087, "step": 7252 }, { "epoch": 1.0241457215475853, "grad_norm": 3.4978244549280126, "learning_rate": 1.533193287489297e-05, "loss": 0.5627, "step": 7253 }, { "epoch": 1.0242869245975712, "grad_norm": 3.3166735054023078, "learning_rate": 1.5330643135725022e-05, "loss": 0.58, "step": 7254 }, { "epoch": 1.024428127647557, "grad_norm": 3.193471574925442, "learning_rate": 1.5329353272675228e-05, "loss": 0.5478, "step": 7255 }, { "epoch": 1.024569330697543, "grad_norm": 2.8375410707470463, "learning_rate": 1.5328063285773567e-05, "loss": 0.474, "step": 7256 }, { "epoch": 1.0247105337475289, "grad_norm": 3.9131124370848362, "learning_rate": 1.532677317505001e-05, "loss": 0.6844, "step": 7257 }, { "epoch": 1.0248517367975147, "grad_norm": 3.585885233528202, "learning_rate": 1.5325482940534554e-05, "loss": 0.5834, "step": 7258 }, { "epoch": 1.0249929398475006, "grad_norm": 2.6430576386991027, "learning_rate": 1.5324192582257173e-05, "loss": 0.4776, "step": 7259 }, { "epoch": 1.0251341428974865, "grad_norm": 3.921039976637262, "learning_rate": 1.532290210024785e-05, "loss": 0.8692, "step": 7260 }, { "epoch": 1.0252753459474724, "grad_norm": 2.9708415446190934, "learning_rate": 1.5321611494536587e-05, "loss": 0.5737, "step": 7261 }, { "epoch": 1.0254165489974583, "grad_norm": 5.023971930389562, "learning_rate": 1.5320320765153367e-05, "loss": 0.8844, "step": 7262 }, { "epoch": 1.0255577520474441, "grad_norm": 3.5578084152084255, "learning_rate": 1.531902991212819e-05, "loss": 0.7231, "step": 7263 }, { "epoch": 1.02569895509743, "grad_norm": 3.1660480012546515, "learning_rate": 1.531773893549106e-05, "loss": 0.5439, "step": 7264 }, { "epoch": 1.025840158147416, "grad_norm": 3.0703355996479784, "learning_rate": 1.531644783527197e-05, "loss": 0.5848, "step": 7265 }, { "epoch": 1.0259813611974018, "grad_norm": 3.295676235539044, "learning_rate": 1.5315156611500927e-05, "loss": 0.6646, "step": 7266 }, { "epoch": 1.0261225642473877, "grad_norm": 3.136151365208178, "learning_rate": 1.531386526420794e-05, "loss": 0.5156, "step": 7267 }, { "epoch": 1.0262637672973736, "grad_norm": 3.4156687526260168, "learning_rate": 1.531257379342302e-05, "loss": 0.6494, "step": 7268 }, { "epoch": 1.0264049703473594, "grad_norm": 3.495032888541256, "learning_rate": 1.531128219917618e-05, "loss": 0.7187, "step": 7269 }, { "epoch": 1.0265461733973453, "grad_norm": 3.1638700070585233, "learning_rate": 1.5309990481497438e-05, "loss": 0.654, "step": 7270 }, { "epoch": 1.0266873764473312, "grad_norm": 3.660734305614395, "learning_rate": 1.5308698640416806e-05, "loss": 0.8277, "step": 7271 }, { "epoch": 1.026828579497317, "grad_norm": 3.377396052765143, "learning_rate": 1.5307406675964315e-05, "loss": 0.6784, "step": 7272 }, { "epoch": 1.026969782547303, "grad_norm": 4.317938885818584, "learning_rate": 1.530611458816998e-05, "loss": 0.5993, "step": 7273 }, { "epoch": 1.0271109855972889, "grad_norm": 2.859528187443678, "learning_rate": 1.530482237706383e-05, "loss": 0.5508, "step": 7274 }, { "epoch": 1.0272521886472747, "grad_norm": 3.312074133834732, "learning_rate": 1.5303530042675907e-05, "loss": 0.6926, "step": 7275 }, { "epoch": 1.0273933916972606, "grad_norm": 3.2755348681257628, "learning_rate": 1.5302237585036236e-05, "loss": 0.6721, "step": 7276 }, { "epoch": 1.0275345947472465, "grad_norm": 3.1054530640157987, "learning_rate": 1.530094500417485e-05, "loss": 0.5919, "step": 7277 }, { "epoch": 1.0276757977972324, "grad_norm": 3.1320750140093367, "learning_rate": 1.5299652300121792e-05, "loss": 0.5834, "step": 7278 }, { "epoch": 1.0278170008472183, "grad_norm": 3.1947610046654904, "learning_rate": 1.5298359472907104e-05, "loss": 0.5835, "step": 7279 }, { "epoch": 1.0279582038972042, "grad_norm": 2.6395241884951655, "learning_rate": 1.529706652256083e-05, "loss": 0.4841, "step": 7280 }, { "epoch": 1.02809940694719, "grad_norm": 2.925001823496966, "learning_rate": 1.5295773449113016e-05, "loss": 0.5507, "step": 7281 }, { "epoch": 1.028240609997176, "grad_norm": 3.4967412288957114, "learning_rate": 1.5294480252593718e-05, "loss": 0.6396, "step": 7282 }, { "epoch": 1.0283818130471618, "grad_norm": 3.565400184956848, "learning_rate": 1.529318693303298e-05, "loss": 0.6525, "step": 7283 }, { "epoch": 1.0285230160971477, "grad_norm": 3.209365241740629, "learning_rate": 1.5291893490460868e-05, "loss": 0.6448, "step": 7284 }, { "epoch": 1.0286642191471336, "grad_norm": 3.259156534328898, "learning_rate": 1.5290599924907435e-05, "loss": 0.7105, "step": 7285 }, { "epoch": 1.0288054221971195, "grad_norm": 3.0491678078259157, "learning_rate": 1.5289306236402744e-05, "loss": 0.5773, "step": 7286 }, { "epoch": 1.0289466252471053, "grad_norm": 4.0170831717097935, "learning_rate": 1.5288012424976863e-05, "loss": 0.6881, "step": 7287 }, { "epoch": 1.0290878282970912, "grad_norm": 3.573003843544896, "learning_rate": 1.5286718490659854e-05, "loss": 0.6259, "step": 7288 }, { "epoch": 1.029229031347077, "grad_norm": 4.094586919145629, "learning_rate": 1.528542443348179e-05, "loss": 0.7148, "step": 7289 }, { "epoch": 1.029370234397063, "grad_norm": 2.6475493034602207, "learning_rate": 1.5284130253472746e-05, "loss": 0.4657, "step": 7290 }, { "epoch": 1.0295114374470489, "grad_norm": 3.2796552402351766, "learning_rate": 1.5282835950662798e-05, "loss": 0.6006, "step": 7291 }, { "epoch": 1.0296526404970348, "grad_norm": 3.614147371898756, "learning_rate": 1.5281541525082024e-05, "loss": 0.6198, "step": 7292 }, { "epoch": 1.0297938435470206, "grad_norm": 2.965731221069141, "learning_rate": 1.5280246976760508e-05, "loss": 0.5667, "step": 7293 }, { "epoch": 1.0299350465970065, "grad_norm": 3.8603808823558214, "learning_rate": 1.5278952305728325e-05, "loss": 0.7842, "step": 7294 }, { "epoch": 1.0300762496469924, "grad_norm": 3.7078664476820005, "learning_rate": 1.5277657512015577e-05, "loss": 0.7197, "step": 7295 }, { "epoch": 1.0302174526969783, "grad_norm": 2.7619158550215768, "learning_rate": 1.5276362595652347e-05, "loss": 0.5684, "step": 7296 }, { "epoch": 1.0303586557469642, "grad_norm": 3.2070022450375912, "learning_rate": 1.5275067556668727e-05, "loss": 0.5771, "step": 7297 }, { "epoch": 1.03049985879695, "grad_norm": 3.882799678914467, "learning_rate": 1.5273772395094814e-05, "loss": 0.7435, "step": 7298 }, { "epoch": 1.030641061846936, "grad_norm": 2.89210012233448, "learning_rate": 1.527247711096071e-05, "loss": 0.488, "step": 7299 }, { "epoch": 1.0307822648969218, "grad_norm": 3.3600617808852546, "learning_rate": 1.5271181704296513e-05, "loss": 0.62, "step": 7300 }, { "epoch": 1.0309234679469077, "grad_norm": 3.353807355954199, "learning_rate": 1.5269886175132335e-05, "loss": 0.5732, "step": 7301 }, { "epoch": 1.0310646709968936, "grad_norm": 4.382129451332229, "learning_rate": 1.526859052349827e-05, "loss": 0.6818, "step": 7302 }, { "epoch": 1.0312058740468795, "grad_norm": 3.047849373570398, "learning_rate": 1.526729474942444e-05, "loss": 0.5679, "step": 7303 }, { "epoch": 1.0313470770968654, "grad_norm": 3.4644139597527848, "learning_rate": 1.526599885294096e-05, "loss": 0.6065, "step": 7304 }, { "epoch": 1.0314882801468512, "grad_norm": 3.3240762818062533, "learning_rate": 1.5264702834077936e-05, "loss": 0.6895, "step": 7305 }, { "epoch": 1.0316294831968371, "grad_norm": 4.379825786643765, "learning_rate": 1.5263406692865494e-05, "loss": 0.7635, "step": 7306 }, { "epoch": 1.031770686246823, "grad_norm": 4.522413688316297, "learning_rate": 1.5262110429333752e-05, "loss": 0.8144, "step": 7307 }, { "epoch": 1.0319118892968089, "grad_norm": 2.587812204245488, "learning_rate": 1.5260814043512838e-05, "loss": 0.3828, "step": 7308 }, { "epoch": 1.0320530923467948, "grad_norm": 3.412593582122455, "learning_rate": 1.5259517535432875e-05, "loss": 0.655, "step": 7309 }, { "epoch": 1.0321942953967806, "grad_norm": 4.125467476664053, "learning_rate": 1.5258220905123997e-05, "loss": 0.8434, "step": 7310 }, { "epoch": 1.0323354984467665, "grad_norm": 3.7093074851905286, "learning_rate": 1.5256924152616333e-05, "loss": 0.6841, "step": 7311 }, { "epoch": 1.0324767014967524, "grad_norm": 3.2010603443376473, "learning_rate": 1.5255627277940023e-05, "loss": 0.5965, "step": 7312 }, { "epoch": 1.0326179045467383, "grad_norm": 4.22465757319989, "learning_rate": 1.525433028112521e-05, "loss": 0.824, "step": 7313 }, { "epoch": 1.0327591075967242, "grad_norm": 3.2734729026579323, "learning_rate": 1.5253033162202027e-05, "loss": 0.6886, "step": 7314 }, { "epoch": 1.03290031064671, "grad_norm": 3.5567594742369373, "learning_rate": 1.5251735921200622e-05, "loss": 0.625, "step": 7315 }, { "epoch": 1.033041513696696, "grad_norm": 2.9969436988915064, "learning_rate": 1.5250438558151142e-05, "loss": 0.4931, "step": 7316 }, { "epoch": 1.0331827167466818, "grad_norm": 3.149951444129728, "learning_rate": 1.5249141073083732e-05, "loss": 0.5772, "step": 7317 }, { "epoch": 1.0333239197966677, "grad_norm": 3.4615132257074674, "learning_rate": 1.524784346602856e-05, "loss": 0.5361, "step": 7318 }, { "epoch": 1.0334651228466536, "grad_norm": 3.843233333150441, "learning_rate": 1.524654573701577e-05, "loss": 0.543, "step": 7319 }, { "epoch": 1.0336063258966393, "grad_norm": 4.812809753533892, "learning_rate": 1.5245247886075518e-05, "loss": 0.805, "step": 7320 }, { "epoch": 1.0337475289466251, "grad_norm": 5.957928965738149, "learning_rate": 1.5243949913237975e-05, "loss": 0.8411, "step": 7321 }, { "epoch": 1.033888731996611, "grad_norm": 3.1164368800921554, "learning_rate": 1.5242651818533299e-05, "loss": 0.5108, "step": 7322 }, { "epoch": 1.034029935046597, "grad_norm": 3.9059937911442075, "learning_rate": 1.524135360199166e-05, "loss": 0.7906, "step": 7323 }, { "epoch": 1.0341711380965828, "grad_norm": 3.7424998217122036, "learning_rate": 1.5240055263643223e-05, "loss": 0.6847, "step": 7324 }, { "epoch": 1.0343123411465687, "grad_norm": 3.3131562789859736, "learning_rate": 1.5238756803518168e-05, "loss": 0.5221, "step": 7325 }, { "epoch": 1.0344535441965546, "grad_norm": 3.7647655097323476, "learning_rate": 1.5237458221646668e-05, "loss": 0.5773, "step": 7326 }, { "epoch": 1.0345947472465404, "grad_norm": 2.9186807654508433, "learning_rate": 1.5236159518058899e-05, "loss": 0.5646, "step": 7327 }, { "epoch": 1.0347359502965263, "grad_norm": 4.1361126685425145, "learning_rate": 1.5234860692785045e-05, "loss": 0.8542, "step": 7328 }, { "epoch": 1.0348771533465122, "grad_norm": 3.55142634764309, "learning_rate": 1.523356174585529e-05, "loss": 0.5493, "step": 7329 }, { "epoch": 1.035018356396498, "grad_norm": 3.64123543347846, "learning_rate": 1.5232262677299816e-05, "loss": 0.623, "step": 7330 }, { "epoch": 1.035159559446484, "grad_norm": 4.032997030393602, "learning_rate": 1.5230963487148822e-05, "loss": 0.6619, "step": 7331 }, { "epoch": 1.0353007624964699, "grad_norm": 4.371134730516989, "learning_rate": 1.5229664175432494e-05, "loss": 0.7491, "step": 7332 }, { "epoch": 1.0354419655464557, "grad_norm": 3.751046878987232, "learning_rate": 1.522836474218103e-05, "loss": 0.7113, "step": 7333 }, { "epoch": 1.0355831685964416, "grad_norm": 3.0360370045837675, "learning_rate": 1.5227065187424623e-05, "loss": 0.5548, "step": 7334 }, { "epoch": 1.0357243716464275, "grad_norm": 3.7096994522360682, "learning_rate": 1.5225765511193484e-05, "loss": 0.5859, "step": 7335 }, { "epoch": 1.0358655746964134, "grad_norm": 2.961591621264662, "learning_rate": 1.5224465713517811e-05, "loss": 0.5144, "step": 7336 }, { "epoch": 1.0360067777463993, "grad_norm": 3.5748022403040385, "learning_rate": 1.5223165794427806e-05, "loss": 0.6136, "step": 7337 }, { "epoch": 1.0361479807963851, "grad_norm": 3.8212245714036444, "learning_rate": 1.522186575395369e-05, "loss": 0.706, "step": 7338 }, { "epoch": 1.036289183846371, "grad_norm": 3.107151356138247, "learning_rate": 1.5220565592125667e-05, "loss": 0.6219, "step": 7339 }, { "epoch": 1.036430386896357, "grad_norm": 3.385505739314959, "learning_rate": 1.5219265308973952e-05, "loss": 0.6232, "step": 7340 }, { "epoch": 1.0365715899463428, "grad_norm": 3.256543567810861, "learning_rate": 1.5217964904528763e-05, "loss": 0.5081, "step": 7341 }, { "epoch": 1.0367127929963287, "grad_norm": 3.072779340396042, "learning_rate": 1.5216664378820327e-05, "loss": 0.5476, "step": 7342 }, { "epoch": 1.0368539960463146, "grad_norm": 4.128101825656565, "learning_rate": 1.5215363731878864e-05, "loss": 0.7562, "step": 7343 }, { "epoch": 1.0369951990963004, "grad_norm": 4.467909805898148, "learning_rate": 1.5214062963734599e-05, "loss": 0.8103, "step": 7344 }, { "epoch": 1.0371364021462863, "grad_norm": 3.8224830887596326, "learning_rate": 1.5212762074417766e-05, "loss": 0.632, "step": 7345 }, { "epoch": 1.0372776051962722, "grad_norm": 3.3097726839248285, "learning_rate": 1.5211461063958589e-05, "loss": 0.5466, "step": 7346 }, { "epoch": 1.037418808246258, "grad_norm": 3.651826705840981, "learning_rate": 1.5210159932387307e-05, "loss": 0.6395, "step": 7347 }, { "epoch": 1.037560011296244, "grad_norm": 3.211677103404582, "learning_rate": 1.5208858679734161e-05, "loss": 0.5243, "step": 7348 }, { "epoch": 1.0377012143462299, "grad_norm": 3.1099727950799663, "learning_rate": 1.5207557306029391e-05, "loss": 0.6373, "step": 7349 }, { "epoch": 1.0378424173962157, "grad_norm": 3.6614921879530997, "learning_rate": 1.5206255811303235e-05, "loss": 0.7319, "step": 7350 }, { "epoch": 1.0379836204462016, "grad_norm": 3.392348105198865, "learning_rate": 1.5204954195585942e-05, "loss": 0.5851, "step": 7351 }, { "epoch": 1.0381248234961875, "grad_norm": 3.641975669685366, "learning_rate": 1.5203652458907763e-05, "loss": 0.5136, "step": 7352 }, { "epoch": 1.0382660265461734, "grad_norm": 3.6645895624703386, "learning_rate": 1.5202350601298945e-05, "loss": 0.5675, "step": 7353 }, { "epoch": 1.0384072295961593, "grad_norm": 3.9729202018819234, "learning_rate": 1.5201048622789747e-05, "loss": 0.7805, "step": 7354 }, { "epoch": 1.0385484326461452, "grad_norm": 3.4089403946127876, "learning_rate": 1.5199746523410425e-05, "loss": 0.5358, "step": 7355 }, { "epoch": 1.038689635696131, "grad_norm": 3.8058085593983733, "learning_rate": 1.5198444303191244e-05, "loss": 0.7142, "step": 7356 }, { "epoch": 1.038830838746117, "grad_norm": 4.033607681245991, "learning_rate": 1.5197141962162456e-05, "loss": 0.816, "step": 7357 }, { "epoch": 1.0389720417961028, "grad_norm": 3.4404088652922686, "learning_rate": 1.5195839500354337e-05, "loss": 0.5964, "step": 7358 }, { "epoch": 1.0391132448460887, "grad_norm": 3.1172181180661904, "learning_rate": 1.5194536917797151e-05, "loss": 0.4997, "step": 7359 }, { "epoch": 1.0392544478960746, "grad_norm": 3.689935224655931, "learning_rate": 1.519323421452117e-05, "loss": 0.6379, "step": 7360 }, { "epoch": 1.0393956509460605, "grad_norm": 14.722567003616996, "learning_rate": 1.519193139055667e-05, "loss": 0.5282, "step": 7361 }, { "epoch": 1.0395368539960463, "grad_norm": 4.834506994641452, "learning_rate": 1.5190628445933925e-05, "loss": 0.7767, "step": 7362 }, { "epoch": 1.0396780570460322, "grad_norm": 3.460593472986546, "learning_rate": 1.5189325380683217e-05, "loss": 0.5374, "step": 7363 }, { "epoch": 1.039819260096018, "grad_norm": 3.1707477384831915, "learning_rate": 1.5188022194834831e-05, "loss": 0.5414, "step": 7364 }, { "epoch": 1.039960463146004, "grad_norm": 3.700137494546321, "learning_rate": 1.5186718888419046e-05, "loss": 0.5391, "step": 7365 }, { "epoch": 1.0401016661959899, "grad_norm": 4.028904853731261, "learning_rate": 1.5185415461466155e-05, "loss": 0.6582, "step": 7366 }, { "epoch": 1.0402428692459758, "grad_norm": 3.1556187236516515, "learning_rate": 1.5184111914006447e-05, "loss": 0.4892, "step": 7367 }, { "epoch": 1.0403840722959616, "grad_norm": 3.44326418341099, "learning_rate": 1.5182808246070222e-05, "loss": 0.6142, "step": 7368 }, { "epoch": 1.0405252753459475, "grad_norm": 4.35190487593451, "learning_rate": 1.5181504457687766e-05, "loss": 0.7644, "step": 7369 }, { "epoch": 1.0406664783959334, "grad_norm": 4.740148420597818, "learning_rate": 1.5180200548889387e-05, "loss": 0.5779, "step": 7370 }, { "epoch": 1.0408076814459193, "grad_norm": 3.7817542090604004, "learning_rate": 1.5178896519705381e-05, "loss": 0.6627, "step": 7371 }, { "epoch": 1.0409488844959052, "grad_norm": 3.728363448316156, "learning_rate": 1.517759237016606e-05, "loss": 0.6701, "step": 7372 }, { "epoch": 1.041090087545891, "grad_norm": 3.2582678725989944, "learning_rate": 1.517628810030173e-05, "loss": 0.6913, "step": 7373 }, { "epoch": 1.041231290595877, "grad_norm": 3.9472504242622124, "learning_rate": 1.5174983710142694e-05, "loss": 0.686, "step": 7374 }, { "epoch": 1.0413724936458628, "grad_norm": 3.8112333912673853, "learning_rate": 1.5173679199719277e-05, "loss": 0.7034, "step": 7375 }, { "epoch": 1.0415136966958487, "grad_norm": 3.9276258746303494, "learning_rate": 1.5172374569061787e-05, "loss": 0.7805, "step": 7376 }, { "epoch": 1.0416548997458346, "grad_norm": 3.169926377723543, "learning_rate": 1.5171069818200548e-05, "loss": 0.5705, "step": 7377 }, { "epoch": 1.0417961027958205, "grad_norm": 3.196572547897355, "learning_rate": 1.5169764947165879e-05, "loss": 0.6439, "step": 7378 }, { "epoch": 1.0419373058458063, "grad_norm": 4.129661497191095, "learning_rate": 1.5168459955988101e-05, "loss": 0.8093, "step": 7379 }, { "epoch": 1.0420785088957922, "grad_norm": 3.0023363654269546, "learning_rate": 1.5167154844697549e-05, "loss": 0.5028, "step": 7380 }, { "epoch": 1.0422197119457781, "grad_norm": 3.12638699450999, "learning_rate": 1.5165849613324552e-05, "loss": 0.5875, "step": 7381 }, { "epoch": 1.042360914995764, "grad_norm": 3.1267380485174043, "learning_rate": 1.5164544261899439e-05, "loss": 0.4964, "step": 7382 }, { "epoch": 1.0425021180457499, "grad_norm": 3.7777611824247437, "learning_rate": 1.5163238790452549e-05, "loss": 0.5903, "step": 7383 }, { "epoch": 1.0426433210957358, "grad_norm": 3.2267364985383207, "learning_rate": 1.5161933199014216e-05, "loss": 0.6329, "step": 7384 }, { "epoch": 1.0427845241457216, "grad_norm": 3.773615944071404, "learning_rate": 1.5160627487614788e-05, "loss": 0.6522, "step": 7385 }, { "epoch": 1.0429257271957075, "grad_norm": 3.3452590788957, "learning_rate": 1.5159321656284602e-05, "loss": 0.66, "step": 7386 }, { "epoch": 1.0430669302456934, "grad_norm": 3.596295699278905, "learning_rate": 1.5158015705054014e-05, "loss": 0.6309, "step": 7387 }, { "epoch": 1.0432081332956793, "grad_norm": 3.7115718986481103, "learning_rate": 1.5156709633953364e-05, "loss": 0.7384, "step": 7388 }, { "epoch": 1.0433493363456652, "grad_norm": 4.216258153471969, "learning_rate": 1.5155403443013011e-05, "loss": 0.691, "step": 7389 }, { "epoch": 1.0434905393956508, "grad_norm": 3.92343916859213, "learning_rate": 1.515409713226331e-05, "loss": 0.652, "step": 7390 }, { "epoch": 1.0436317424456367, "grad_norm": 3.2066603458712497, "learning_rate": 1.5152790701734614e-05, "loss": 0.5989, "step": 7391 }, { "epoch": 1.0437729454956226, "grad_norm": 3.555891824203542, "learning_rate": 1.5151484151457292e-05, "loss": 0.5144, "step": 7392 }, { "epoch": 1.0439141485456085, "grad_norm": 3.3866238029598437, "learning_rate": 1.51501774814617e-05, "loss": 0.6185, "step": 7393 }, { "epoch": 1.0440553515955944, "grad_norm": 5.3269643067542285, "learning_rate": 1.5148870691778208e-05, "loss": 0.7135, "step": 7394 }, { "epoch": 1.0441965546455803, "grad_norm": 2.7763628983341953, "learning_rate": 1.5147563782437184e-05, "loss": 0.4414, "step": 7395 }, { "epoch": 1.0443377576955661, "grad_norm": 4.079596287736071, "learning_rate": 1.5146256753469004e-05, "loss": 0.731, "step": 7396 }, { "epoch": 1.044478960745552, "grad_norm": 3.392449552655842, "learning_rate": 1.5144949604904036e-05, "loss": 0.5996, "step": 7397 }, { "epoch": 1.044620163795538, "grad_norm": 4.4421894997882, "learning_rate": 1.5143642336772663e-05, "loss": 0.734, "step": 7398 }, { "epoch": 1.0447613668455238, "grad_norm": 3.3691930255061298, "learning_rate": 1.5142334949105264e-05, "loss": 0.6903, "step": 7399 }, { "epoch": 1.0449025698955097, "grad_norm": 2.8613528542680813, "learning_rate": 1.5141027441932217e-05, "loss": 0.5719, "step": 7400 }, { "epoch": 1.0450437729454956, "grad_norm": 4.229748555029211, "learning_rate": 1.5139719815283918e-05, "loss": 0.7122, "step": 7401 }, { "epoch": 1.0451849759954814, "grad_norm": 3.7921642466616046, "learning_rate": 1.5138412069190747e-05, "loss": 0.6298, "step": 7402 }, { "epoch": 1.0453261790454673, "grad_norm": 2.960698740254174, "learning_rate": 1.5137104203683101e-05, "loss": 0.4793, "step": 7403 }, { "epoch": 1.0454673820954532, "grad_norm": 3.634729665811572, "learning_rate": 1.513579621879137e-05, "loss": 0.6725, "step": 7404 }, { "epoch": 1.045608585145439, "grad_norm": 3.698706245376152, "learning_rate": 1.5134488114545955e-05, "loss": 0.7789, "step": 7405 }, { "epoch": 1.045749788195425, "grad_norm": 3.2339822521861903, "learning_rate": 1.513317989097725e-05, "loss": 0.5946, "step": 7406 }, { "epoch": 1.0458909912454109, "grad_norm": 2.7188177783373386, "learning_rate": 1.5131871548115665e-05, "loss": 0.4835, "step": 7407 }, { "epoch": 1.0460321942953967, "grad_norm": 3.381232540024849, "learning_rate": 1.5130563085991599e-05, "loss": 0.6565, "step": 7408 }, { "epoch": 1.0461733973453826, "grad_norm": 4.1708425320806075, "learning_rate": 1.5129254504635462e-05, "loss": 0.6589, "step": 7409 }, { "epoch": 1.0463146003953685, "grad_norm": 3.149765447764325, "learning_rate": 1.5127945804077668e-05, "loss": 0.5909, "step": 7410 }, { "epoch": 1.0464558034453544, "grad_norm": 3.195517827037724, "learning_rate": 1.5126636984348627e-05, "loss": 0.5736, "step": 7411 }, { "epoch": 1.0465970064953403, "grad_norm": 3.6864764349453756, "learning_rate": 1.5125328045478755e-05, "loss": 0.659, "step": 7412 }, { "epoch": 1.0467382095453261, "grad_norm": 3.5001391864683016, "learning_rate": 1.5124018987498476e-05, "loss": 0.6102, "step": 7413 }, { "epoch": 1.046879412595312, "grad_norm": 3.288436257627931, "learning_rate": 1.5122709810438205e-05, "loss": 0.5519, "step": 7414 }, { "epoch": 1.047020615645298, "grad_norm": 3.836670119804425, "learning_rate": 1.5121400514328372e-05, "loss": 0.6489, "step": 7415 }, { "epoch": 1.0471618186952838, "grad_norm": 4.488628795241171, "learning_rate": 1.5120091099199403e-05, "loss": 0.8574, "step": 7416 }, { "epoch": 1.0473030217452697, "grad_norm": 3.4142512320133003, "learning_rate": 1.5118781565081727e-05, "loss": 0.5328, "step": 7417 }, { "epoch": 1.0474442247952556, "grad_norm": 3.4779448882904034, "learning_rate": 1.511747191200578e-05, "loss": 0.558, "step": 7418 }, { "epoch": 1.0475854278452414, "grad_norm": 3.672707093549081, "learning_rate": 1.5116162140001995e-05, "loss": 0.6446, "step": 7419 }, { "epoch": 1.0477266308952273, "grad_norm": 3.859986387093808, "learning_rate": 1.5114852249100811e-05, "loss": 0.7047, "step": 7420 }, { "epoch": 1.0478678339452132, "grad_norm": 3.1228017869302986, "learning_rate": 1.511354223933267e-05, "loss": 0.561, "step": 7421 }, { "epoch": 1.048009036995199, "grad_norm": 3.2214560491304454, "learning_rate": 1.5112232110728016e-05, "loss": 0.6111, "step": 7422 }, { "epoch": 1.048150240045185, "grad_norm": 2.8999403517133695, "learning_rate": 1.5110921863317293e-05, "loss": 0.435, "step": 7423 }, { "epoch": 1.0482914430951709, "grad_norm": 2.7053959193543307, "learning_rate": 1.5109611497130959e-05, "loss": 0.3695, "step": 7424 }, { "epoch": 1.0484326461451567, "grad_norm": 3.186433997251534, "learning_rate": 1.5108301012199453e-05, "loss": 0.4578, "step": 7425 }, { "epoch": 1.0485738491951426, "grad_norm": 3.460373505882675, "learning_rate": 1.510699040855324e-05, "loss": 0.5979, "step": 7426 }, { "epoch": 1.0487150522451285, "grad_norm": 3.7093788242314094, "learning_rate": 1.5105679686222778e-05, "loss": 0.6624, "step": 7427 }, { "epoch": 1.0488562552951144, "grad_norm": 3.3848084175705893, "learning_rate": 1.5104368845238525e-05, "loss": 0.5859, "step": 7428 }, { "epoch": 1.0489974583451003, "grad_norm": 2.7376348109628528, "learning_rate": 1.5103057885630943e-05, "loss": 0.479, "step": 7429 }, { "epoch": 1.0491386613950862, "grad_norm": 2.7689422133909005, "learning_rate": 1.5101746807430502e-05, "loss": 0.4889, "step": 7430 }, { "epoch": 1.049279864445072, "grad_norm": 2.9965226804700698, "learning_rate": 1.5100435610667662e-05, "loss": 0.5514, "step": 7431 }, { "epoch": 1.049421067495058, "grad_norm": 3.093923538565264, "learning_rate": 1.509912429537291e-05, "loss": 0.6037, "step": 7432 }, { "epoch": 1.0495622705450438, "grad_norm": 3.413616208841141, "learning_rate": 1.5097812861576704e-05, "loss": 0.7186, "step": 7433 }, { "epoch": 1.0497034735950297, "grad_norm": 3.8278888544529743, "learning_rate": 1.509650130930953e-05, "loss": 0.7622, "step": 7434 }, { "epoch": 1.0498446766450156, "grad_norm": 4.016613240521506, "learning_rate": 1.509518963860187e-05, "loss": 0.6444, "step": 7435 }, { "epoch": 1.0499858796950015, "grad_norm": 3.733262811227689, "learning_rate": 1.5093877849484201e-05, "loss": 0.6367, "step": 7436 }, { "epoch": 1.0501270827449873, "grad_norm": 2.9989286398882418, "learning_rate": 1.5092565941987012e-05, "loss": 0.6006, "step": 7437 }, { "epoch": 1.0502682857949732, "grad_norm": 2.9777676834748843, "learning_rate": 1.5091253916140789e-05, "loss": 0.5948, "step": 7438 }, { "epoch": 1.050409488844959, "grad_norm": 3.455822462089069, "learning_rate": 1.5089941771976024e-05, "loss": 0.601, "step": 7439 }, { "epoch": 1.050550691894945, "grad_norm": 3.743174034710143, "learning_rate": 1.5088629509523207e-05, "loss": 0.7842, "step": 7440 }, { "epoch": 1.0506918949449309, "grad_norm": 3.434393429880277, "learning_rate": 1.5087317128812844e-05, "loss": 0.6776, "step": 7441 }, { "epoch": 1.0508330979949168, "grad_norm": 3.096036638051984, "learning_rate": 1.5086004629875426e-05, "loss": 0.5185, "step": 7442 }, { "epoch": 1.0509743010449026, "grad_norm": 3.1307629881956367, "learning_rate": 1.5084692012741454e-05, "loss": 0.5604, "step": 7443 }, { "epoch": 1.0511155040948885, "grad_norm": 3.211231306395711, "learning_rate": 1.5083379277441437e-05, "loss": 0.5857, "step": 7444 }, { "epoch": 1.0512567071448744, "grad_norm": 4.262452023740573, "learning_rate": 1.5082066424005882e-05, "loss": 0.9585, "step": 7445 }, { "epoch": 1.0513979101948603, "grad_norm": 3.5637730166189874, "learning_rate": 1.5080753452465296e-05, "loss": 0.6088, "step": 7446 }, { "epoch": 1.0515391132448462, "grad_norm": 3.983384304827401, "learning_rate": 1.5079440362850195e-05, "loss": 0.5687, "step": 7447 }, { "epoch": 1.051680316294832, "grad_norm": 4.342816747738613, "learning_rate": 1.5078127155191094e-05, "loss": 0.5527, "step": 7448 }, { "epoch": 1.051821519344818, "grad_norm": 2.740461337857034, "learning_rate": 1.5076813829518512e-05, "loss": 0.4501, "step": 7449 }, { "epoch": 1.0519627223948038, "grad_norm": 3.452511235547144, "learning_rate": 1.5075500385862967e-05, "loss": 0.6266, "step": 7450 }, { "epoch": 1.0521039254447897, "grad_norm": 4.5790176224006, "learning_rate": 1.5074186824254983e-05, "loss": 0.6926, "step": 7451 }, { "epoch": 1.0522451284947756, "grad_norm": 3.180783483351663, "learning_rate": 1.5072873144725093e-05, "loss": 0.6156, "step": 7452 }, { "epoch": 1.0523863315447615, "grad_norm": 4.9027710457083495, "learning_rate": 1.5071559347303823e-05, "loss": 0.7018, "step": 7453 }, { "epoch": 1.0525275345947473, "grad_norm": 2.748495485534521, "learning_rate": 1.5070245432021699e-05, "loss": 0.5265, "step": 7454 }, { "epoch": 1.0526687376447332, "grad_norm": 3.950797575547279, "learning_rate": 1.5068931398909264e-05, "loss": 0.6218, "step": 7455 }, { "epoch": 1.052809940694719, "grad_norm": 3.808659260038404, "learning_rate": 1.5067617247997053e-05, "loss": 0.6687, "step": 7456 }, { "epoch": 1.0529511437447048, "grad_norm": 3.2592060288790083, "learning_rate": 1.5066302979315601e-05, "loss": 0.6068, "step": 7457 }, { "epoch": 1.0530923467946907, "grad_norm": 3.5046768286920362, "learning_rate": 1.5064988592895463e-05, "loss": 0.6857, "step": 7458 }, { "epoch": 1.0532335498446765, "grad_norm": 3.6437130601125967, "learning_rate": 1.5063674088767172e-05, "loss": 0.692, "step": 7459 }, { "epoch": 1.0533747528946624, "grad_norm": 3.170674864142198, "learning_rate": 1.5062359466961283e-05, "loss": 0.4721, "step": 7460 }, { "epoch": 1.0535159559446483, "grad_norm": 3.363711338645682, "learning_rate": 1.5061044727508347e-05, "loss": 0.5815, "step": 7461 }, { "epoch": 1.0536571589946342, "grad_norm": 4.028269225232735, "learning_rate": 1.5059729870438917e-05, "loss": 0.6246, "step": 7462 }, { "epoch": 1.05379836204462, "grad_norm": 2.97261708732574, "learning_rate": 1.505841489578355e-05, "loss": 0.4869, "step": 7463 }, { "epoch": 1.053939565094606, "grad_norm": 3.4221379711211166, "learning_rate": 1.5057099803572806e-05, "loss": 0.6255, "step": 7464 }, { "epoch": 1.0540807681445918, "grad_norm": 3.315766610580562, "learning_rate": 1.5055784593837246e-05, "loss": 0.5185, "step": 7465 }, { "epoch": 1.0542219711945777, "grad_norm": 3.3012482355425283, "learning_rate": 1.5054469266607435e-05, "loss": 0.5137, "step": 7466 }, { "epoch": 1.0543631742445636, "grad_norm": 3.880374949495192, "learning_rate": 1.5053153821913941e-05, "loss": 0.6522, "step": 7467 }, { "epoch": 1.0545043772945495, "grad_norm": 11.196966230218365, "learning_rate": 1.5051838259787332e-05, "loss": 0.7558, "step": 7468 }, { "epoch": 1.0546455803445354, "grad_norm": 3.2411956424599073, "learning_rate": 1.5050522580258189e-05, "loss": 0.5314, "step": 7469 }, { "epoch": 1.0547867833945213, "grad_norm": 4.355052367400331, "learning_rate": 1.5049206783357082e-05, "loss": 0.6862, "step": 7470 }, { "epoch": 1.0549279864445071, "grad_norm": 3.591200301144237, "learning_rate": 1.5047890869114588e-05, "loss": 0.5882, "step": 7471 }, { "epoch": 1.055069189494493, "grad_norm": 3.2208305234697177, "learning_rate": 1.5046574837561289e-05, "loss": 0.6379, "step": 7472 }, { "epoch": 1.055210392544479, "grad_norm": 4.340864282460565, "learning_rate": 1.5045258688727771e-05, "loss": 0.6483, "step": 7473 }, { "epoch": 1.0553515955944648, "grad_norm": 3.6945492391080403, "learning_rate": 1.504394242264462e-05, "loss": 0.5939, "step": 7474 }, { "epoch": 1.0554927986444507, "grad_norm": 3.2722205272211555, "learning_rate": 1.5042626039342426e-05, "loss": 0.6233, "step": 7475 }, { "epoch": 1.0556340016944366, "grad_norm": 3.817569072808398, "learning_rate": 1.504130953885178e-05, "loss": 0.6871, "step": 7476 }, { "epoch": 1.0557752047444224, "grad_norm": 3.357691434577466, "learning_rate": 1.5039992921203277e-05, "loss": 0.5563, "step": 7477 }, { "epoch": 1.0559164077944083, "grad_norm": 3.5292004608996486, "learning_rate": 1.5038676186427515e-05, "loss": 0.5888, "step": 7478 }, { "epoch": 1.0560576108443942, "grad_norm": 4.4109892943387585, "learning_rate": 1.5037359334555097e-05, "loss": 0.772, "step": 7479 }, { "epoch": 1.05619881389438, "grad_norm": 3.749373041522415, "learning_rate": 1.5036042365616621e-05, "loss": 0.6063, "step": 7480 }, { "epoch": 1.056340016944366, "grad_norm": 3.589771681614758, "learning_rate": 1.5034725279642697e-05, "loss": 0.6442, "step": 7481 }, { "epoch": 1.0564812199943519, "grad_norm": 3.5482656259913314, "learning_rate": 1.5033408076663932e-05, "loss": 0.6308, "step": 7482 }, { "epoch": 1.0566224230443377, "grad_norm": 3.848960554247446, "learning_rate": 1.5032090756710935e-05, "loss": 0.7211, "step": 7483 }, { "epoch": 1.0567636260943236, "grad_norm": 3.8321164839995157, "learning_rate": 1.5030773319814324e-05, "loss": 0.7429, "step": 7484 }, { "epoch": 1.0569048291443095, "grad_norm": 3.958040827533154, "learning_rate": 1.5029455766004713e-05, "loss": 0.6684, "step": 7485 }, { "epoch": 1.0570460321942954, "grad_norm": 5.343076222815828, "learning_rate": 1.502813809531272e-05, "loss": 0.4492, "step": 7486 }, { "epoch": 1.0571872352442813, "grad_norm": 3.2021279579511117, "learning_rate": 1.5026820307768972e-05, "loss": 0.535, "step": 7487 }, { "epoch": 1.0573284382942671, "grad_norm": 5.771382938756304, "learning_rate": 1.5025502403404089e-05, "loss": 0.7977, "step": 7488 }, { "epoch": 1.057469641344253, "grad_norm": 2.9854358368963556, "learning_rate": 1.50241843822487e-05, "loss": 0.5148, "step": 7489 }, { "epoch": 1.057610844394239, "grad_norm": 3.2948350321201114, "learning_rate": 1.5022866244333438e-05, "loss": 0.5334, "step": 7490 }, { "epoch": 1.0577520474442248, "grad_norm": 3.5060815020004092, "learning_rate": 1.5021547989688932e-05, "loss": 0.5158, "step": 7491 }, { "epoch": 1.0578932504942107, "grad_norm": 3.8088992684814125, "learning_rate": 1.502022961834582e-05, "loss": 0.6802, "step": 7492 }, { "epoch": 1.0580344535441966, "grad_norm": 4.058020841161868, "learning_rate": 1.5018911130334743e-05, "loss": 0.7878, "step": 7493 }, { "epoch": 1.0581756565941824, "grad_norm": 3.450488720826541, "learning_rate": 1.5017592525686333e-05, "loss": 0.5903, "step": 7494 }, { "epoch": 1.0583168596441683, "grad_norm": 3.3564542121079, "learning_rate": 1.5016273804431242e-05, "loss": 0.6173, "step": 7495 }, { "epoch": 1.0584580626941542, "grad_norm": 3.8017849495120495, "learning_rate": 1.5014954966600117e-05, "loss": 0.6507, "step": 7496 }, { "epoch": 1.05859926574414, "grad_norm": 3.3315085431162275, "learning_rate": 1.50136360122236e-05, "loss": 0.559, "step": 7497 }, { "epoch": 1.058740468794126, "grad_norm": 3.017688589943004, "learning_rate": 1.501231694133235e-05, "loss": 0.5167, "step": 7498 }, { "epoch": 1.0588816718441119, "grad_norm": 3.1276445663908765, "learning_rate": 1.5010997753957019e-05, "loss": 0.5682, "step": 7499 }, { "epoch": 1.0590228748940977, "grad_norm": 3.3594841172204335, "learning_rate": 1.5009678450128263e-05, "loss": 0.5415, "step": 7500 }, { "epoch": 1.0591640779440836, "grad_norm": 3.4523451802869753, "learning_rate": 1.5008359029876744e-05, "loss": 0.6595, "step": 7501 }, { "epoch": 1.0593052809940695, "grad_norm": 3.603985321563, "learning_rate": 1.5007039493233123e-05, "loss": 0.6279, "step": 7502 }, { "epoch": 1.0594464840440554, "grad_norm": 3.492555144892753, "learning_rate": 1.5005719840228067e-05, "loss": 0.5134, "step": 7503 }, { "epoch": 1.0595876870940413, "grad_norm": 3.4260397401173117, "learning_rate": 1.5004400070892246e-05, "loss": 0.6125, "step": 7504 }, { "epoch": 1.0597288901440272, "grad_norm": 3.282513387151086, "learning_rate": 1.5003080185256325e-05, "loss": 0.5451, "step": 7505 }, { "epoch": 1.059870093194013, "grad_norm": 3.17519695623594, "learning_rate": 1.5001760183350981e-05, "loss": 0.5206, "step": 7506 }, { "epoch": 1.060011296243999, "grad_norm": 3.1958468776056734, "learning_rate": 1.5000440065206894e-05, "loss": 0.6137, "step": 7507 }, { "epoch": 1.0601524992939848, "grad_norm": 3.04599694227866, "learning_rate": 1.4999119830854739e-05, "loss": 0.4917, "step": 7508 }, { "epoch": 1.0602937023439707, "grad_norm": 3.1309171298108986, "learning_rate": 1.4997799480325198e-05, "loss": 0.5512, "step": 7509 }, { "epoch": 1.0604349053939566, "grad_norm": 3.805606256651297, "learning_rate": 1.4996479013648952e-05, "loss": 0.7471, "step": 7510 }, { "epoch": 1.0605761084439425, "grad_norm": 2.8719624263945023, "learning_rate": 1.4995158430856694e-05, "loss": 0.4947, "step": 7511 }, { "epoch": 1.0607173114939283, "grad_norm": 3.6564176435247435, "learning_rate": 1.499383773197911e-05, "loss": 0.6395, "step": 7512 }, { "epoch": 1.0608585145439142, "grad_norm": 3.290271917881279, "learning_rate": 1.4992516917046898e-05, "loss": 0.6416, "step": 7513 }, { "epoch": 1.0609997175939, "grad_norm": 3.53772107864924, "learning_rate": 1.4991195986090744e-05, "loss": 0.5964, "step": 7514 }, { "epoch": 1.061140920643886, "grad_norm": 3.336683061733831, "learning_rate": 1.498987493914135e-05, "loss": 0.5567, "step": 7515 }, { "epoch": 1.0612821236938719, "grad_norm": 3.3387703378662676, "learning_rate": 1.4988553776229421e-05, "loss": 0.5771, "step": 7516 }, { "epoch": 1.0614233267438578, "grad_norm": 3.251470817094527, "learning_rate": 1.4987232497385658e-05, "loss": 0.6343, "step": 7517 }, { "epoch": 1.0615645297938436, "grad_norm": 3.043507279995774, "learning_rate": 1.4985911102640762e-05, "loss": 0.5585, "step": 7518 }, { "epoch": 1.0617057328438295, "grad_norm": 3.681708052883947, "learning_rate": 1.4984589592025447e-05, "loss": 0.6069, "step": 7519 }, { "epoch": 1.0618469358938154, "grad_norm": 3.366318051333044, "learning_rate": 1.498326796557042e-05, "loss": 0.6458, "step": 7520 }, { "epoch": 1.0619881389438013, "grad_norm": 3.600289380414608, "learning_rate": 1.4981946223306403e-05, "loss": 0.6498, "step": 7521 }, { "epoch": 1.0621293419937872, "grad_norm": 3.8605253117532525, "learning_rate": 1.4980624365264103e-05, "loss": 0.677, "step": 7522 }, { "epoch": 1.062270545043773, "grad_norm": 3.994073151951099, "learning_rate": 1.4979302391474243e-05, "loss": 0.6897, "step": 7523 }, { "epoch": 1.062411748093759, "grad_norm": 4.09317896139282, "learning_rate": 1.4977980301967549e-05, "loss": 0.8015, "step": 7524 }, { "epoch": 1.0625529511437448, "grad_norm": 3.1804775013132316, "learning_rate": 1.4976658096774741e-05, "loss": 0.5536, "step": 7525 }, { "epoch": 1.0626941541937307, "grad_norm": 3.9162123095840187, "learning_rate": 1.4975335775926547e-05, "loss": 0.6109, "step": 7526 }, { "epoch": 1.0628353572437164, "grad_norm": 3.5512062239734887, "learning_rate": 1.4974013339453702e-05, "loss": 0.6854, "step": 7527 }, { "epoch": 1.0629765602937022, "grad_norm": 3.9867972009588706, "learning_rate": 1.497269078738693e-05, "loss": 0.6303, "step": 7528 }, { "epoch": 1.0631177633436881, "grad_norm": 3.4550987613491433, "learning_rate": 1.4971368119756973e-05, "loss": 0.6529, "step": 7529 }, { "epoch": 1.063258966393674, "grad_norm": 3.5634808439474503, "learning_rate": 1.4970045336594571e-05, "loss": 0.6055, "step": 7530 }, { "epoch": 1.06340016944366, "grad_norm": 3.034343028285317, "learning_rate": 1.4968722437930458e-05, "loss": 0.5048, "step": 7531 }, { "epoch": 1.0635413724936458, "grad_norm": 4.63345251180764, "learning_rate": 1.4967399423795384e-05, "loss": 0.7329, "step": 7532 }, { "epoch": 1.0636825755436317, "grad_norm": 3.1313017848389766, "learning_rate": 1.4966076294220093e-05, "loss": 0.5242, "step": 7533 }, { "epoch": 1.0638237785936175, "grad_norm": 3.8110378415908603, "learning_rate": 1.4964753049235333e-05, "loss": 0.6428, "step": 7534 }, { "epoch": 1.0639649816436034, "grad_norm": 3.3063080129857627, "learning_rate": 1.4963429688871856e-05, "loss": 0.5411, "step": 7535 }, { "epoch": 1.0641061846935893, "grad_norm": 3.365464060708699, "learning_rate": 1.4962106213160415e-05, "loss": 0.6883, "step": 7536 }, { "epoch": 1.0642473877435752, "grad_norm": 4.020474233319959, "learning_rate": 1.496078262213177e-05, "loss": 0.7421, "step": 7537 }, { "epoch": 1.064388590793561, "grad_norm": 3.485984328032362, "learning_rate": 1.4959458915816681e-05, "loss": 0.5488, "step": 7538 }, { "epoch": 1.064529793843547, "grad_norm": 2.8794823734910504, "learning_rate": 1.4958135094245904e-05, "loss": 0.5286, "step": 7539 }, { "epoch": 1.0646709968935328, "grad_norm": 4.306493706420228, "learning_rate": 1.495681115745021e-05, "loss": 0.8215, "step": 7540 }, { "epoch": 1.0648121999435187, "grad_norm": 4.300476451912054, "learning_rate": 1.4955487105460366e-05, "loss": 0.8642, "step": 7541 }, { "epoch": 1.0649534029935046, "grad_norm": 3.442390099742231, "learning_rate": 1.4954162938307143e-05, "loss": 0.5555, "step": 7542 }, { "epoch": 1.0650946060434905, "grad_norm": 3.856525549539632, "learning_rate": 1.4952838656021313e-05, "loss": 0.6088, "step": 7543 }, { "epoch": 1.0652358090934764, "grad_norm": 3.20853720532316, "learning_rate": 1.4951514258633652e-05, "loss": 0.4813, "step": 7544 }, { "epoch": 1.0653770121434623, "grad_norm": 3.4231474508280884, "learning_rate": 1.4950189746174936e-05, "loss": 0.5431, "step": 7545 }, { "epoch": 1.0655182151934481, "grad_norm": 2.9608809082398095, "learning_rate": 1.4948865118675948e-05, "loss": 0.5577, "step": 7546 }, { "epoch": 1.065659418243434, "grad_norm": 3.2823943586273927, "learning_rate": 1.4947540376167476e-05, "loss": 0.5265, "step": 7547 }, { "epoch": 1.06580062129342, "grad_norm": 3.01082516576592, "learning_rate": 1.4946215518680299e-05, "loss": 0.5177, "step": 7548 }, { "epoch": 1.0659418243434058, "grad_norm": 3.6755709223995083, "learning_rate": 1.4944890546245208e-05, "loss": 0.7278, "step": 7549 }, { "epoch": 1.0660830273933917, "grad_norm": 3.874485351060745, "learning_rate": 1.4943565458892999e-05, "loss": 0.679, "step": 7550 }, { "epoch": 1.0662242304433776, "grad_norm": 3.03004440974935, "learning_rate": 1.4942240256654463e-05, "loss": 0.4616, "step": 7551 }, { "epoch": 1.0663654334933634, "grad_norm": 3.3961447614169087, "learning_rate": 1.4940914939560398e-05, "loss": 0.6846, "step": 7552 }, { "epoch": 1.0665066365433493, "grad_norm": 3.442757919854349, "learning_rate": 1.4939589507641602e-05, "loss": 0.5952, "step": 7553 }, { "epoch": 1.0666478395933352, "grad_norm": 3.4805179683929364, "learning_rate": 1.4938263960928878e-05, "loss": 0.5536, "step": 7554 }, { "epoch": 1.066789042643321, "grad_norm": 3.6988463737556154, "learning_rate": 1.4936938299453038e-05, "loss": 0.6101, "step": 7555 }, { "epoch": 1.066930245693307, "grad_norm": 3.7529118511670134, "learning_rate": 1.493561252324488e-05, "loss": 0.5991, "step": 7556 }, { "epoch": 1.0670714487432928, "grad_norm": 3.835308498155701, "learning_rate": 1.4934286632335217e-05, "loss": 0.6281, "step": 7557 }, { "epoch": 1.0672126517932787, "grad_norm": 3.55306135812472, "learning_rate": 1.4932960626754867e-05, "loss": 0.6062, "step": 7558 }, { "epoch": 1.0673538548432646, "grad_norm": 3.753758703565222, "learning_rate": 1.493163450653464e-05, "loss": 0.6067, "step": 7559 }, { "epoch": 1.0674950578932505, "grad_norm": 3.0307273525430176, "learning_rate": 1.4930308271705357e-05, "loss": 0.5583, "step": 7560 }, { "epoch": 1.0676362609432364, "grad_norm": 4.007621045962966, "learning_rate": 1.4928981922297842e-05, "loss": 0.5929, "step": 7561 }, { "epoch": 1.0677774639932223, "grad_norm": 2.762194156328269, "learning_rate": 1.4927655458342914e-05, "loss": 0.4892, "step": 7562 }, { "epoch": 1.0679186670432081, "grad_norm": 3.4816142633936233, "learning_rate": 1.49263288798714e-05, "loss": 0.5714, "step": 7563 }, { "epoch": 1.068059870093194, "grad_norm": 3.8332577405031025, "learning_rate": 1.4925002186914133e-05, "loss": 0.6085, "step": 7564 }, { "epoch": 1.06820107314318, "grad_norm": 3.3468516587537693, "learning_rate": 1.4923675379501939e-05, "loss": 0.5322, "step": 7565 }, { "epoch": 1.0683422761931658, "grad_norm": 3.564624976409129, "learning_rate": 1.4922348457665656e-05, "loss": 0.5532, "step": 7566 }, { "epoch": 1.0684834792431517, "grad_norm": 2.5940057281703286, "learning_rate": 1.492102142143612e-05, "loss": 0.3667, "step": 7567 }, { "epoch": 1.0686246822931376, "grad_norm": 3.5447768501585206, "learning_rate": 1.4919694270844176e-05, "loss": 0.6436, "step": 7568 }, { "epoch": 1.0687658853431234, "grad_norm": 3.805530456573539, "learning_rate": 1.491836700592066e-05, "loss": 0.7294, "step": 7569 }, { "epoch": 1.0689070883931093, "grad_norm": 3.149888999331618, "learning_rate": 1.4917039626696416e-05, "loss": 0.5156, "step": 7570 }, { "epoch": 1.0690482914430952, "grad_norm": 3.650028535373315, "learning_rate": 1.4915712133202295e-05, "loss": 0.6967, "step": 7571 }, { "epoch": 1.069189494493081, "grad_norm": 3.635960047623998, "learning_rate": 1.491438452546915e-05, "loss": 0.5806, "step": 7572 }, { "epoch": 1.069330697543067, "grad_norm": 4.840031626334433, "learning_rate": 1.491305680352783e-05, "loss": 0.5131, "step": 7573 }, { "epoch": 1.0694719005930529, "grad_norm": 3.154831540045117, "learning_rate": 1.4911728967409189e-05, "loss": 0.5363, "step": 7574 }, { "epoch": 1.0696131036430387, "grad_norm": 3.8790833178624364, "learning_rate": 1.4910401017144089e-05, "loss": 0.7669, "step": 7575 }, { "epoch": 1.0697543066930246, "grad_norm": 2.9692260619469297, "learning_rate": 1.4909072952763395e-05, "loss": 0.4953, "step": 7576 }, { "epoch": 1.0698955097430105, "grad_norm": 3.651285317662748, "learning_rate": 1.4907744774297959e-05, "loss": 0.5821, "step": 7577 }, { "epoch": 1.0700367127929964, "grad_norm": 3.9534177845410423, "learning_rate": 1.4906416481778656e-05, "loss": 0.6126, "step": 7578 }, { "epoch": 1.0701779158429823, "grad_norm": 3.1297149509715614, "learning_rate": 1.4905088075236354e-05, "loss": 0.4664, "step": 7579 }, { "epoch": 1.0703191188929682, "grad_norm": 3.5436653758979735, "learning_rate": 1.4903759554701922e-05, "loss": 0.6684, "step": 7580 }, { "epoch": 1.070460321942954, "grad_norm": 5.656603129785557, "learning_rate": 1.4902430920206237e-05, "loss": 0.6696, "step": 7581 }, { "epoch": 1.07060152499294, "grad_norm": 3.27774201805249, "learning_rate": 1.4901102171780175e-05, "loss": 0.535, "step": 7582 }, { "epoch": 1.0707427280429258, "grad_norm": 3.4019494472068295, "learning_rate": 1.4899773309454612e-05, "loss": 0.5839, "step": 7583 }, { "epoch": 1.0708839310929117, "grad_norm": 3.127608689751116, "learning_rate": 1.4898444333260436e-05, "loss": 0.4842, "step": 7584 }, { "epoch": 1.0710251341428976, "grad_norm": 4.954692826458377, "learning_rate": 1.489711524322853e-05, "loss": 0.8708, "step": 7585 }, { "epoch": 1.0711663371928835, "grad_norm": 3.011817888229896, "learning_rate": 1.4895786039389779e-05, "loss": 0.5064, "step": 7586 }, { "epoch": 1.0713075402428693, "grad_norm": 3.8146781213231145, "learning_rate": 1.4894456721775074e-05, "loss": 0.5515, "step": 7587 }, { "epoch": 1.0714487432928552, "grad_norm": 3.1272211140532247, "learning_rate": 1.4893127290415312e-05, "loss": 0.5242, "step": 7588 }, { "epoch": 1.071589946342841, "grad_norm": 3.7911799193351046, "learning_rate": 1.4891797745341382e-05, "loss": 0.6034, "step": 7589 }, { "epoch": 1.071731149392827, "grad_norm": 3.424871647402132, "learning_rate": 1.4890468086584187e-05, "loss": 0.5179, "step": 7590 }, { "epoch": 1.0718723524428126, "grad_norm": 4.230405734623407, "learning_rate": 1.4889138314174622e-05, "loss": 0.681, "step": 7591 }, { "epoch": 1.0720135554927985, "grad_norm": 4.4977089110824195, "learning_rate": 1.4887808428143595e-05, "loss": 0.6991, "step": 7592 }, { "epoch": 1.0721547585427844, "grad_norm": 3.8438138741702157, "learning_rate": 1.4886478428522015e-05, "loss": 0.6189, "step": 7593 }, { "epoch": 1.0722959615927703, "grad_norm": 3.1189362515146235, "learning_rate": 1.4885148315340783e-05, "loss": 0.5299, "step": 7594 }, { "epoch": 1.0724371646427562, "grad_norm": 3.399345304536633, "learning_rate": 1.4883818088630814e-05, "loss": 0.5906, "step": 7595 }, { "epoch": 1.072578367692742, "grad_norm": 4.281357910940498, "learning_rate": 1.4882487748423025e-05, "loss": 0.7668, "step": 7596 }, { "epoch": 1.072719570742728, "grad_norm": 2.9761829070312777, "learning_rate": 1.4881157294748326e-05, "loss": 0.5936, "step": 7597 }, { "epoch": 1.0728607737927138, "grad_norm": 3.4519175520290486, "learning_rate": 1.487982672763764e-05, "loss": 0.6778, "step": 7598 }, { "epoch": 1.0730019768426997, "grad_norm": 3.6180887649020383, "learning_rate": 1.487849604712189e-05, "loss": 0.6057, "step": 7599 }, { "epoch": 1.0731431798926856, "grad_norm": 3.1535660596412542, "learning_rate": 1.4877165253231995e-05, "loss": 0.5221, "step": 7600 }, { "epoch": 1.0732843829426715, "grad_norm": 4.608452945595066, "learning_rate": 1.487583434599889e-05, "loss": 0.697, "step": 7601 }, { "epoch": 1.0734255859926574, "grad_norm": 3.407972489161472, "learning_rate": 1.48745033254535e-05, "loss": 0.5867, "step": 7602 }, { "epoch": 1.0735667890426432, "grad_norm": 3.492042085488486, "learning_rate": 1.4873172191626758e-05, "loss": 0.7386, "step": 7603 }, { "epoch": 1.0737079920926291, "grad_norm": 3.1550892529680574, "learning_rate": 1.4871840944549596e-05, "loss": 0.4928, "step": 7604 }, { "epoch": 1.073849195142615, "grad_norm": 3.2078855118346805, "learning_rate": 1.4870509584252956e-05, "loss": 0.5393, "step": 7605 }, { "epoch": 1.073990398192601, "grad_norm": 3.086574489391303, "learning_rate": 1.4869178110767777e-05, "loss": 0.6672, "step": 7606 }, { "epoch": 1.0741316012425868, "grad_norm": 2.8514914840894803, "learning_rate": 1.4867846524125e-05, "loss": 0.6009, "step": 7607 }, { "epoch": 1.0742728042925727, "grad_norm": 3.245570287100149, "learning_rate": 1.4866514824355572e-05, "loss": 0.554, "step": 7608 }, { "epoch": 1.0744140073425585, "grad_norm": 4.328808605990061, "learning_rate": 1.4865183011490442e-05, "loss": 0.7472, "step": 7609 }, { "epoch": 1.0745552103925444, "grad_norm": 3.2079907951634947, "learning_rate": 1.4863851085560563e-05, "loss": 0.5045, "step": 7610 }, { "epoch": 1.0746964134425303, "grad_norm": 4.473656497834196, "learning_rate": 1.4862519046596882e-05, "loss": 0.854, "step": 7611 }, { "epoch": 1.0748376164925162, "grad_norm": 4.049502187332494, "learning_rate": 1.4861186894630359e-05, "loss": 0.6456, "step": 7612 }, { "epoch": 1.074978819542502, "grad_norm": 3.434216185523278, "learning_rate": 1.4859854629691953e-05, "loss": 0.6046, "step": 7613 }, { "epoch": 1.075120022592488, "grad_norm": 4.2654503052946815, "learning_rate": 1.4858522251812621e-05, "loss": 0.816, "step": 7614 }, { "epoch": 1.0752612256424738, "grad_norm": 3.189915906748601, "learning_rate": 1.4857189761023333e-05, "loss": 0.5261, "step": 7615 }, { "epoch": 1.0754024286924597, "grad_norm": 4.005094332110878, "learning_rate": 1.485585715735505e-05, "loss": 0.6334, "step": 7616 }, { "epoch": 1.0755436317424456, "grad_norm": 3.7451104276339318, "learning_rate": 1.4854524440838747e-05, "loss": 0.5723, "step": 7617 }, { "epoch": 1.0756848347924315, "grad_norm": 3.5316333793634973, "learning_rate": 1.4853191611505391e-05, "loss": 0.5165, "step": 7618 }, { "epoch": 1.0758260378424174, "grad_norm": 3.755696230381059, "learning_rate": 1.4851858669385956e-05, "loss": 0.6229, "step": 7619 }, { "epoch": 1.0759672408924033, "grad_norm": 3.1475517592833606, "learning_rate": 1.4850525614511427e-05, "loss": 0.6059, "step": 7620 }, { "epoch": 1.0761084439423891, "grad_norm": 3.609776004015949, "learning_rate": 1.4849192446912772e-05, "loss": 0.6284, "step": 7621 }, { "epoch": 1.076249646992375, "grad_norm": 3.6821877459686037, "learning_rate": 1.484785916662098e-05, "loss": 0.6236, "step": 7622 }, { "epoch": 1.076390850042361, "grad_norm": 3.323719201610466, "learning_rate": 1.4846525773667035e-05, "loss": 0.5576, "step": 7623 }, { "epoch": 1.0765320530923468, "grad_norm": 3.3645919381406415, "learning_rate": 1.4845192268081924e-05, "loss": 0.6504, "step": 7624 }, { "epoch": 1.0766732561423327, "grad_norm": 3.5371097461597034, "learning_rate": 1.4843858649896634e-05, "loss": 0.7498, "step": 7625 }, { "epoch": 1.0768144591923186, "grad_norm": 4.212588905954183, "learning_rate": 1.4842524919142164e-05, "loss": 0.5804, "step": 7626 }, { "epoch": 1.0769556622423044, "grad_norm": 3.7709444531599954, "learning_rate": 1.484119107584951e-05, "loss": 0.6788, "step": 7627 }, { "epoch": 1.0770968652922903, "grad_norm": 3.8216243941455184, "learning_rate": 1.4839857120049658e-05, "loss": 0.6369, "step": 7628 }, { "epoch": 1.0772380683422762, "grad_norm": 4.1985980718051605, "learning_rate": 1.4838523051773623e-05, "loss": 0.5482, "step": 7629 }, { "epoch": 1.077379271392262, "grad_norm": 5.773035503138816, "learning_rate": 1.4837188871052399e-05, "loss": 0.8512, "step": 7630 }, { "epoch": 1.077520474442248, "grad_norm": 3.012776880298254, "learning_rate": 1.4835854577916996e-05, "loss": 0.5806, "step": 7631 }, { "epoch": 1.0776616774922338, "grad_norm": 3.2774031644410226, "learning_rate": 1.4834520172398421e-05, "loss": 0.503, "step": 7632 }, { "epoch": 1.0778028805422197, "grad_norm": 3.8412632221969587, "learning_rate": 1.4833185654527684e-05, "loss": 0.6788, "step": 7633 }, { "epoch": 1.0779440835922056, "grad_norm": 3.5161676741055756, "learning_rate": 1.4831851024335799e-05, "loss": 0.6225, "step": 7634 }, { "epoch": 1.0780852866421915, "grad_norm": 3.613079335783669, "learning_rate": 1.4830516281853784e-05, "loss": 0.5847, "step": 7635 }, { "epoch": 1.0782264896921774, "grad_norm": 3.3238132849414286, "learning_rate": 1.482918142711266e-05, "loss": 0.5302, "step": 7636 }, { "epoch": 1.0783676927421633, "grad_norm": 3.318184973899602, "learning_rate": 1.482784646014344e-05, "loss": 0.5835, "step": 7637 }, { "epoch": 1.0785088957921491, "grad_norm": 3.9292520173944765, "learning_rate": 1.4826511380977155e-05, "loss": 0.6874, "step": 7638 }, { "epoch": 1.078650098842135, "grad_norm": 3.3281695228232873, "learning_rate": 1.482517618964483e-05, "loss": 0.6311, "step": 7639 }, { "epoch": 1.078791301892121, "grad_norm": 3.8660137373117585, "learning_rate": 1.4823840886177494e-05, "loss": 0.6056, "step": 7640 }, { "epoch": 1.0789325049421068, "grad_norm": 3.332326544865899, "learning_rate": 1.482250547060618e-05, "loss": 0.4995, "step": 7641 }, { "epoch": 1.0790737079920927, "grad_norm": 3.452357178130149, "learning_rate": 1.4821169942961922e-05, "loss": 0.703, "step": 7642 }, { "epoch": 1.0792149110420786, "grad_norm": 2.8171779919595163, "learning_rate": 1.4819834303275755e-05, "loss": 0.441, "step": 7643 }, { "epoch": 1.0793561140920644, "grad_norm": 3.3903399785820847, "learning_rate": 1.4818498551578721e-05, "loss": 0.6046, "step": 7644 }, { "epoch": 1.0794973171420503, "grad_norm": 3.74799106392717, "learning_rate": 1.481716268790186e-05, "loss": 0.5454, "step": 7645 }, { "epoch": 1.0796385201920362, "grad_norm": 3.3196627393887974, "learning_rate": 1.481582671227622e-05, "loss": 0.622, "step": 7646 }, { "epoch": 1.079779723242022, "grad_norm": 3.221491836941583, "learning_rate": 1.481449062473285e-05, "loss": 0.5008, "step": 7647 }, { "epoch": 1.079920926292008, "grad_norm": 4.673303700394533, "learning_rate": 1.4813154425302791e-05, "loss": 0.7063, "step": 7648 }, { "epoch": 1.0800621293419939, "grad_norm": 3.8820088138559576, "learning_rate": 1.4811818114017106e-05, "loss": 0.6317, "step": 7649 }, { "epoch": 1.0802033323919797, "grad_norm": 2.5476669310754465, "learning_rate": 1.4810481690906846e-05, "loss": 0.4809, "step": 7650 }, { "epoch": 1.0803445354419656, "grad_norm": 4.089025500660644, "learning_rate": 1.4809145156003066e-05, "loss": 0.6916, "step": 7651 }, { "epoch": 1.0804857384919515, "grad_norm": 3.424099058394347, "learning_rate": 1.4807808509336831e-05, "loss": 0.7123, "step": 7652 }, { "epoch": 1.0806269415419374, "grad_norm": 2.7510376951055875, "learning_rate": 1.4806471750939206e-05, "loss": 0.4977, "step": 7653 }, { "epoch": 1.0807681445919233, "grad_norm": 4.012657112290507, "learning_rate": 1.480513488084125e-05, "loss": 0.6895, "step": 7654 }, { "epoch": 1.0809093476419092, "grad_norm": 2.8220191558244068, "learning_rate": 1.4803797899074035e-05, "loss": 0.4297, "step": 7655 }, { "epoch": 1.081050550691895, "grad_norm": 3.1434667538784176, "learning_rate": 1.480246080566863e-05, "loss": 0.4945, "step": 7656 }, { "epoch": 1.081191753741881, "grad_norm": 3.501852560632229, "learning_rate": 1.4801123600656114e-05, "loss": 0.6198, "step": 7657 }, { "epoch": 1.0813329567918668, "grad_norm": 3.320461502862783, "learning_rate": 1.4799786284067554e-05, "loss": 0.6715, "step": 7658 }, { "epoch": 1.0814741598418527, "grad_norm": 3.0165389680699253, "learning_rate": 1.4798448855934035e-05, "loss": 0.5479, "step": 7659 }, { "epoch": 1.0816153628918386, "grad_norm": 2.947107089699705, "learning_rate": 1.4797111316286639e-05, "loss": 0.5798, "step": 7660 }, { "epoch": 1.0817565659418245, "grad_norm": 3.3977708368612065, "learning_rate": 1.4795773665156448e-05, "loss": 0.5796, "step": 7661 }, { "epoch": 1.0818977689918103, "grad_norm": 3.5144869824496965, "learning_rate": 1.4794435902574543e-05, "loss": 0.6682, "step": 7662 }, { "epoch": 1.082038972041796, "grad_norm": 3.8371186182317394, "learning_rate": 1.4793098028572024e-05, "loss": 0.6418, "step": 7663 }, { "epoch": 1.0821801750917819, "grad_norm": 2.8775446069495643, "learning_rate": 1.4791760043179975e-05, "loss": 0.5117, "step": 7664 }, { "epoch": 1.0823213781417678, "grad_norm": 3.6190774549901055, "learning_rate": 1.4790421946429491e-05, "loss": 0.6588, "step": 7665 }, { "epoch": 1.0824625811917536, "grad_norm": 3.7124544137249464, "learning_rate": 1.478908373835167e-05, "loss": 0.6459, "step": 7666 }, { "epoch": 1.0826037842417395, "grad_norm": 2.847208039224612, "learning_rate": 1.4787745418977612e-05, "loss": 0.4707, "step": 7667 }, { "epoch": 1.0827449872917254, "grad_norm": 3.0652538058129335, "learning_rate": 1.4786406988338415e-05, "loss": 0.5315, "step": 7668 }, { "epoch": 1.0828861903417113, "grad_norm": 3.917785567250713, "learning_rate": 1.4785068446465189e-05, "loss": 0.7106, "step": 7669 }, { "epoch": 1.0830273933916972, "grad_norm": 3.5715054087861544, "learning_rate": 1.4783729793389043e-05, "loss": 0.7457, "step": 7670 }, { "epoch": 1.083168596441683, "grad_norm": 3.37483754931424, "learning_rate": 1.4782391029141077e-05, "loss": 0.5401, "step": 7671 }, { "epoch": 1.083309799491669, "grad_norm": 3.588543601734951, "learning_rate": 1.4781052153752411e-05, "loss": 0.5628, "step": 7672 }, { "epoch": 1.0834510025416548, "grad_norm": 3.6425670995466923, "learning_rate": 1.4779713167254157e-05, "loss": 0.6412, "step": 7673 }, { "epoch": 1.0835922055916407, "grad_norm": 3.632937290511941, "learning_rate": 1.4778374069677435e-05, "loss": 0.5632, "step": 7674 }, { "epoch": 1.0837334086416266, "grad_norm": 3.943526049199452, "learning_rate": 1.477703486105336e-05, "loss": 0.6971, "step": 7675 }, { "epoch": 1.0838746116916125, "grad_norm": 3.692651928110072, "learning_rate": 1.4775695541413063e-05, "loss": 0.5641, "step": 7676 }, { "epoch": 1.0840158147415984, "grad_norm": 3.634161232944167, "learning_rate": 1.4774356110787657e-05, "loss": 0.6334, "step": 7677 }, { "epoch": 1.0841570177915842, "grad_norm": 3.142057361186876, "learning_rate": 1.4773016569208283e-05, "loss": 0.5813, "step": 7678 }, { "epoch": 1.0842982208415701, "grad_norm": 3.1806693785983775, "learning_rate": 1.4771676916706063e-05, "loss": 0.5318, "step": 7679 }, { "epoch": 1.084439423891556, "grad_norm": 4.837396753903249, "learning_rate": 1.4770337153312131e-05, "loss": 0.7523, "step": 7680 }, { "epoch": 1.084580626941542, "grad_norm": 3.9395758528665388, "learning_rate": 1.4768997279057624e-05, "loss": 0.5592, "step": 7681 }, { "epoch": 1.0847218299915278, "grad_norm": 3.3969029060507054, "learning_rate": 1.476765729397368e-05, "loss": 0.6799, "step": 7682 }, { "epoch": 1.0848630330415137, "grad_norm": 3.7169186205491394, "learning_rate": 1.476631719809144e-05, "loss": 0.6307, "step": 7683 }, { "epoch": 1.0850042360914995, "grad_norm": 3.290156896305463, "learning_rate": 1.4764976991442045e-05, "loss": 0.6364, "step": 7684 }, { "epoch": 1.0851454391414854, "grad_norm": 3.864198070394517, "learning_rate": 1.4763636674056646e-05, "loss": 0.6885, "step": 7685 }, { "epoch": 1.0852866421914713, "grad_norm": 4.083706067313782, "learning_rate": 1.4762296245966387e-05, "loss": 0.688, "step": 7686 }, { "epoch": 1.0854278452414572, "grad_norm": 3.6715130743360023, "learning_rate": 1.476095570720242e-05, "loss": 0.675, "step": 7687 }, { "epoch": 1.085569048291443, "grad_norm": 4.313387898771667, "learning_rate": 1.4759615057795895e-05, "loss": 0.669, "step": 7688 }, { "epoch": 1.085710251341429, "grad_norm": 3.368996217545799, "learning_rate": 1.4758274297777974e-05, "loss": 0.5682, "step": 7689 }, { "epoch": 1.0858514543914148, "grad_norm": 2.6796123196495256, "learning_rate": 1.4756933427179814e-05, "loss": 0.4548, "step": 7690 }, { "epoch": 1.0859926574414007, "grad_norm": 3.4924615278274955, "learning_rate": 1.4755592446032576e-05, "loss": 0.5272, "step": 7691 }, { "epoch": 1.0861338604913866, "grad_norm": 3.6780020580981767, "learning_rate": 1.475425135436742e-05, "loss": 0.5829, "step": 7692 }, { "epoch": 1.0862750635413725, "grad_norm": 3.356910711620978, "learning_rate": 1.475291015221552e-05, "loss": 0.588, "step": 7693 }, { "epoch": 1.0864162665913584, "grad_norm": 4.3427998415794065, "learning_rate": 1.4751568839608036e-05, "loss": 0.8802, "step": 7694 }, { "epoch": 1.0865574696413443, "grad_norm": 2.826537870391396, "learning_rate": 1.475022741657615e-05, "loss": 0.4491, "step": 7695 }, { "epoch": 1.0866986726913301, "grad_norm": 3.7640186052150786, "learning_rate": 1.4748885883151028e-05, "loss": 0.6987, "step": 7696 }, { "epoch": 1.086839875741316, "grad_norm": 2.8545900324697633, "learning_rate": 1.4747544239363846e-05, "loss": 0.507, "step": 7697 }, { "epoch": 1.086981078791302, "grad_norm": 3.9918760572072833, "learning_rate": 1.4746202485245789e-05, "loss": 0.7448, "step": 7698 }, { "epoch": 1.0871222818412878, "grad_norm": 4.128515412270134, "learning_rate": 1.4744860620828034e-05, "loss": 0.7309, "step": 7699 }, { "epoch": 1.0872634848912737, "grad_norm": 3.73825083604541, "learning_rate": 1.474351864614177e-05, "loss": 0.5917, "step": 7700 }, { "epoch": 1.0874046879412596, "grad_norm": 3.1329917887241403, "learning_rate": 1.474217656121818e-05, "loss": 0.6208, "step": 7701 }, { "epoch": 1.0875458909912454, "grad_norm": 3.6308912832373754, "learning_rate": 1.4740834366088454e-05, "loss": 0.6721, "step": 7702 }, { "epoch": 1.0876870940412313, "grad_norm": 4.178957061798229, "learning_rate": 1.4739492060783787e-05, "loss": 0.6853, "step": 7703 }, { "epoch": 1.0878282970912172, "grad_norm": 3.295400953874177, "learning_rate": 1.4738149645335369e-05, "loss": 0.5849, "step": 7704 }, { "epoch": 1.087969500141203, "grad_norm": 3.377253858923734, "learning_rate": 1.47368071197744e-05, "loss": 0.6513, "step": 7705 }, { "epoch": 1.088110703191189, "grad_norm": 3.182587799336959, "learning_rate": 1.4735464484132079e-05, "loss": 0.5257, "step": 7706 }, { "epoch": 1.0882519062411748, "grad_norm": 3.147312263271996, "learning_rate": 1.473412173843961e-05, "loss": 0.5661, "step": 7707 }, { "epoch": 1.0883931092911607, "grad_norm": 3.081011010253514, "learning_rate": 1.4732778882728193e-05, "loss": 0.5715, "step": 7708 }, { "epoch": 1.0885343123411466, "grad_norm": 11.539098034700615, "learning_rate": 1.473143591702904e-05, "loss": 0.8634, "step": 7709 }, { "epoch": 1.0886755153911325, "grad_norm": 4.211549305181795, "learning_rate": 1.4730092841373362e-05, "loss": 0.6738, "step": 7710 }, { "epoch": 1.0888167184411184, "grad_norm": 3.2229816773682938, "learning_rate": 1.4728749655792367e-05, "loss": 0.5335, "step": 7711 }, { "epoch": 1.0889579214911043, "grad_norm": 3.840789445851332, "learning_rate": 1.4727406360317274e-05, "loss": 0.6091, "step": 7712 }, { "epoch": 1.0890991245410901, "grad_norm": 3.992168857421405, "learning_rate": 1.4726062954979296e-05, "loss": 0.7044, "step": 7713 }, { "epoch": 1.089240327591076, "grad_norm": 3.412593876722597, "learning_rate": 1.4724719439809659e-05, "loss": 0.601, "step": 7714 }, { "epoch": 1.089381530641062, "grad_norm": 3.40264751201409, "learning_rate": 1.472337581483958e-05, "loss": 0.589, "step": 7715 }, { "epoch": 1.0895227336910478, "grad_norm": 4.227426778325474, "learning_rate": 1.472203208010029e-05, "loss": 0.6007, "step": 7716 }, { "epoch": 1.0896639367410337, "grad_norm": 3.315217980428389, "learning_rate": 1.472068823562301e-05, "loss": 0.6216, "step": 7717 }, { "epoch": 1.0898051397910196, "grad_norm": 3.305687771359706, "learning_rate": 1.4719344281438977e-05, "loss": 0.5749, "step": 7718 }, { "epoch": 1.0899463428410054, "grad_norm": 3.66761936308962, "learning_rate": 1.471800021757942e-05, "loss": 0.5962, "step": 7719 }, { "epoch": 1.0900875458909913, "grad_norm": 3.506245668525747, "learning_rate": 1.4716656044075577e-05, "loss": 0.4483, "step": 7720 }, { "epoch": 1.0902287489409772, "grad_norm": 3.6671245744582923, "learning_rate": 1.4715311760958682e-05, "loss": 0.6775, "step": 7721 }, { "epoch": 1.090369951990963, "grad_norm": 2.8927250101618287, "learning_rate": 1.4713967368259981e-05, "loss": 0.4471, "step": 7722 }, { "epoch": 1.090511155040949, "grad_norm": 3.7355088319410683, "learning_rate": 1.4712622866010709e-05, "loss": 0.5822, "step": 7723 }, { "epoch": 1.0906523580909349, "grad_norm": 3.1659860512365414, "learning_rate": 1.471127825424212e-05, "loss": 0.5709, "step": 7724 }, { "epoch": 1.0907935611409207, "grad_norm": 3.400972442962781, "learning_rate": 1.4709933532985458e-05, "loss": 0.5153, "step": 7725 }, { "epoch": 1.0909347641909066, "grad_norm": 3.660754240848205, "learning_rate": 1.4708588702271978e-05, "loss": 0.6312, "step": 7726 }, { "epoch": 1.0910759672408923, "grad_norm": 3.4956945888443967, "learning_rate": 1.4707243762132927e-05, "loss": 0.6452, "step": 7727 }, { "epoch": 1.0912171702908782, "grad_norm": 4.419085105925884, "learning_rate": 1.4705898712599563e-05, "loss": 0.8022, "step": 7728 }, { "epoch": 1.091358373340864, "grad_norm": 3.2879254336430064, "learning_rate": 1.4704553553703148e-05, "loss": 0.4783, "step": 7729 }, { "epoch": 1.09149957639085, "grad_norm": 3.9258437449185006, "learning_rate": 1.4703208285474942e-05, "loss": 0.6026, "step": 7730 }, { "epoch": 1.0916407794408358, "grad_norm": 2.642947171308434, "learning_rate": 1.47018629079462e-05, "loss": 0.4765, "step": 7731 }, { "epoch": 1.0917819824908217, "grad_norm": 3.8015128490651517, "learning_rate": 1.4700517421148199e-05, "loss": 0.6552, "step": 7732 }, { "epoch": 1.0919231855408076, "grad_norm": 4.1279238851345506, "learning_rate": 1.4699171825112206e-05, "loss": 0.6893, "step": 7733 }, { "epoch": 1.0920643885907935, "grad_norm": 3.3375311327790955, "learning_rate": 1.4697826119869483e-05, "loss": 0.6366, "step": 7734 }, { "epoch": 1.0922055916407793, "grad_norm": 3.2068403139614516, "learning_rate": 1.4696480305451313e-05, "loss": 0.5727, "step": 7735 }, { "epoch": 1.0923467946907652, "grad_norm": 3.2756519391559458, "learning_rate": 1.4695134381888969e-05, "loss": 0.5248, "step": 7736 }, { "epoch": 1.0924879977407511, "grad_norm": 3.1589507382766406, "learning_rate": 1.4693788349213729e-05, "loss": 0.5314, "step": 7737 }, { "epoch": 1.092629200790737, "grad_norm": 3.579626312188878, "learning_rate": 1.4692442207456875e-05, "loss": 0.603, "step": 7738 }, { "epoch": 1.0927704038407229, "grad_norm": 2.9995292102255044, "learning_rate": 1.469109595664969e-05, "loss": 0.4995, "step": 7739 }, { "epoch": 1.0929116068907088, "grad_norm": 3.9753316500252187, "learning_rate": 1.468974959682346e-05, "loss": 0.7579, "step": 7740 }, { "epoch": 1.0930528099406946, "grad_norm": 4.294777544309448, "learning_rate": 1.4688403128009477e-05, "loss": 0.6987, "step": 7741 }, { "epoch": 1.0931940129906805, "grad_norm": 3.6495303442917, "learning_rate": 1.468705655023903e-05, "loss": 0.6572, "step": 7742 }, { "epoch": 1.0933352160406664, "grad_norm": 2.8004883813199615, "learning_rate": 1.4685709863543412e-05, "loss": 0.444, "step": 7743 }, { "epoch": 1.0934764190906523, "grad_norm": 3.7867684773572545, "learning_rate": 1.468436306795392e-05, "loss": 0.7659, "step": 7744 }, { "epoch": 1.0936176221406382, "grad_norm": 3.4306558268382763, "learning_rate": 1.4683016163501855e-05, "loss": 0.515, "step": 7745 }, { "epoch": 1.093758825190624, "grad_norm": 3.63794221100813, "learning_rate": 1.4681669150218516e-05, "loss": 0.5577, "step": 7746 }, { "epoch": 1.09390002824061, "grad_norm": 3.415415323582596, "learning_rate": 1.4680322028135208e-05, "loss": 0.5388, "step": 7747 }, { "epoch": 1.0940412312905958, "grad_norm": 2.837129728461303, "learning_rate": 1.4678974797283235e-05, "loss": 0.5122, "step": 7748 }, { "epoch": 1.0941824343405817, "grad_norm": 3.4413649445199392, "learning_rate": 1.467762745769391e-05, "loss": 0.6312, "step": 7749 }, { "epoch": 1.0943236373905676, "grad_norm": 3.9963929080664133, "learning_rate": 1.4676280009398544e-05, "loss": 0.5671, "step": 7750 }, { "epoch": 1.0944648404405535, "grad_norm": 3.65024202277792, "learning_rate": 1.4674932452428449e-05, "loss": 0.6386, "step": 7751 }, { "epoch": 1.0946060434905394, "grad_norm": 4.066079033500336, "learning_rate": 1.4673584786814943e-05, "loss": 0.792, "step": 7752 }, { "epoch": 1.0947472465405252, "grad_norm": 3.9657715251004264, "learning_rate": 1.4672237012589345e-05, "loss": 0.5664, "step": 7753 }, { "epoch": 1.0948884495905111, "grad_norm": 3.6803043861779283, "learning_rate": 1.4670889129782976e-05, "loss": 0.5699, "step": 7754 }, { "epoch": 1.095029652640497, "grad_norm": 4.9960771706770615, "learning_rate": 1.4669541138427164e-05, "loss": 0.7969, "step": 7755 }, { "epoch": 1.095170855690483, "grad_norm": 4.903308346815577, "learning_rate": 1.4668193038553229e-05, "loss": 0.7935, "step": 7756 }, { "epoch": 1.0953120587404688, "grad_norm": 3.669656306715767, "learning_rate": 1.4666844830192504e-05, "loss": 0.5531, "step": 7757 }, { "epoch": 1.0954532617904547, "grad_norm": 3.5228433733498847, "learning_rate": 1.466549651337632e-05, "loss": 0.6824, "step": 7758 }, { "epoch": 1.0955944648404405, "grad_norm": 3.7558130818602593, "learning_rate": 1.4664148088136015e-05, "loss": 0.6843, "step": 7759 }, { "epoch": 1.0957356678904264, "grad_norm": 3.3921698572480836, "learning_rate": 1.466279955450292e-05, "loss": 0.5582, "step": 7760 }, { "epoch": 1.0958768709404123, "grad_norm": 3.520896591589031, "learning_rate": 1.4661450912508379e-05, "loss": 0.5666, "step": 7761 }, { "epoch": 1.0960180739903982, "grad_norm": 3.7955380490964945, "learning_rate": 1.4660102162183732e-05, "loss": 0.7396, "step": 7762 }, { "epoch": 1.096159277040384, "grad_norm": 3.5258617402707917, "learning_rate": 1.4658753303560322e-05, "loss": 0.6674, "step": 7763 }, { "epoch": 1.09630048009037, "grad_norm": 3.6918204313264997, "learning_rate": 1.4657404336669498e-05, "loss": 0.6779, "step": 7764 }, { "epoch": 1.0964416831403558, "grad_norm": 3.819468449125098, "learning_rate": 1.4656055261542605e-05, "loss": 0.6012, "step": 7765 }, { "epoch": 1.0965828861903417, "grad_norm": 3.551736959731318, "learning_rate": 1.4654706078211003e-05, "loss": 0.5546, "step": 7766 }, { "epoch": 1.0967240892403276, "grad_norm": 4.18147791355012, "learning_rate": 1.4653356786706043e-05, "loss": 0.7529, "step": 7767 }, { "epoch": 1.0968652922903135, "grad_norm": 3.2017484757665198, "learning_rate": 1.4652007387059077e-05, "loss": 0.5326, "step": 7768 }, { "epoch": 1.0970064953402994, "grad_norm": 3.4245555506166356, "learning_rate": 1.4650657879301471e-05, "loss": 0.6441, "step": 7769 }, { "epoch": 1.0971476983902853, "grad_norm": 3.525466499718918, "learning_rate": 1.4649308263464583e-05, "loss": 0.5982, "step": 7770 }, { "epoch": 1.0972889014402711, "grad_norm": 2.744184608810498, "learning_rate": 1.4647958539579779e-05, "loss": 0.43, "step": 7771 }, { "epoch": 1.097430104490257, "grad_norm": 2.6201528990653893, "learning_rate": 1.4646608707678428e-05, "loss": 0.5038, "step": 7772 }, { "epoch": 1.097571307540243, "grad_norm": 3.3736036531476166, "learning_rate": 1.4645258767791892e-05, "loss": 0.6937, "step": 7773 }, { "epoch": 1.0977125105902288, "grad_norm": 3.328475328718485, "learning_rate": 1.4643908719951551e-05, "loss": 0.5456, "step": 7774 }, { "epoch": 1.0978537136402147, "grad_norm": 4.367293985370606, "learning_rate": 1.4642558564188781e-05, "loss": 0.6067, "step": 7775 }, { "epoch": 1.0979949166902006, "grad_norm": 3.767829317927306, "learning_rate": 1.464120830053495e-05, "loss": 0.6131, "step": 7776 }, { "epoch": 1.0981361197401864, "grad_norm": 3.080562223734928, "learning_rate": 1.4639857929021441e-05, "loss": 0.4589, "step": 7777 }, { "epoch": 1.0982773227901723, "grad_norm": 3.2825126428306834, "learning_rate": 1.4638507449679642e-05, "loss": 0.5849, "step": 7778 }, { "epoch": 1.0984185258401582, "grad_norm": 3.395887689016264, "learning_rate": 1.4637156862540934e-05, "loss": 0.5328, "step": 7779 }, { "epoch": 1.098559728890144, "grad_norm": 3.8383745200727235, "learning_rate": 1.4635806167636698e-05, "loss": 0.6374, "step": 7780 }, { "epoch": 1.09870093194013, "grad_norm": 3.5912089738821535, "learning_rate": 1.4634455364998332e-05, "loss": 0.6114, "step": 7781 }, { "epoch": 1.0988421349901158, "grad_norm": 3.182133617108485, "learning_rate": 1.4633104454657225e-05, "loss": 0.5402, "step": 7782 }, { "epoch": 1.0989833380401017, "grad_norm": 3.1548877192541047, "learning_rate": 1.4631753436644769e-05, "loss": 0.4529, "step": 7783 }, { "epoch": 1.0991245410900876, "grad_norm": 3.942223905794716, "learning_rate": 1.4630402310992367e-05, "loss": 0.6547, "step": 7784 }, { "epoch": 1.0992657441400735, "grad_norm": 4.63374999452884, "learning_rate": 1.4629051077731412e-05, "loss": 0.6439, "step": 7785 }, { "epoch": 1.0994069471900594, "grad_norm": 4.393550774046112, "learning_rate": 1.4627699736893309e-05, "loss": 0.6724, "step": 7786 }, { "epoch": 1.0995481502400453, "grad_norm": 4.593537380361326, "learning_rate": 1.4626348288509465e-05, "loss": 0.7602, "step": 7787 }, { "epoch": 1.0996893532900311, "grad_norm": 3.258452204600365, "learning_rate": 1.4624996732611284e-05, "loss": 0.5972, "step": 7788 }, { "epoch": 1.099830556340017, "grad_norm": 3.6441556434184337, "learning_rate": 1.4623645069230174e-05, "loss": 0.5343, "step": 7789 }, { "epoch": 1.099971759390003, "grad_norm": 3.316857697131328, "learning_rate": 1.4622293298397554e-05, "loss": 0.5353, "step": 7790 }, { "epoch": 1.1001129624399888, "grad_norm": 3.9569567406955803, "learning_rate": 1.4620941420144828e-05, "loss": 0.7519, "step": 7791 }, { "epoch": 1.1002541654899747, "grad_norm": 3.954889100787794, "learning_rate": 1.4619589434503426e-05, "loss": 0.6567, "step": 7792 }, { "epoch": 1.1003953685399606, "grad_norm": 4.374201804369765, "learning_rate": 1.4618237341504754e-05, "loss": 0.7825, "step": 7793 }, { "epoch": 1.1005365715899464, "grad_norm": 3.450342345875821, "learning_rate": 1.4616885141180244e-05, "loss": 0.5746, "step": 7794 }, { "epoch": 1.1006777746399323, "grad_norm": 2.837036113805083, "learning_rate": 1.4615532833561317e-05, "loss": 0.5259, "step": 7795 }, { "epoch": 1.1008189776899182, "grad_norm": 3.257726215233877, "learning_rate": 1.46141804186794e-05, "loss": 0.6069, "step": 7796 }, { "epoch": 1.100960180739904, "grad_norm": 3.241984490455417, "learning_rate": 1.4612827896565922e-05, "loss": 0.594, "step": 7797 }, { "epoch": 1.10110138378989, "grad_norm": 3.0874217907495543, "learning_rate": 1.4611475267252318e-05, "loss": 0.6056, "step": 7798 }, { "epoch": 1.1012425868398759, "grad_norm": 3.205784122610946, "learning_rate": 1.461012253077002e-05, "loss": 0.4582, "step": 7799 }, { "epoch": 1.1013837898898615, "grad_norm": 4.483148965402953, "learning_rate": 1.4608769687150459e-05, "loss": 0.6732, "step": 7800 }, { "epoch": 1.1015249929398474, "grad_norm": 2.8650037634397996, "learning_rate": 1.460741673642509e-05, "loss": 0.467, "step": 7801 }, { "epoch": 1.1016661959898333, "grad_norm": 3.95813080443705, "learning_rate": 1.460606367862534e-05, "loss": 0.6976, "step": 7802 }, { "epoch": 1.1018073990398192, "grad_norm": 3.3221092910405416, "learning_rate": 1.460471051378266e-05, "loss": 0.6026, "step": 7803 }, { "epoch": 1.101948602089805, "grad_norm": 3.872561137847206, "learning_rate": 1.4603357241928499e-05, "loss": 0.5976, "step": 7804 }, { "epoch": 1.102089805139791, "grad_norm": 3.573724705465519, "learning_rate": 1.4602003863094303e-05, "loss": 0.5184, "step": 7805 }, { "epoch": 1.1022310081897768, "grad_norm": 3.5048546940005334, "learning_rate": 1.4600650377311523e-05, "loss": 0.5797, "step": 7806 }, { "epoch": 1.1023722112397627, "grad_norm": 3.3487253908055004, "learning_rate": 1.4599296784611617e-05, "loss": 0.509, "step": 7807 }, { "epoch": 1.1025134142897486, "grad_norm": 4.160982313432065, "learning_rate": 1.4597943085026037e-05, "loss": 0.6656, "step": 7808 }, { "epoch": 1.1026546173397345, "grad_norm": 4.183974074568474, "learning_rate": 1.4596589278586248e-05, "loss": 0.712, "step": 7809 }, { "epoch": 1.1027958203897203, "grad_norm": 3.428799258318688, "learning_rate": 1.459523536532371e-05, "loss": 0.5621, "step": 7810 }, { "epoch": 1.1029370234397062, "grad_norm": 3.2063473124525905, "learning_rate": 1.4593881345269885e-05, "loss": 0.4692, "step": 7811 }, { "epoch": 1.1030782264896921, "grad_norm": 3.646138489855479, "learning_rate": 1.459252721845624e-05, "loss": 0.5596, "step": 7812 }, { "epoch": 1.103219429539678, "grad_norm": 3.2485433122195793, "learning_rate": 1.4591172984914248e-05, "loss": 0.6639, "step": 7813 }, { "epoch": 1.1033606325896639, "grad_norm": 3.8247334888845494, "learning_rate": 1.4589818644675378e-05, "loss": 0.581, "step": 7814 }, { "epoch": 1.1035018356396498, "grad_norm": 4.413777695502822, "learning_rate": 1.4588464197771106e-05, "loss": 0.7267, "step": 7815 }, { "epoch": 1.1036430386896356, "grad_norm": 3.250930815663188, "learning_rate": 1.4587109644232906e-05, "loss": 0.5478, "step": 7816 }, { "epoch": 1.1037842417396215, "grad_norm": 3.1614385866756836, "learning_rate": 1.4585754984092257e-05, "loss": 0.601, "step": 7817 }, { "epoch": 1.1039254447896074, "grad_norm": 3.8377236357632, "learning_rate": 1.4584400217380647e-05, "loss": 0.6595, "step": 7818 }, { "epoch": 1.1040666478395933, "grad_norm": 3.0901686672976214, "learning_rate": 1.4583045344129556e-05, "loss": 0.4225, "step": 7819 }, { "epoch": 1.1042078508895792, "grad_norm": 3.3799772139983215, "learning_rate": 1.4581690364370466e-05, "loss": 0.6498, "step": 7820 }, { "epoch": 1.104349053939565, "grad_norm": 2.6768283998672073, "learning_rate": 1.4580335278134873e-05, "loss": 0.4169, "step": 7821 }, { "epoch": 1.104490256989551, "grad_norm": 4.164338851058762, "learning_rate": 1.4578980085454268e-05, "loss": 0.7127, "step": 7822 }, { "epoch": 1.1046314600395368, "grad_norm": 4.188348294921656, "learning_rate": 1.4577624786360141e-05, "loss": 0.6602, "step": 7823 }, { "epoch": 1.1047726630895227, "grad_norm": 3.5550282058570644, "learning_rate": 1.4576269380883992e-05, "loss": 0.605, "step": 7824 }, { "epoch": 1.1049138661395086, "grad_norm": 3.588329475121107, "learning_rate": 1.4574913869057319e-05, "loss": 0.6488, "step": 7825 }, { "epoch": 1.1050550691894945, "grad_norm": 2.885477416623634, "learning_rate": 1.4573558250911624e-05, "loss": 0.5544, "step": 7826 }, { "epoch": 1.1051962722394804, "grad_norm": 4.131872877766003, "learning_rate": 1.457220252647841e-05, "loss": 0.7971, "step": 7827 }, { "epoch": 1.1053374752894662, "grad_norm": 4.0065120227389315, "learning_rate": 1.457084669578918e-05, "loss": 0.5347, "step": 7828 }, { "epoch": 1.1054786783394521, "grad_norm": 3.343720472098241, "learning_rate": 1.4569490758875451e-05, "loss": 0.5742, "step": 7829 }, { "epoch": 1.105619881389438, "grad_norm": 3.3412583197230674, "learning_rate": 1.4568134715768727e-05, "loss": 0.58, "step": 7830 }, { "epoch": 1.105761084439424, "grad_norm": 3.0735209976000526, "learning_rate": 1.4566778566500528e-05, "loss": 0.546, "step": 7831 }, { "epoch": 1.1059022874894098, "grad_norm": 3.75119416276313, "learning_rate": 1.4565422311102367e-05, "loss": 0.7095, "step": 7832 }, { "epoch": 1.1060434905393957, "grad_norm": 3.9449037858382305, "learning_rate": 1.4564065949605763e-05, "loss": 0.5789, "step": 7833 }, { "epoch": 1.1061846935893815, "grad_norm": 4.300112711832745, "learning_rate": 1.4562709482042237e-05, "loss": 0.6504, "step": 7834 }, { "epoch": 1.1063258966393674, "grad_norm": 3.3098782902314756, "learning_rate": 1.4561352908443313e-05, "loss": 0.5767, "step": 7835 }, { "epoch": 1.1064670996893533, "grad_norm": 2.9959811622569035, "learning_rate": 1.4559996228840518e-05, "loss": 0.5827, "step": 7836 }, { "epoch": 1.1066083027393392, "grad_norm": 3.5476613947421116, "learning_rate": 1.4558639443265379e-05, "loss": 0.5903, "step": 7837 }, { "epoch": 1.106749505789325, "grad_norm": 2.96902744974858, "learning_rate": 1.4557282551749428e-05, "loss": 0.4487, "step": 7838 }, { "epoch": 1.106890708839311, "grad_norm": 3.232105054044569, "learning_rate": 1.45559255543242e-05, "loss": 0.569, "step": 7839 }, { "epoch": 1.1070319118892968, "grad_norm": 4.107841451718482, "learning_rate": 1.455456845102123e-05, "loss": 0.681, "step": 7840 }, { "epoch": 1.1071731149392827, "grad_norm": 3.508393237420716, "learning_rate": 1.4553211241872054e-05, "loss": 0.6274, "step": 7841 }, { "epoch": 1.1073143179892686, "grad_norm": 3.5155265034841285, "learning_rate": 1.4551853926908215e-05, "loss": 0.6972, "step": 7842 }, { "epoch": 1.1074555210392545, "grad_norm": 3.3399668811061, "learning_rate": 1.4550496506161258e-05, "loss": 0.5138, "step": 7843 }, { "epoch": 1.1075967240892404, "grad_norm": 4.012000541125126, "learning_rate": 1.454913897966273e-05, "loss": 0.728, "step": 7844 }, { "epoch": 1.1077379271392263, "grad_norm": 3.634471627724593, "learning_rate": 1.454778134744417e-05, "loss": 0.6325, "step": 7845 }, { "epoch": 1.1078791301892121, "grad_norm": 3.7984707366269386, "learning_rate": 1.454642360953714e-05, "loss": 0.6501, "step": 7846 }, { "epoch": 1.108020333239198, "grad_norm": 3.1373729854239922, "learning_rate": 1.454506576597319e-05, "loss": 0.5193, "step": 7847 }, { "epoch": 1.108161536289184, "grad_norm": 3.753705835873736, "learning_rate": 1.454370781678387e-05, "loss": 0.6931, "step": 7848 }, { "epoch": 1.1083027393391698, "grad_norm": 3.869339341721548, "learning_rate": 1.4542349762000747e-05, "loss": 0.7346, "step": 7849 }, { "epoch": 1.1084439423891557, "grad_norm": 3.623323755066261, "learning_rate": 1.4540991601655374e-05, "loss": 0.7027, "step": 7850 }, { "epoch": 1.1085851454391416, "grad_norm": 3.233144135909736, "learning_rate": 1.4539633335779318e-05, "loss": 0.536, "step": 7851 }, { "epoch": 1.1087263484891274, "grad_norm": 3.8755719844220744, "learning_rate": 1.4538274964404146e-05, "loss": 0.5846, "step": 7852 }, { "epoch": 1.1088675515391133, "grad_norm": 2.795064000339568, "learning_rate": 1.4536916487561423e-05, "loss": 0.4815, "step": 7853 }, { "epoch": 1.1090087545890992, "grad_norm": 2.8778644159903095, "learning_rate": 1.4535557905282716e-05, "loss": 0.48, "step": 7854 }, { "epoch": 1.109149957639085, "grad_norm": 3.8060882304694115, "learning_rate": 1.4534199217599608e-05, "loss": 0.6187, "step": 7855 }, { "epoch": 1.109291160689071, "grad_norm": 3.8226041953001153, "learning_rate": 1.4532840424543664e-05, "loss": 0.6253, "step": 7856 }, { "epoch": 1.1094323637390568, "grad_norm": 3.2786791425761947, "learning_rate": 1.453148152614647e-05, "loss": 0.6214, "step": 7857 }, { "epoch": 1.1095735667890427, "grad_norm": 4.371592262850529, "learning_rate": 1.4530122522439598e-05, "loss": 0.6048, "step": 7858 }, { "epoch": 1.1097147698390286, "grad_norm": 2.7640635834712053, "learning_rate": 1.4528763413454638e-05, "loss": 0.4284, "step": 7859 }, { "epoch": 1.1098559728890145, "grad_norm": 3.474302012448272, "learning_rate": 1.4527404199223173e-05, "loss": 0.646, "step": 7860 }, { "epoch": 1.1099971759390004, "grad_norm": 3.789964134041785, "learning_rate": 1.4526044879776788e-05, "loss": 0.708, "step": 7861 }, { "epoch": 1.1101383789889863, "grad_norm": 3.3456972625337857, "learning_rate": 1.4524685455147071e-05, "loss": 0.6955, "step": 7862 }, { "epoch": 1.110279582038972, "grad_norm": 3.5345505032357742, "learning_rate": 1.4523325925365623e-05, "loss": 0.5629, "step": 7863 }, { "epoch": 1.1104207850889578, "grad_norm": 3.433202813113254, "learning_rate": 1.4521966290464033e-05, "loss": 0.582, "step": 7864 }, { "epoch": 1.1105619881389437, "grad_norm": 3.8992703100265147, "learning_rate": 1.45206065504739e-05, "loss": 0.6374, "step": 7865 }, { "epoch": 1.1107031911889296, "grad_norm": 3.3194222320322506, "learning_rate": 1.4519246705426822e-05, "loss": 0.5824, "step": 7866 }, { "epoch": 1.1108443942389155, "grad_norm": 3.644766909108725, "learning_rate": 1.4517886755354403e-05, "loss": 0.6885, "step": 7867 }, { "epoch": 1.1109855972889013, "grad_norm": 3.8811833698450986, "learning_rate": 1.4516526700288243e-05, "loss": 0.6531, "step": 7868 }, { "epoch": 1.1111268003388872, "grad_norm": 3.4941044355056015, "learning_rate": 1.4515166540259957e-05, "loss": 0.5669, "step": 7869 }, { "epoch": 1.111268003388873, "grad_norm": 3.97471037815537, "learning_rate": 1.451380627530115e-05, "loss": 0.7076, "step": 7870 }, { "epoch": 1.111409206438859, "grad_norm": 3.330975150699957, "learning_rate": 1.4512445905443433e-05, "loss": 0.5384, "step": 7871 }, { "epoch": 1.1115504094888449, "grad_norm": 3.775330749671015, "learning_rate": 1.4511085430718423e-05, "loss": 0.5937, "step": 7872 }, { "epoch": 1.1116916125388308, "grad_norm": 3.322317146681728, "learning_rate": 1.450972485115774e-05, "loss": 0.4898, "step": 7873 }, { "epoch": 1.1118328155888166, "grad_norm": 3.3695921498495927, "learning_rate": 1.4508364166792993e-05, "loss": 0.6871, "step": 7874 }, { "epoch": 1.1119740186388025, "grad_norm": 3.475986923491055, "learning_rate": 1.4507003377655813e-05, "loss": 0.581, "step": 7875 }, { "epoch": 1.1121152216887884, "grad_norm": 3.8631442375011202, "learning_rate": 1.450564248377782e-05, "loss": 0.6962, "step": 7876 }, { "epoch": 1.1122564247387743, "grad_norm": 3.1832464689064746, "learning_rate": 1.4504281485190639e-05, "loss": 0.4989, "step": 7877 }, { "epoch": 1.1123976277887602, "grad_norm": 3.595818630366584, "learning_rate": 1.4502920381925905e-05, "loss": 0.5927, "step": 7878 }, { "epoch": 1.112538830838746, "grad_norm": 3.5225167747688184, "learning_rate": 1.4501559174015245e-05, "loss": 0.5516, "step": 7879 }, { "epoch": 1.112680033888732, "grad_norm": 3.6428752986088786, "learning_rate": 1.4500197861490293e-05, "loss": 0.7149, "step": 7880 }, { "epoch": 1.1128212369387178, "grad_norm": 3.4019754795215404, "learning_rate": 1.4498836444382689e-05, "loss": 0.6277, "step": 7881 }, { "epoch": 1.1129624399887037, "grad_norm": 3.282833009947888, "learning_rate": 1.4497474922724064e-05, "loss": 0.5469, "step": 7882 }, { "epoch": 1.1131036430386896, "grad_norm": 3.9334948964230683, "learning_rate": 1.4496113296546068e-05, "loss": 0.7585, "step": 7883 }, { "epoch": 1.1132448460886755, "grad_norm": 2.9847821444186597, "learning_rate": 1.4494751565880338e-05, "loss": 0.4719, "step": 7884 }, { "epoch": 1.1133860491386613, "grad_norm": 2.742510621037806, "learning_rate": 1.4493389730758525e-05, "loss": 0.475, "step": 7885 }, { "epoch": 1.1135272521886472, "grad_norm": 2.9885406855049736, "learning_rate": 1.4492027791212275e-05, "loss": 0.5707, "step": 7886 }, { "epoch": 1.1136684552386331, "grad_norm": 3.634929104410084, "learning_rate": 1.4490665747273238e-05, "loss": 0.638, "step": 7887 }, { "epoch": 1.113809658288619, "grad_norm": 3.872864363694069, "learning_rate": 1.4489303598973067e-05, "loss": 0.6473, "step": 7888 }, { "epoch": 1.1139508613386049, "grad_norm": 3.737025603394471, "learning_rate": 1.4487941346343423e-05, "loss": 0.7725, "step": 7889 }, { "epoch": 1.1140920643885908, "grad_norm": 3.3132694666127147, "learning_rate": 1.448657898941596e-05, "loss": 0.5655, "step": 7890 }, { "epoch": 1.1142332674385766, "grad_norm": 3.4368769639969665, "learning_rate": 1.4485216528222336e-05, "loss": 0.5794, "step": 7891 }, { "epoch": 1.1143744704885625, "grad_norm": 3.403826365906247, "learning_rate": 1.4483853962794218e-05, "loss": 0.6242, "step": 7892 }, { "epoch": 1.1145156735385484, "grad_norm": 3.4006330416533155, "learning_rate": 1.4482491293163273e-05, "loss": 0.4992, "step": 7893 }, { "epoch": 1.1146568765885343, "grad_norm": 2.928934174469643, "learning_rate": 1.4481128519361163e-05, "loss": 0.5215, "step": 7894 }, { "epoch": 1.1147980796385202, "grad_norm": 3.7987835059519264, "learning_rate": 1.4479765641419561e-05, "loss": 0.6305, "step": 7895 }, { "epoch": 1.114939282688506, "grad_norm": 3.344805030712181, "learning_rate": 1.4478402659370144e-05, "loss": 0.5601, "step": 7896 }, { "epoch": 1.115080485738492, "grad_norm": 4.2006141075272385, "learning_rate": 1.4477039573244577e-05, "loss": 0.7168, "step": 7897 }, { "epoch": 1.1152216887884778, "grad_norm": 3.7491839616705884, "learning_rate": 1.447567638307455e-05, "loss": 0.7401, "step": 7898 }, { "epoch": 1.1153628918384637, "grad_norm": 4.776126246322276, "learning_rate": 1.4474313088891734e-05, "loss": 0.7319, "step": 7899 }, { "epoch": 1.1155040948884496, "grad_norm": 3.1544416331419365, "learning_rate": 1.4472949690727813e-05, "loss": 0.5832, "step": 7900 }, { "epoch": 1.1156452979384355, "grad_norm": 4.608978134426898, "learning_rate": 1.4471586188614475e-05, "loss": 0.8741, "step": 7901 }, { "epoch": 1.1157865009884214, "grad_norm": 3.0758037812421106, "learning_rate": 1.4470222582583404e-05, "loss": 0.5281, "step": 7902 }, { "epoch": 1.1159277040384072, "grad_norm": 3.56456740530919, "learning_rate": 1.4468858872666292e-05, "loss": 0.523, "step": 7903 }, { "epoch": 1.1160689070883931, "grad_norm": 3.9068822992952565, "learning_rate": 1.4467495058894829e-05, "loss": 0.6262, "step": 7904 }, { "epoch": 1.116210110138379, "grad_norm": 3.5907728510669803, "learning_rate": 1.4466131141300708e-05, "loss": 0.6103, "step": 7905 }, { "epoch": 1.116351313188365, "grad_norm": 4.010818038853137, "learning_rate": 1.446476711991563e-05, "loss": 0.8251, "step": 7906 }, { "epoch": 1.1164925162383508, "grad_norm": 4.970793172637347, "learning_rate": 1.4463402994771295e-05, "loss": 0.8508, "step": 7907 }, { "epoch": 1.1166337192883367, "grad_norm": 2.7989165483081897, "learning_rate": 1.4462038765899397e-05, "loss": 0.5315, "step": 7908 }, { "epoch": 1.1167749223383225, "grad_norm": 3.705768698215458, "learning_rate": 1.4460674433331647e-05, "loss": 0.7603, "step": 7909 }, { "epoch": 1.1169161253883084, "grad_norm": 3.0329925131468527, "learning_rate": 1.445930999709975e-05, "loss": 0.5263, "step": 7910 }, { "epoch": 1.1170573284382943, "grad_norm": 2.9226169271081814, "learning_rate": 1.4457945457235416e-05, "loss": 0.5062, "step": 7911 }, { "epoch": 1.1171985314882802, "grad_norm": 2.7144345965121945, "learning_rate": 1.4456580813770353e-05, "loss": 0.573, "step": 7912 }, { "epoch": 1.117339734538266, "grad_norm": 3.13229739693429, "learning_rate": 1.4455216066736278e-05, "loss": 0.5061, "step": 7913 }, { "epoch": 1.117480937588252, "grad_norm": 2.9767180097400336, "learning_rate": 1.4453851216164902e-05, "loss": 0.5517, "step": 7914 }, { "epoch": 1.1176221406382378, "grad_norm": 2.8957543255816645, "learning_rate": 1.4452486262087953e-05, "loss": 0.5066, "step": 7915 }, { "epoch": 1.1177633436882237, "grad_norm": 3.6619997337740426, "learning_rate": 1.4451121204537144e-05, "loss": 0.6799, "step": 7916 }, { "epoch": 1.1179045467382096, "grad_norm": 3.36245773607999, "learning_rate": 1.4449756043544197e-05, "loss": 0.6311, "step": 7917 }, { "epoch": 1.1180457497881955, "grad_norm": 3.476816275683861, "learning_rate": 1.4448390779140844e-05, "loss": 0.6594, "step": 7918 }, { "epoch": 1.1181869528381814, "grad_norm": 3.902083070889247, "learning_rate": 1.444702541135881e-05, "loss": 0.81, "step": 7919 }, { "epoch": 1.1183281558881673, "grad_norm": 3.184969466696758, "learning_rate": 1.4445659940229827e-05, "loss": 0.5088, "step": 7920 }, { "epoch": 1.1184693589381531, "grad_norm": 3.3340183550948614, "learning_rate": 1.4444294365785627e-05, "loss": 0.5875, "step": 7921 }, { "epoch": 1.118610561988139, "grad_norm": 4.639294049035789, "learning_rate": 1.4442928688057945e-05, "loss": 0.8231, "step": 7922 }, { "epoch": 1.118751765038125, "grad_norm": 3.6958372337249155, "learning_rate": 1.4441562907078515e-05, "loss": 0.6819, "step": 7923 }, { "epoch": 1.1188929680881108, "grad_norm": 3.8052564827946473, "learning_rate": 1.4440197022879088e-05, "loss": 0.6306, "step": 7924 }, { "epoch": 1.1190341711380967, "grad_norm": 3.5808524087844447, "learning_rate": 1.4438831035491392e-05, "loss": 0.577, "step": 7925 }, { "epoch": 1.1191753741880826, "grad_norm": 3.9595287617811064, "learning_rate": 1.4437464944947186e-05, "loss": 0.5724, "step": 7926 }, { "epoch": 1.1193165772380684, "grad_norm": 3.769728453735003, "learning_rate": 1.4436098751278209e-05, "loss": 0.73, "step": 7927 }, { "epoch": 1.1194577802880543, "grad_norm": 3.8000047314658465, "learning_rate": 1.4434732454516214e-05, "loss": 0.6584, "step": 7928 }, { "epoch": 1.1195989833380402, "grad_norm": 3.442824658082659, "learning_rate": 1.443336605469295e-05, "loss": 0.6977, "step": 7929 }, { "epoch": 1.119740186388026, "grad_norm": 3.4098277596310584, "learning_rate": 1.4431999551840175e-05, "loss": 0.5824, "step": 7930 }, { "epoch": 1.119881389438012, "grad_norm": 3.508687191325764, "learning_rate": 1.4430632945989643e-05, "loss": 0.6234, "step": 7931 }, { "epoch": 1.1200225924879978, "grad_norm": 3.0475893400491856, "learning_rate": 1.4429266237173116e-05, "loss": 0.5084, "step": 7932 }, { "epoch": 1.1201637955379837, "grad_norm": 4.239280685901594, "learning_rate": 1.4427899425422354e-05, "loss": 0.6969, "step": 7933 }, { "epoch": 1.1203049985879696, "grad_norm": 3.5199850270247652, "learning_rate": 1.442653251076912e-05, "loss": 0.6244, "step": 7934 }, { "epoch": 1.1204462016379555, "grad_norm": 3.6987858934057654, "learning_rate": 1.4425165493245183e-05, "loss": 0.5922, "step": 7935 }, { "epoch": 1.1205874046879412, "grad_norm": 3.4517869791055675, "learning_rate": 1.4423798372882315e-05, "loss": 0.5901, "step": 7936 }, { "epoch": 1.120728607737927, "grad_norm": 2.877165692422827, "learning_rate": 1.442243114971228e-05, "loss": 0.5053, "step": 7937 }, { "epoch": 1.120869810787913, "grad_norm": 3.840989322478067, "learning_rate": 1.4421063823766855e-05, "loss": 0.6688, "step": 7938 }, { "epoch": 1.1210110138378988, "grad_norm": 2.9147339201257236, "learning_rate": 1.4419696395077816e-05, "loss": 0.5499, "step": 7939 }, { "epoch": 1.1211522168878847, "grad_norm": 3.855218325141458, "learning_rate": 1.441832886367694e-05, "loss": 0.6966, "step": 7940 }, { "epoch": 1.1212934199378706, "grad_norm": 3.616300304653319, "learning_rate": 1.4416961229596013e-05, "loss": 0.6364, "step": 7941 }, { "epoch": 1.1214346229878565, "grad_norm": 3.994384697940939, "learning_rate": 1.441559349286681e-05, "loss": 0.6097, "step": 7942 }, { "epoch": 1.1215758260378423, "grad_norm": 3.5763703165175356, "learning_rate": 1.4414225653521125e-05, "loss": 0.6927, "step": 7943 }, { "epoch": 1.1217170290878282, "grad_norm": 3.1831095652791, "learning_rate": 1.4412857711590742e-05, "loss": 0.5492, "step": 7944 }, { "epoch": 1.121858232137814, "grad_norm": 4.196942667327182, "learning_rate": 1.441148966710745e-05, "loss": 0.7177, "step": 7945 }, { "epoch": 1.1219994351878, "grad_norm": 4.237336048233413, "learning_rate": 1.4410121520103045e-05, "loss": 0.6281, "step": 7946 }, { "epoch": 1.1221406382377859, "grad_norm": 3.3103023495428046, "learning_rate": 1.4408753270609318e-05, "loss": 0.5448, "step": 7947 }, { "epoch": 1.1222818412877718, "grad_norm": 3.2081056077624037, "learning_rate": 1.440738491865807e-05, "loss": 0.5696, "step": 7948 }, { "epoch": 1.1224230443377576, "grad_norm": 3.5149348505291873, "learning_rate": 1.44060164642811e-05, "loss": 0.648, "step": 7949 }, { "epoch": 1.1225642473877435, "grad_norm": 4.092007143277288, "learning_rate": 1.4404647907510212e-05, "loss": 0.6178, "step": 7950 }, { "epoch": 1.1227054504377294, "grad_norm": 4.103672124058433, "learning_rate": 1.4403279248377207e-05, "loss": 0.6801, "step": 7951 }, { "epoch": 1.1228466534877153, "grad_norm": 3.72210190572086, "learning_rate": 1.4401910486913892e-05, "loss": 0.7458, "step": 7952 }, { "epoch": 1.1229878565377012, "grad_norm": 3.4625328086633274, "learning_rate": 1.4400541623152084e-05, "loss": 0.6479, "step": 7953 }, { "epoch": 1.123129059587687, "grad_norm": 3.9122006971772265, "learning_rate": 1.4399172657123584e-05, "loss": 0.6577, "step": 7954 }, { "epoch": 1.123270262637673, "grad_norm": 3.220392244991836, "learning_rate": 1.4397803588860213e-05, "loss": 0.5251, "step": 7955 }, { "epoch": 1.1234114656876588, "grad_norm": 3.497784611356537, "learning_rate": 1.4396434418393786e-05, "loss": 0.6971, "step": 7956 }, { "epoch": 1.1235526687376447, "grad_norm": 3.831570551997078, "learning_rate": 1.4395065145756118e-05, "loss": 0.6927, "step": 7957 }, { "epoch": 1.1236938717876306, "grad_norm": 3.317478727476699, "learning_rate": 1.4393695770979038e-05, "loss": 0.5294, "step": 7958 }, { "epoch": 1.1238350748376165, "grad_norm": 3.413107251677628, "learning_rate": 1.4392326294094365e-05, "loss": 0.6854, "step": 7959 }, { "epoch": 1.1239762778876023, "grad_norm": 3.0376805576092005, "learning_rate": 1.4390956715133928e-05, "loss": 0.5363, "step": 7960 }, { "epoch": 1.1241174809375882, "grad_norm": 3.6833449997947074, "learning_rate": 1.438958703412955e-05, "loss": 0.5677, "step": 7961 }, { "epoch": 1.1242586839875741, "grad_norm": 3.1442189866414827, "learning_rate": 1.4388217251113068e-05, "loss": 0.514, "step": 7962 }, { "epoch": 1.12439988703756, "grad_norm": 3.4723783741248346, "learning_rate": 1.4386847366116313e-05, "loss": 0.5331, "step": 7963 }, { "epoch": 1.1245410900875459, "grad_norm": 2.999395593588539, "learning_rate": 1.4385477379171116e-05, "loss": 0.5202, "step": 7964 }, { "epoch": 1.1246822931375318, "grad_norm": 3.756574244699541, "learning_rate": 1.4384107290309317e-05, "loss": 0.706, "step": 7965 }, { "epoch": 1.1248234961875176, "grad_norm": 2.943707967649355, "learning_rate": 1.4382737099562765e-05, "loss": 0.61, "step": 7966 }, { "epoch": 1.1249646992375035, "grad_norm": 3.295947557751666, "learning_rate": 1.4381366806963291e-05, "loss": 0.5894, "step": 7967 }, { "epoch": 1.1251059022874894, "grad_norm": 3.9318882015101493, "learning_rate": 1.4379996412542742e-05, "loss": 0.5938, "step": 7968 }, { "epoch": 1.1252471053374753, "grad_norm": 3.2779360004570157, "learning_rate": 1.4378625916332971e-05, "loss": 0.6174, "step": 7969 }, { "epoch": 1.1253883083874612, "grad_norm": 3.5026178708860396, "learning_rate": 1.4377255318365828e-05, "loss": 0.6425, "step": 7970 }, { "epoch": 1.125529511437447, "grad_norm": 3.933042695927975, "learning_rate": 1.4375884618673156e-05, "loss": 0.798, "step": 7971 }, { "epoch": 1.125670714487433, "grad_norm": 3.625369035897644, "learning_rate": 1.4374513817286819e-05, "loss": 0.5973, "step": 7972 }, { "epoch": 1.1258119175374188, "grad_norm": 3.1590705228041998, "learning_rate": 1.4373142914238669e-05, "loss": 0.5434, "step": 7973 }, { "epoch": 1.1259531205874047, "grad_norm": 4.38318676199993, "learning_rate": 1.4371771909560566e-05, "loss": 0.7327, "step": 7974 }, { "epoch": 1.1260943236373906, "grad_norm": 3.6657412804290535, "learning_rate": 1.4370400803284374e-05, "loss": 0.6319, "step": 7975 }, { "epoch": 1.1262355266873765, "grad_norm": 2.7155885274217315, "learning_rate": 1.4369029595441953e-05, "loss": 0.557, "step": 7976 }, { "epoch": 1.1263767297373624, "grad_norm": 3.6045340765390037, "learning_rate": 1.4367658286065168e-05, "loss": 0.6744, "step": 7977 }, { "epoch": 1.1265179327873482, "grad_norm": 3.8483966727508845, "learning_rate": 1.4366286875185895e-05, "loss": 0.6875, "step": 7978 }, { "epoch": 1.1266591358373341, "grad_norm": 3.619813848703416, "learning_rate": 1.4364915362835999e-05, "loss": 0.6724, "step": 7979 }, { "epoch": 1.12680033888732, "grad_norm": 3.5649810468131324, "learning_rate": 1.4363543749047354e-05, "loss": 0.6184, "step": 7980 }, { "epoch": 1.126941541937306, "grad_norm": 4.372705927484826, "learning_rate": 1.436217203385184e-05, "loss": 0.7917, "step": 7981 }, { "epoch": 1.1270827449872918, "grad_norm": 3.204333960688123, "learning_rate": 1.436080021728133e-05, "loss": 0.5321, "step": 7982 }, { "epoch": 1.1272239480372777, "grad_norm": 3.081117257736331, "learning_rate": 1.4359428299367707e-05, "loss": 0.5845, "step": 7983 }, { "epoch": 1.1273651510872635, "grad_norm": 2.3727841395344753, "learning_rate": 1.4358056280142851e-05, "loss": 0.4592, "step": 7984 }, { "epoch": 1.1275063541372494, "grad_norm": 3.7718101687526215, "learning_rate": 1.4356684159638647e-05, "loss": 0.7019, "step": 7985 }, { "epoch": 1.1276475571872353, "grad_norm": 4.401860308044719, "learning_rate": 1.4355311937886988e-05, "loss": 0.8887, "step": 7986 }, { "epoch": 1.1277887602372212, "grad_norm": 3.7985635063426937, "learning_rate": 1.4353939614919763e-05, "loss": 0.5451, "step": 7987 }, { "epoch": 1.127929963287207, "grad_norm": 3.5271589522757716, "learning_rate": 1.4352567190768859e-05, "loss": 0.5388, "step": 7988 }, { "epoch": 1.128071166337193, "grad_norm": 4.0730218919175005, "learning_rate": 1.4351194665466173e-05, "loss": 0.6806, "step": 7989 }, { "epoch": 1.1282123693871788, "grad_norm": 3.3661866266448643, "learning_rate": 1.4349822039043602e-05, "loss": 0.5363, "step": 7990 }, { "epoch": 1.1283535724371647, "grad_norm": 3.580187011081078, "learning_rate": 1.4348449311533043e-05, "loss": 0.5436, "step": 7991 }, { "epoch": 1.1284947754871506, "grad_norm": 3.0649271113437626, "learning_rate": 1.4347076482966404e-05, "loss": 0.4958, "step": 7992 }, { "epoch": 1.1286359785371365, "grad_norm": 3.109031077366945, "learning_rate": 1.4345703553375585e-05, "loss": 0.5205, "step": 7993 }, { "epoch": 1.1287771815871224, "grad_norm": 3.9196086108273427, "learning_rate": 1.4344330522792489e-05, "loss": 0.7327, "step": 7994 }, { "epoch": 1.1289183846371083, "grad_norm": 4.007752223044708, "learning_rate": 1.434295739124903e-05, "loss": 0.6488, "step": 7995 }, { "epoch": 1.1290595876870941, "grad_norm": 3.2037687643685695, "learning_rate": 1.4341584158777115e-05, "loss": 0.5453, "step": 7996 }, { "epoch": 1.12920079073708, "grad_norm": 4.8321674792305265, "learning_rate": 1.4340210825408663e-05, "loss": 0.7488, "step": 7997 }, { "epoch": 1.1293419937870657, "grad_norm": 3.1158658712236322, "learning_rate": 1.4338837391175582e-05, "loss": 0.5715, "step": 7998 }, { "epoch": 1.1294831968370516, "grad_norm": 4.384202269682889, "learning_rate": 1.4337463856109796e-05, "loss": 0.5558, "step": 7999 }, { "epoch": 1.1296243998870374, "grad_norm": 3.459322116799393, "learning_rate": 1.4336090220243222e-05, "loss": 0.5719, "step": 8000 }, { "epoch": 1.1297656029370233, "grad_norm": 4.088307199936969, "learning_rate": 1.4334716483607785e-05, "loss": 0.7876, "step": 8001 }, { "epoch": 1.1299068059870092, "grad_norm": 4.929629642313057, "learning_rate": 1.4333342646235407e-05, "loss": 0.8954, "step": 8002 }, { "epoch": 1.130048009036995, "grad_norm": 3.6197237824251167, "learning_rate": 1.433196870815802e-05, "loss": 0.6106, "step": 8003 }, { "epoch": 1.130189212086981, "grad_norm": 4.036390912956681, "learning_rate": 1.433059466940755e-05, "loss": 0.6077, "step": 8004 }, { "epoch": 1.1303304151369669, "grad_norm": 3.464229162365807, "learning_rate": 1.4329220530015928e-05, "loss": 0.5948, "step": 8005 }, { "epoch": 1.1304716181869527, "grad_norm": 3.2297970336805366, "learning_rate": 1.4327846290015094e-05, "loss": 0.5848, "step": 8006 }, { "epoch": 1.1306128212369386, "grad_norm": 3.1033205424633983, "learning_rate": 1.4326471949436977e-05, "loss": 0.5241, "step": 8007 }, { "epoch": 1.1307540242869245, "grad_norm": 3.563990061420415, "learning_rate": 1.4325097508313524e-05, "loss": 0.5485, "step": 8008 }, { "epoch": 1.1308952273369104, "grad_norm": 3.2317305462116876, "learning_rate": 1.4323722966676671e-05, "loss": 0.6201, "step": 8009 }, { "epoch": 1.1310364303868963, "grad_norm": 3.4773652501691275, "learning_rate": 1.4322348324558366e-05, "loss": 0.5111, "step": 8010 }, { "epoch": 1.1311776334368822, "grad_norm": 2.9880192659032, "learning_rate": 1.4320973581990548e-05, "loss": 0.5407, "step": 8011 }, { "epoch": 1.131318836486868, "grad_norm": 3.7692026447828604, "learning_rate": 1.4319598739005174e-05, "loss": 0.643, "step": 8012 }, { "epoch": 1.131460039536854, "grad_norm": 3.6703571986758794, "learning_rate": 1.431822379563419e-05, "loss": 0.7077, "step": 8013 }, { "epoch": 1.1316012425868398, "grad_norm": 3.3797312949930096, "learning_rate": 1.431684875190955e-05, "loss": 0.5443, "step": 8014 }, { "epoch": 1.1317424456368257, "grad_norm": 3.3223370247385913, "learning_rate": 1.4315473607863206e-05, "loss": 0.4807, "step": 8015 }, { "epoch": 1.1318836486868116, "grad_norm": 4.5717501029329926, "learning_rate": 1.4314098363527122e-05, "loss": 0.6158, "step": 8016 }, { "epoch": 1.1320248517367975, "grad_norm": 4.420681644562101, "learning_rate": 1.4312723018933257e-05, "loss": 0.7762, "step": 8017 }, { "epoch": 1.1321660547867833, "grad_norm": 3.5560078947528018, "learning_rate": 1.431134757411357e-05, "loss": 0.5924, "step": 8018 }, { "epoch": 1.1323072578367692, "grad_norm": 3.326806329440733, "learning_rate": 1.4309972029100026e-05, "loss": 0.5385, "step": 8019 }, { "epoch": 1.132448460886755, "grad_norm": 3.0965945327170585, "learning_rate": 1.4308596383924593e-05, "loss": 0.5705, "step": 8020 }, { "epoch": 1.132589663936741, "grad_norm": 4.532091990120125, "learning_rate": 1.4307220638619244e-05, "loss": 0.8211, "step": 8021 }, { "epoch": 1.1327308669867269, "grad_norm": 3.3676096115847067, "learning_rate": 1.4305844793215943e-05, "loss": 0.6195, "step": 8022 }, { "epoch": 1.1328720700367128, "grad_norm": 4.505915134112574, "learning_rate": 1.4304468847746673e-05, "loss": 0.7486, "step": 8023 }, { "epoch": 1.1330132730866986, "grad_norm": 3.9072039099521194, "learning_rate": 1.4303092802243404e-05, "loss": 0.6677, "step": 8024 }, { "epoch": 1.1331544761366845, "grad_norm": 3.371072509361814, "learning_rate": 1.4301716656738116e-05, "loss": 0.5629, "step": 8025 }, { "epoch": 1.1332956791866704, "grad_norm": 3.319081543617124, "learning_rate": 1.4300340411262792e-05, "loss": 0.6208, "step": 8026 }, { "epoch": 1.1334368822366563, "grad_norm": 3.33618824226898, "learning_rate": 1.4298964065849412e-05, "loss": 0.552, "step": 8027 }, { "epoch": 1.1335780852866422, "grad_norm": 4.134918301489257, "learning_rate": 1.4297587620529965e-05, "loss": 0.7376, "step": 8028 }, { "epoch": 1.133719288336628, "grad_norm": 3.417682290141868, "learning_rate": 1.4296211075336437e-05, "loss": 0.6469, "step": 8029 }, { "epoch": 1.133860491386614, "grad_norm": 3.942233335081193, "learning_rate": 1.4294834430300822e-05, "loss": 0.6173, "step": 8030 }, { "epoch": 1.1340016944365998, "grad_norm": 3.3919699016721148, "learning_rate": 1.4293457685455106e-05, "loss": 0.7197, "step": 8031 }, { "epoch": 1.1341428974865857, "grad_norm": 3.262675953380986, "learning_rate": 1.4292080840831289e-05, "loss": 0.5181, "step": 8032 }, { "epoch": 1.1342841005365716, "grad_norm": 3.669778902693423, "learning_rate": 1.4290703896461367e-05, "loss": 0.6159, "step": 8033 }, { "epoch": 1.1344253035865575, "grad_norm": 3.272941509852461, "learning_rate": 1.4289326852377339e-05, "loss": 0.642, "step": 8034 }, { "epoch": 1.1345665066365433, "grad_norm": 3.950748063640626, "learning_rate": 1.428794970861121e-05, "loss": 0.6363, "step": 8035 }, { "epoch": 1.1347077096865292, "grad_norm": 3.0700171775654463, "learning_rate": 1.4286572465194977e-05, "loss": 0.5542, "step": 8036 }, { "epoch": 1.1348489127365151, "grad_norm": 3.59033521870062, "learning_rate": 1.4285195122160652e-05, "loss": 0.7387, "step": 8037 }, { "epoch": 1.134990115786501, "grad_norm": 4.415996621530364, "learning_rate": 1.4283817679540246e-05, "loss": 0.8377, "step": 8038 }, { "epoch": 1.1351313188364869, "grad_norm": 3.1221454208875237, "learning_rate": 1.4282440137365762e-05, "loss": 0.6553, "step": 8039 }, { "epoch": 1.1352725218864728, "grad_norm": 2.588264556454677, "learning_rate": 1.4281062495669224e-05, "loss": 0.471, "step": 8040 }, { "epoch": 1.1354137249364586, "grad_norm": 3.5446844992314044, "learning_rate": 1.427968475448264e-05, "loss": 0.5623, "step": 8041 }, { "epoch": 1.1355549279864445, "grad_norm": 3.0308874592429778, "learning_rate": 1.4278306913838034e-05, "loss": 0.6026, "step": 8042 }, { "epoch": 1.1356961310364304, "grad_norm": 3.887352159434565, "learning_rate": 1.4276928973767418e-05, "loss": 0.6056, "step": 8043 }, { "epoch": 1.1358373340864163, "grad_norm": 4.405600416081371, "learning_rate": 1.4275550934302822e-05, "loss": 0.789, "step": 8044 }, { "epoch": 1.1359785371364022, "grad_norm": 3.3246968497931495, "learning_rate": 1.4274172795476267e-05, "loss": 0.6166, "step": 8045 }, { "epoch": 1.136119740186388, "grad_norm": 3.726343988740029, "learning_rate": 1.4272794557319785e-05, "loss": 0.6553, "step": 8046 }, { "epoch": 1.136260943236374, "grad_norm": 3.5068900025639045, "learning_rate": 1.4271416219865403e-05, "loss": 0.5735, "step": 8047 }, { "epoch": 1.1364021462863598, "grad_norm": 3.866590982362031, "learning_rate": 1.427003778314515e-05, "loss": 0.6142, "step": 8048 }, { "epoch": 1.1365433493363457, "grad_norm": 3.499273493846996, "learning_rate": 1.4268659247191066e-05, "loss": 0.6598, "step": 8049 }, { "epoch": 1.1366845523863316, "grad_norm": 3.4072538372772736, "learning_rate": 1.426728061203518e-05, "loss": 0.4915, "step": 8050 }, { "epoch": 1.1368257554363175, "grad_norm": 3.529738661978145, "learning_rate": 1.4265901877709541e-05, "loss": 0.5996, "step": 8051 }, { "epoch": 1.1369669584863034, "grad_norm": 3.968659093978716, "learning_rate": 1.426452304424618e-05, "loss": 0.7222, "step": 8052 }, { "epoch": 1.1371081615362892, "grad_norm": 3.776310659022357, "learning_rate": 1.4263144111677148e-05, "loss": 0.6406, "step": 8053 }, { "epoch": 1.1372493645862751, "grad_norm": 3.690271177317174, "learning_rate": 1.4261765080034487e-05, "loss": 0.5826, "step": 8054 }, { "epoch": 1.137390567636261, "grad_norm": 4.458457864471298, "learning_rate": 1.4260385949350247e-05, "loss": 0.7813, "step": 8055 }, { "epoch": 1.137531770686247, "grad_norm": 3.848854527383235, "learning_rate": 1.4259006719656476e-05, "loss": 0.6369, "step": 8056 }, { "epoch": 1.1376729737362328, "grad_norm": 3.3458802726988686, "learning_rate": 1.4257627390985227e-05, "loss": 0.5097, "step": 8057 }, { "epoch": 1.1378141767862187, "grad_norm": 3.630745009448528, "learning_rate": 1.425624796336856e-05, "loss": 0.707, "step": 8058 }, { "epoch": 1.1379553798362045, "grad_norm": 3.7527264943354566, "learning_rate": 1.4254868436838525e-05, "loss": 0.6749, "step": 8059 }, { "epoch": 1.1380965828861904, "grad_norm": 3.584071361580181, "learning_rate": 1.4253488811427188e-05, "loss": 0.6799, "step": 8060 }, { "epoch": 1.1382377859361763, "grad_norm": 3.543826670830915, "learning_rate": 1.4252109087166605e-05, "loss": 0.5211, "step": 8061 }, { "epoch": 1.1383789889861622, "grad_norm": 3.3088601323032742, "learning_rate": 1.4250729264088845e-05, "loss": 0.5936, "step": 8062 }, { "epoch": 1.138520192036148, "grad_norm": 3.407415926900606, "learning_rate": 1.4249349342225971e-05, "loss": 0.6009, "step": 8063 }, { "epoch": 1.138661395086134, "grad_norm": 3.528219698526353, "learning_rate": 1.4247969321610055e-05, "loss": 0.5294, "step": 8064 }, { "epoch": 1.1388025981361198, "grad_norm": 3.238406072603319, "learning_rate": 1.4246589202273167e-05, "loss": 0.5156, "step": 8065 }, { "epoch": 1.1389438011861057, "grad_norm": 3.2240887520519483, "learning_rate": 1.4245208984247378e-05, "loss": 0.5919, "step": 8066 }, { "epoch": 1.1390850042360916, "grad_norm": 3.569261097170348, "learning_rate": 1.4243828667564767e-05, "loss": 0.6271, "step": 8067 }, { "epoch": 1.1392262072860775, "grad_norm": 3.442920369538943, "learning_rate": 1.424244825225741e-05, "loss": 0.6958, "step": 8068 }, { "epoch": 1.1393674103360634, "grad_norm": 3.019791690585976, "learning_rate": 1.4241067738357388e-05, "loss": 0.5936, "step": 8069 }, { "epoch": 1.1395086133860493, "grad_norm": 3.995494347722344, "learning_rate": 1.4239687125896783e-05, "loss": 0.5098, "step": 8070 }, { "epoch": 1.1396498164360351, "grad_norm": 3.3361149793804152, "learning_rate": 1.4238306414907678e-05, "loss": 0.6189, "step": 8071 }, { "epoch": 1.139791019486021, "grad_norm": 3.5075362882396846, "learning_rate": 1.423692560542217e-05, "loss": 0.5747, "step": 8072 }, { "epoch": 1.139932222536007, "grad_norm": 3.322814817587759, "learning_rate": 1.4235544697472335e-05, "loss": 0.5696, "step": 8073 }, { "epoch": 1.1400734255859926, "grad_norm": 4.3845571669174435, "learning_rate": 1.4234163691090269e-05, "loss": 0.6784, "step": 8074 }, { "epoch": 1.1402146286359784, "grad_norm": 3.307965748163738, "learning_rate": 1.4232782586308069e-05, "loss": 0.6278, "step": 8075 }, { "epoch": 1.1403558316859643, "grad_norm": 3.5317557617565427, "learning_rate": 1.423140138315783e-05, "loss": 0.5998, "step": 8076 }, { "epoch": 1.1404970347359502, "grad_norm": 3.0436316704137756, "learning_rate": 1.4230020081671651e-05, "loss": 0.5646, "step": 8077 }, { "epoch": 1.140638237785936, "grad_norm": 3.7729434338464216, "learning_rate": 1.4228638681881633e-05, "loss": 0.6073, "step": 8078 }, { "epoch": 1.140779440835922, "grad_norm": 3.512487627709849, "learning_rate": 1.4227257183819876e-05, "loss": 0.6482, "step": 8079 }, { "epoch": 1.1409206438859079, "grad_norm": 3.0292147493064947, "learning_rate": 1.4225875587518485e-05, "loss": 0.5401, "step": 8080 }, { "epoch": 1.1410618469358937, "grad_norm": 3.463318237144269, "learning_rate": 1.4224493893009577e-05, "loss": 0.5587, "step": 8081 }, { "epoch": 1.1412030499858796, "grad_norm": 4.09600474933207, "learning_rate": 1.422311210032525e-05, "loss": 0.5597, "step": 8082 }, { "epoch": 1.1413442530358655, "grad_norm": 3.525488485618092, "learning_rate": 1.4221730209497625e-05, "loss": 0.5314, "step": 8083 }, { "epoch": 1.1414854560858514, "grad_norm": 2.6917574992551256, "learning_rate": 1.422034822055881e-05, "loss": 0.4601, "step": 8084 }, { "epoch": 1.1416266591358373, "grad_norm": 3.11161014729673, "learning_rate": 1.4218966133540928e-05, "loss": 0.5073, "step": 8085 }, { "epoch": 1.1417678621858232, "grad_norm": 2.9471924638202216, "learning_rate": 1.4217583948476094e-05, "loss": 0.4656, "step": 8086 }, { "epoch": 1.141909065235809, "grad_norm": 4.400216782181998, "learning_rate": 1.4216201665396429e-05, "loss": 0.5948, "step": 8087 }, { "epoch": 1.142050268285795, "grad_norm": 3.6545419479568952, "learning_rate": 1.4214819284334058e-05, "loss": 0.697, "step": 8088 }, { "epoch": 1.1421914713357808, "grad_norm": 3.3098133376144876, "learning_rate": 1.421343680532111e-05, "loss": 0.5342, "step": 8089 }, { "epoch": 1.1423326743857667, "grad_norm": 4.303964373610726, "learning_rate": 1.4212054228389712e-05, "loss": 0.6712, "step": 8090 }, { "epoch": 1.1424738774357526, "grad_norm": 3.172856528258202, "learning_rate": 1.4210671553571986e-05, "loss": 0.5141, "step": 8091 }, { "epoch": 1.1426150804857385, "grad_norm": 3.897681278890643, "learning_rate": 1.4209288780900074e-05, "loss": 0.668, "step": 8092 }, { "epoch": 1.1427562835357243, "grad_norm": 3.382455034506384, "learning_rate": 1.4207905910406113e-05, "loss": 0.6563, "step": 8093 }, { "epoch": 1.1428974865857102, "grad_norm": 3.560304051930665, "learning_rate": 1.4206522942122232e-05, "loss": 0.6969, "step": 8094 }, { "epoch": 1.143038689635696, "grad_norm": 3.479739387146171, "learning_rate": 1.4205139876080574e-05, "loss": 0.6168, "step": 8095 }, { "epoch": 1.143179892685682, "grad_norm": 3.048107947315489, "learning_rate": 1.4203756712313284e-05, "loss": 0.5107, "step": 8096 }, { "epoch": 1.1433210957356679, "grad_norm": 3.771851907862194, "learning_rate": 1.42023734508525e-05, "loss": 0.6714, "step": 8097 }, { "epoch": 1.1434622987856538, "grad_norm": 2.7350997776684087, "learning_rate": 1.4200990091730376e-05, "loss": 0.4566, "step": 8098 }, { "epoch": 1.1436035018356396, "grad_norm": 4.271817291250578, "learning_rate": 1.4199606634979054e-05, "loss": 0.7686, "step": 8099 }, { "epoch": 1.1437447048856255, "grad_norm": 3.096084920790781, "learning_rate": 1.4198223080630686e-05, "loss": 0.5297, "step": 8100 }, { "epoch": 1.1438859079356114, "grad_norm": 3.4597248211942895, "learning_rate": 1.4196839428717428e-05, "loss": 0.5803, "step": 8101 }, { "epoch": 1.1440271109855973, "grad_norm": 3.385262987980981, "learning_rate": 1.4195455679271435e-05, "loss": 0.5413, "step": 8102 }, { "epoch": 1.1441683140355832, "grad_norm": 3.745718931335684, "learning_rate": 1.4194071832324863e-05, "loss": 0.6381, "step": 8103 }, { "epoch": 1.144309517085569, "grad_norm": 4.609173837585445, "learning_rate": 1.4192687887909875e-05, "loss": 0.6194, "step": 8104 }, { "epoch": 1.144450720135555, "grad_norm": 4.347929334789232, "learning_rate": 1.4191303846058628e-05, "loss": 0.6238, "step": 8105 }, { "epoch": 1.1445919231855408, "grad_norm": 3.722237735465618, "learning_rate": 1.4189919706803292e-05, "loss": 0.6349, "step": 8106 }, { "epoch": 1.1447331262355267, "grad_norm": 3.390300131956305, "learning_rate": 1.4188535470176033e-05, "loss": 0.6367, "step": 8107 }, { "epoch": 1.1448743292855126, "grad_norm": 3.584121022673777, "learning_rate": 1.4187151136209015e-05, "loss": 0.622, "step": 8108 }, { "epoch": 1.1450155323354985, "grad_norm": 4.256336235160171, "learning_rate": 1.4185766704934411e-05, "loss": 0.6814, "step": 8109 }, { "epoch": 1.1451567353854843, "grad_norm": 3.4519615154053307, "learning_rate": 1.4184382176384403e-05, "loss": 0.5797, "step": 8110 }, { "epoch": 1.1452979384354702, "grad_norm": 4.425484809236835, "learning_rate": 1.4182997550591155e-05, "loss": 0.6675, "step": 8111 }, { "epoch": 1.1454391414854561, "grad_norm": 4.1076512916247605, "learning_rate": 1.4181612827586854e-05, "loss": 0.6802, "step": 8112 }, { "epoch": 1.145580344535442, "grad_norm": 3.50289977568371, "learning_rate": 1.4180228007403676e-05, "loss": 0.5075, "step": 8113 }, { "epoch": 1.1457215475854279, "grad_norm": 4.148552016269906, "learning_rate": 1.4178843090073802e-05, "loss": 0.6524, "step": 8114 }, { "epoch": 1.1458627506354138, "grad_norm": 3.377993296146897, "learning_rate": 1.4177458075629422e-05, "loss": 0.5914, "step": 8115 }, { "epoch": 1.1460039536853996, "grad_norm": 3.8656728075330564, "learning_rate": 1.4176072964102719e-05, "loss": 0.7054, "step": 8116 }, { "epoch": 1.1461451567353855, "grad_norm": 3.0973466588485508, "learning_rate": 1.4174687755525882e-05, "loss": 0.5151, "step": 8117 }, { "epoch": 1.1462863597853714, "grad_norm": 4.913310481951282, "learning_rate": 1.4173302449931107e-05, "loss": 0.8259, "step": 8118 }, { "epoch": 1.1464275628353573, "grad_norm": 3.3256874468735, "learning_rate": 1.4171917047350586e-05, "loss": 0.5903, "step": 8119 }, { "epoch": 1.1465687658853432, "grad_norm": 4.173796831917979, "learning_rate": 1.4170531547816513e-05, "loss": 0.7197, "step": 8120 }, { "epoch": 1.146709968935329, "grad_norm": 2.7219784884728053, "learning_rate": 1.416914595136109e-05, "loss": 0.4772, "step": 8121 }, { "epoch": 1.146851171985315, "grad_norm": 3.5822018814537167, "learning_rate": 1.4167760258016514e-05, "loss": 0.7387, "step": 8122 }, { "epoch": 1.1469923750353008, "grad_norm": 3.4049410457458102, "learning_rate": 1.416637446781499e-05, "loss": 0.5843, "step": 8123 }, { "epoch": 1.1471335780852867, "grad_norm": 3.497128157445856, "learning_rate": 1.4164988580788723e-05, "loss": 0.6104, "step": 8124 }, { "epoch": 1.1472747811352726, "grad_norm": 4.218975688142539, "learning_rate": 1.4163602596969917e-05, "loss": 0.6136, "step": 8125 }, { "epoch": 1.1474159841852585, "grad_norm": 3.3338549557085395, "learning_rate": 1.4162216516390787e-05, "loss": 0.6635, "step": 8126 }, { "epoch": 1.1475571872352444, "grad_norm": 3.2847238133362837, "learning_rate": 1.4160830339083544e-05, "loss": 0.6439, "step": 8127 }, { "epoch": 1.1476983902852302, "grad_norm": 2.9964596840582503, "learning_rate": 1.4159444065080398e-05, "loss": 0.5001, "step": 8128 }, { "epoch": 1.1478395933352161, "grad_norm": 3.9432677693612135, "learning_rate": 1.4158057694413572e-05, "loss": 0.6083, "step": 8129 }, { "epoch": 1.147980796385202, "grad_norm": 3.6007401494381934, "learning_rate": 1.4156671227115278e-05, "loss": 0.6174, "step": 8130 }, { "epoch": 1.148121999435188, "grad_norm": 2.6909012564739156, "learning_rate": 1.415528466321774e-05, "loss": 0.4757, "step": 8131 }, { "epoch": 1.1482632024851738, "grad_norm": 3.640285119891178, "learning_rate": 1.4153898002753184e-05, "loss": 0.661, "step": 8132 }, { "epoch": 1.1484044055351597, "grad_norm": 3.620044653348565, "learning_rate": 1.4152511245753828e-05, "loss": 0.6509, "step": 8133 }, { "epoch": 1.1485456085851453, "grad_norm": 2.8746923393774253, "learning_rate": 1.4151124392251905e-05, "loss": 0.4544, "step": 8134 }, { "epoch": 1.1486868116351312, "grad_norm": 3.004189653586562, "learning_rate": 1.4149737442279645e-05, "loss": 0.5559, "step": 8135 }, { "epoch": 1.148828014685117, "grad_norm": 3.821614570217079, "learning_rate": 1.4148350395869279e-05, "loss": 0.5988, "step": 8136 }, { "epoch": 1.148969217735103, "grad_norm": 3.790070055383577, "learning_rate": 1.414696325305304e-05, "loss": 0.65, "step": 8137 }, { "epoch": 1.1491104207850888, "grad_norm": 3.0046950491815836, "learning_rate": 1.414557601386317e-05, "loss": 0.4735, "step": 8138 }, { "epoch": 1.1492516238350747, "grad_norm": 3.369014208563364, "learning_rate": 1.41441886783319e-05, "loss": 0.6184, "step": 8139 }, { "epoch": 1.1493928268850606, "grad_norm": 4.459355891006322, "learning_rate": 1.4142801246491476e-05, "loss": 0.7221, "step": 8140 }, { "epoch": 1.1495340299350465, "grad_norm": 3.775783415908075, "learning_rate": 1.414141371837414e-05, "loss": 0.6911, "step": 8141 }, { "epoch": 1.1496752329850324, "grad_norm": 3.507550696299888, "learning_rate": 1.4140026094012136e-05, "loss": 0.6513, "step": 8142 }, { "epoch": 1.1498164360350183, "grad_norm": 4.029857857277068, "learning_rate": 1.4138638373437713e-05, "loss": 0.7478, "step": 8143 }, { "epoch": 1.1499576390850041, "grad_norm": 3.132778643999065, "learning_rate": 1.4137250556683129e-05, "loss": 0.5326, "step": 8144 }, { "epoch": 1.15009884213499, "grad_norm": 4.346210884243398, "learning_rate": 1.413586264378062e-05, "loss": 0.7506, "step": 8145 }, { "epoch": 1.150240045184976, "grad_norm": 4.50166634545284, "learning_rate": 1.4134474634762454e-05, "loss": 0.7582, "step": 8146 }, { "epoch": 1.1503812482349618, "grad_norm": 4.356499918691893, "learning_rate": 1.4133086529660881e-05, "loss": 0.6666, "step": 8147 }, { "epoch": 1.1505224512849477, "grad_norm": 3.579998590848396, "learning_rate": 1.4131698328508163e-05, "loss": 0.6416, "step": 8148 }, { "epoch": 1.1506636543349336, "grad_norm": 2.810639898316791, "learning_rate": 1.413031003133656e-05, "loss": 0.4253, "step": 8149 }, { "epoch": 1.1508048573849194, "grad_norm": 2.8006785903372, "learning_rate": 1.4128921638178336e-05, "loss": 0.5341, "step": 8150 }, { "epoch": 1.1509460604349053, "grad_norm": 3.2209723933840624, "learning_rate": 1.4127533149065756e-05, "loss": 0.5517, "step": 8151 }, { "epoch": 1.1510872634848912, "grad_norm": 3.3408849974865054, "learning_rate": 1.4126144564031085e-05, "loss": 0.5214, "step": 8152 }, { "epoch": 1.151228466534877, "grad_norm": 3.2681881536602817, "learning_rate": 1.4124755883106602e-05, "loss": 0.5476, "step": 8153 }, { "epoch": 1.151369669584863, "grad_norm": 3.6744226824858064, "learning_rate": 1.412336710632457e-05, "loss": 0.5762, "step": 8154 }, { "epoch": 1.1515108726348489, "grad_norm": 2.893841612491844, "learning_rate": 1.4121978233717269e-05, "loss": 0.4545, "step": 8155 }, { "epoch": 1.1516520756848347, "grad_norm": 3.4228677521409887, "learning_rate": 1.4120589265316974e-05, "loss": 0.5936, "step": 8156 }, { "epoch": 1.1517932787348206, "grad_norm": 3.1816121874173215, "learning_rate": 1.4119200201155963e-05, "loss": 0.6313, "step": 8157 }, { "epoch": 1.1519344817848065, "grad_norm": 3.4269762736471745, "learning_rate": 1.4117811041266518e-05, "loss": 0.6248, "step": 8158 }, { "epoch": 1.1520756848347924, "grad_norm": 3.7703587482541274, "learning_rate": 1.4116421785680923e-05, "loss": 0.5943, "step": 8159 }, { "epoch": 1.1522168878847783, "grad_norm": 3.1524325754643026, "learning_rate": 1.4115032434431461e-05, "loss": 0.6141, "step": 8160 }, { "epoch": 1.1523580909347642, "grad_norm": 3.5406119279976993, "learning_rate": 1.411364298755043e-05, "loss": 0.6164, "step": 8161 }, { "epoch": 1.15249929398475, "grad_norm": 3.8682901272025343, "learning_rate": 1.4112253445070103e-05, "loss": 0.6345, "step": 8162 }, { "epoch": 1.152640497034736, "grad_norm": 3.16344294373622, "learning_rate": 1.4110863807022788e-05, "loss": 0.5797, "step": 8163 }, { "epoch": 1.1527817000847218, "grad_norm": 3.119799345128174, "learning_rate": 1.410947407344077e-05, "loss": 0.5673, "step": 8164 }, { "epoch": 1.1529229031347077, "grad_norm": 3.901952948524144, "learning_rate": 1.4108084244356353e-05, "loss": 0.7494, "step": 8165 }, { "epoch": 1.1530641061846936, "grad_norm": 2.698194817560956, "learning_rate": 1.410669431980183e-05, "loss": 0.5093, "step": 8166 }, { "epoch": 1.1532053092346795, "grad_norm": 3.8951359228476368, "learning_rate": 1.4105304299809505e-05, "loss": 0.6469, "step": 8167 }, { "epoch": 1.1533465122846653, "grad_norm": 3.6001222701312257, "learning_rate": 1.410391418441168e-05, "loss": 0.6413, "step": 8168 }, { "epoch": 1.1534877153346512, "grad_norm": 3.728120905409748, "learning_rate": 1.4102523973640665e-05, "loss": 0.5315, "step": 8169 }, { "epoch": 1.153628918384637, "grad_norm": 3.617784104953617, "learning_rate": 1.4101133667528761e-05, "loss": 0.6133, "step": 8170 }, { "epoch": 1.153770121434623, "grad_norm": 4.267306386602099, "learning_rate": 1.4099743266108281e-05, "loss": 0.6904, "step": 8171 }, { "epoch": 1.1539113244846089, "grad_norm": 4.192866002205385, "learning_rate": 1.409835276941154e-05, "loss": 0.6971, "step": 8172 }, { "epoch": 1.1540525275345948, "grad_norm": 3.073096575355924, "learning_rate": 1.4096962177470851e-05, "loss": 0.4921, "step": 8173 }, { "epoch": 1.1541937305845806, "grad_norm": 3.331287449235198, "learning_rate": 1.4095571490318532e-05, "loss": 0.5664, "step": 8174 }, { "epoch": 1.1543349336345665, "grad_norm": 3.176471881744591, "learning_rate": 1.4094180707986897e-05, "loss": 0.5177, "step": 8175 }, { "epoch": 1.1544761366845524, "grad_norm": 3.2591693074042047, "learning_rate": 1.4092789830508271e-05, "loss": 0.5757, "step": 8176 }, { "epoch": 1.1546173397345383, "grad_norm": 3.1566431196366382, "learning_rate": 1.4091398857914978e-05, "loss": 0.5607, "step": 8177 }, { "epoch": 1.1547585427845242, "grad_norm": 3.4869177774061355, "learning_rate": 1.4090007790239344e-05, "loss": 0.556, "step": 8178 }, { "epoch": 1.15489974583451, "grad_norm": 3.852770474018618, "learning_rate": 1.4088616627513691e-05, "loss": 0.6287, "step": 8179 }, { "epoch": 1.155040948884496, "grad_norm": 3.909684332050486, "learning_rate": 1.4087225369770356e-05, "loss": 0.5752, "step": 8180 }, { "epoch": 1.1551821519344818, "grad_norm": 3.2785300018096626, "learning_rate": 1.4085834017041669e-05, "loss": 0.5948, "step": 8181 }, { "epoch": 1.1553233549844677, "grad_norm": 3.6689428787647045, "learning_rate": 1.4084442569359964e-05, "loss": 0.7646, "step": 8182 }, { "epoch": 1.1554645580344536, "grad_norm": 3.5636362488466005, "learning_rate": 1.4083051026757578e-05, "loss": 0.6114, "step": 8183 }, { "epoch": 1.1556057610844395, "grad_norm": 4.159618274870332, "learning_rate": 1.4081659389266846e-05, "loss": 0.751, "step": 8184 }, { "epoch": 1.1557469641344253, "grad_norm": 3.234703171706695, "learning_rate": 1.4080267656920112e-05, "loss": 0.5834, "step": 8185 }, { "epoch": 1.1558881671844112, "grad_norm": 3.407195289164041, "learning_rate": 1.4078875829749724e-05, "loss": 0.6389, "step": 8186 }, { "epoch": 1.1560293702343971, "grad_norm": 3.4099677016712397, "learning_rate": 1.4077483907788026e-05, "loss": 0.6008, "step": 8187 }, { "epoch": 1.156170573284383, "grad_norm": 3.6868236007994852, "learning_rate": 1.4076091891067355e-05, "loss": 0.6065, "step": 8188 }, { "epoch": 1.1563117763343689, "grad_norm": 3.6144039531676664, "learning_rate": 1.4074699779620073e-05, "loss": 0.7386, "step": 8189 }, { "epoch": 1.1564529793843548, "grad_norm": 3.270569326735332, "learning_rate": 1.4073307573478528e-05, "loss": 0.5111, "step": 8190 }, { "epoch": 1.1565941824343406, "grad_norm": 2.9213847594137974, "learning_rate": 1.4071915272675074e-05, "loss": 0.467, "step": 8191 }, { "epoch": 1.1567353854843265, "grad_norm": 2.842145383359047, "learning_rate": 1.4070522877242068e-05, "loss": 0.4333, "step": 8192 }, { "epoch": 1.1568765885343124, "grad_norm": 4.6565940521982725, "learning_rate": 1.4069130387211867e-05, "loss": 0.6103, "step": 8193 }, { "epoch": 1.1570177915842983, "grad_norm": 4.012139964648908, "learning_rate": 1.4067737802616832e-05, "loss": 0.668, "step": 8194 }, { "epoch": 1.1571589946342842, "grad_norm": 3.6080963740655254, "learning_rate": 1.406634512348933e-05, "loss": 0.6923, "step": 8195 }, { "epoch": 1.15730019768427, "grad_norm": 3.202099071341995, "learning_rate": 1.406495234986172e-05, "loss": 0.5884, "step": 8196 }, { "epoch": 1.157441400734256, "grad_norm": 2.6115673601981837, "learning_rate": 1.4063559481766375e-05, "loss": 0.4009, "step": 8197 }, { "epoch": 1.1575826037842418, "grad_norm": 3.2237665247432212, "learning_rate": 1.4062166519235665e-05, "loss": 0.5402, "step": 8198 }, { "epoch": 1.1577238068342277, "grad_norm": 3.9092128260486354, "learning_rate": 1.4060773462301955e-05, "loss": 0.6783, "step": 8199 }, { "epoch": 1.1578650098842136, "grad_norm": 2.8350704980508246, "learning_rate": 1.4059380310997626e-05, "loss": 0.4654, "step": 8200 }, { "epoch": 1.1580062129341995, "grad_norm": 3.6829213796043256, "learning_rate": 1.4057987065355052e-05, "loss": 0.7002, "step": 8201 }, { "epoch": 1.1581474159841854, "grad_norm": 3.571936527405481, "learning_rate": 1.4056593725406608e-05, "loss": 0.6275, "step": 8202 }, { "epoch": 1.1582886190341712, "grad_norm": 3.739708326509807, "learning_rate": 1.4055200291184683e-05, "loss": 0.5984, "step": 8203 }, { "epoch": 1.1584298220841571, "grad_norm": 3.850302344740976, "learning_rate": 1.4053806762721652e-05, "loss": 0.5563, "step": 8204 }, { "epoch": 1.158571025134143, "grad_norm": 3.518784388758248, "learning_rate": 1.4052413140049898e-05, "loss": 0.5231, "step": 8205 }, { "epoch": 1.158712228184129, "grad_norm": 3.607101718310263, "learning_rate": 1.405101942320182e-05, "loss": 0.6024, "step": 8206 }, { "epoch": 1.1588534312341148, "grad_norm": 3.7631804541130554, "learning_rate": 1.4049625612209797e-05, "loss": 0.5965, "step": 8207 }, { "epoch": 1.1589946342841007, "grad_norm": 4.784680716999602, "learning_rate": 1.4048231707106222e-05, "loss": 0.7696, "step": 8208 }, { "epoch": 1.1591358373340865, "grad_norm": 3.3307427819564515, "learning_rate": 1.4046837707923493e-05, "loss": 0.5908, "step": 8209 }, { "epoch": 1.1592770403840722, "grad_norm": 3.919756688641482, "learning_rate": 1.4045443614694004e-05, "loss": 0.6526, "step": 8210 }, { "epoch": 1.159418243434058, "grad_norm": 3.298925983746374, "learning_rate": 1.4044049427450146e-05, "loss": 0.5117, "step": 8211 }, { "epoch": 1.159559446484044, "grad_norm": 3.028725467685604, "learning_rate": 1.4042655146224333e-05, "loss": 0.5214, "step": 8212 }, { "epoch": 1.1597006495340298, "grad_norm": 3.685974020426489, "learning_rate": 1.4041260771048958e-05, "loss": 0.6354, "step": 8213 }, { "epoch": 1.1598418525840157, "grad_norm": 3.913447815072083, "learning_rate": 1.4039866301956427e-05, "loss": 0.7092, "step": 8214 }, { "epoch": 1.1599830556340016, "grad_norm": 2.926529419536438, "learning_rate": 1.403847173897915e-05, "loss": 0.5114, "step": 8215 }, { "epoch": 1.1601242586839875, "grad_norm": 3.89838089151196, "learning_rate": 1.4037077082149533e-05, "loss": 0.638, "step": 8216 }, { "epoch": 1.1602654617339734, "grad_norm": 3.505469362493737, "learning_rate": 1.4035682331499987e-05, "loss": 0.5861, "step": 8217 }, { "epoch": 1.1604066647839593, "grad_norm": 3.649312170653838, "learning_rate": 1.4034287487062928e-05, "loss": 0.5812, "step": 8218 }, { "epoch": 1.1605478678339451, "grad_norm": 3.3418896701069323, "learning_rate": 1.4032892548870769e-05, "loss": 0.552, "step": 8219 }, { "epoch": 1.160689070883931, "grad_norm": 4.645062352186488, "learning_rate": 1.403149751695593e-05, "loss": 0.8188, "step": 8220 }, { "epoch": 1.160830273933917, "grad_norm": 3.38680125481749, "learning_rate": 1.4030102391350828e-05, "loss": 0.5751, "step": 8221 }, { "epoch": 1.1609714769839028, "grad_norm": 3.9927982301599596, "learning_rate": 1.4028707172087885e-05, "loss": 0.6813, "step": 8222 }, { "epoch": 1.1611126800338887, "grad_norm": 3.4371551226942754, "learning_rate": 1.402731185919953e-05, "loss": 0.5199, "step": 8223 }, { "epoch": 1.1612538830838746, "grad_norm": 4.592460265089685, "learning_rate": 1.402591645271819e-05, "loss": 0.7916, "step": 8224 }, { "epoch": 1.1613950861338604, "grad_norm": 3.235586539467011, "learning_rate": 1.4024520952676286e-05, "loss": 0.6107, "step": 8225 }, { "epoch": 1.1615362891838463, "grad_norm": 3.4564133148555287, "learning_rate": 1.4023125359106253e-05, "loss": 0.5892, "step": 8226 }, { "epoch": 1.1616774922338322, "grad_norm": 4.743146425001028, "learning_rate": 1.4021729672040527e-05, "loss": 0.8198, "step": 8227 }, { "epoch": 1.161818695283818, "grad_norm": 3.562759063869971, "learning_rate": 1.4020333891511536e-05, "loss": 0.7455, "step": 8228 }, { "epoch": 1.161959898333804, "grad_norm": 3.6012549564426606, "learning_rate": 1.4018938017551727e-05, "loss": 0.6397, "step": 8229 }, { "epoch": 1.1621011013837899, "grad_norm": 3.571457410026541, "learning_rate": 1.4017542050193532e-05, "loss": 0.5837, "step": 8230 }, { "epoch": 1.1622423044337757, "grad_norm": 2.8428310801874965, "learning_rate": 1.401614598946939e-05, "loss": 0.4914, "step": 8231 }, { "epoch": 1.1623835074837616, "grad_norm": 3.439728449933566, "learning_rate": 1.4014749835411754e-05, "loss": 0.5854, "step": 8232 }, { "epoch": 1.1625247105337475, "grad_norm": 3.5435094927846196, "learning_rate": 1.4013353588053064e-05, "loss": 0.6096, "step": 8233 }, { "epoch": 1.1626659135837334, "grad_norm": 3.411961448348514, "learning_rate": 1.401195724742577e-05, "loss": 0.6951, "step": 8234 }, { "epoch": 1.1628071166337193, "grad_norm": 3.6003960524102663, "learning_rate": 1.4010560813562327e-05, "loss": 0.558, "step": 8235 }, { "epoch": 1.1629483196837052, "grad_norm": 3.5515967227176346, "learning_rate": 1.4009164286495179e-05, "loss": 0.6125, "step": 8236 }, { "epoch": 1.163089522733691, "grad_norm": 3.096296461027036, "learning_rate": 1.4007767666256783e-05, "loss": 0.4314, "step": 8237 }, { "epoch": 1.163230725783677, "grad_norm": 3.350280970181078, "learning_rate": 1.40063709528796e-05, "loss": 0.5526, "step": 8238 }, { "epoch": 1.1633719288336628, "grad_norm": 3.634435620366391, "learning_rate": 1.4004974146396082e-05, "loss": 0.6088, "step": 8239 }, { "epoch": 1.1635131318836487, "grad_norm": 3.431110944127314, "learning_rate": 1.40035772468387e-05, "loss": 0.6168, "step": 8240 }, { "epoch": 1.1636543349336346, "grad_norm": 5.358322257686108, "learning_rate": 1.4002180254239907e-05, "loss": 0.9033, "step": 8241 }, { "epoch": 1.1637955379836205, "grad_norm": 3.253048633261453, "learning_rate": 1.4000783168632178e-05, "loss": 0.494, "step": 8242 }, { "epoch": 1.1639367410336063, "grad_norm": 3.3078880252937957, "learning_rate": 1.3999385990047971e-05, "loss": 0.6062, "step": 8243 }, { "epoch": 1.1640779440835922, "grad_norm": 3.1605970448116856, "learning_rate": 1.3997988718519766e-05, "loss": 0.581, "step": 8244 }, { "epoch": 1.164219147133578, "grad_norm": 4.040110025344713, "learning_rate": 1.3996591354080024e-05, "loss": 0.6096, "step": 8245 }, { "epoch": 1.164360350183564, "grad_norm": 3.2546788620734532, "learning_rate": 1.399519389676123e-05, "loss": 0.566, "step": 8246 }, { "epoch": 1.1645015532335499, "grad_norm": 3.2712374807478715, "learning_rate": 1.3993796346595854e-05, "loss": 0.5439, "step": 8247 }, { "epoch": 1.1646427562835358, "grad_norm": 3.667470317953565, "learning_rate": 1.3992398703616372e-05, "loss": 0.6948, "step": 8248 }, { "epoch": 1.1647839593335216, "grad_norm": 4.168812369375702, "learning_rate": 1.3991000967855272e-05, "loss": 0.7419, "step": 8249 }, { "epoch": 1.1649251623835075, "grad_norm": 3.884926643780467, "learning_rate": 1.3989603139345037e-05, "loss": 0.7391, "step": 8250 }, { "epoch": 1.1650663654334934, "grad_norm": 4.317894937824882, "learning_rate": 1.3988205218118141e-05, "loss": 0.692, "step": 8251 }, { "epoch": 1.1652075684834793, "grad_norm": 3.777476044770958, "learning_rate": 1.3986807204207077e-05, "loss": 0.6687, "step": 8252 }, { "epoch": 1.1653487715334652, "grad_norm": 3.7104032857127898, "learning_rate": 1.398540909764434e-05, "loss": 0.6621, "step": 8253 }, { "epoch": 1.165489974583451, "grad_norm": 3.28607080658845, "learning_rate": 1.3984010898462417e-05, "loss": 0.4879, "step": 8254 }, { "epoch": 1.165631177633437, "grad_norm": 3.4411351596177093, "learning_rate": 1.39826126066938e-05, "loss": 0.5212, "step": 8255 }, { "epoch": 1.1657723806834228, "grad_norm": 3.66304061963536, "learning_rate": 1.3981214222370985e-05, "loss": 0.5571, "step": 8256 }, { "epoch": 1.1659135837334087, "grad_norm": 3.577863090664369, "learning_rate": 1.3979815745526471e-05, "loss": 0.6686, "step": 8257 }, { "epoch": 1.1660547867833946, "grad_norm": 3.503490475286253, "learning_rate": 1.397841717619276e-05, "loss": 0.5411, "step": 8258 }, { "epoch": 1.1661959898333805, "grad_norm": 3.67508426476766, "learning_rate": 1.397701851440235e-05, "loss": 0.7675, "step": 8259 }, { "epoch": 1.1663371928833663, "grad_norm": 3.153221490778555, "learning_rate": 1.3975619760187746e-05, "loss": 0.5166, "step": 8260 }, { "epoch": 1.1664783959333522, "grad_norm": 3.8751180980940476, "learning_rate": 1.3974220913581458e-05, "loss": 0.574, "step": 8261 }, { "epoch": 1.1666195989833381, "grad_norm": 3.644641565064742, "learning_rate": 1.3972821974615991e-05, "loss": 0.6282, "step": 8262 }, { "epoch": 1.166760802033324, "grad_norm": 4.138287659921616, "learning_rate": 1.3971422943323858e-05, "loss": 0.7505, "step": 8263 }, { "epoch": 1.1669020050833099, "grad_norm": 4.496561678500985, "learning_rate": 1.3970023819737573e-05, "loss": 0.9951, "step": 8264 }, { "epoch": 1.1670432081332958, "grad_norm": 3.35614206043222, "learning_rate": 1.3968624603889644e-05, "loss": 0.5212, "step": 8265 }, { "epoch": 1.1671844111832816, "grad_norm": 4.306881089586144, "learning_rate": 1.3967225295812595e-05, "loss": 0.7243, "step": 8266 }, { "epoch": 1.1673256142332675, "grad_norm": 3.765357929367898, "learning_rate": 1.3965825895538947e-05, "loss": 0.6868, "step": 8267 }, { "epoch": 1.1674668172832534, "grad_norm": 3.532371131285415, "learning_rate": 1.3964426403101212e-05, "loss": 0.6491, "step": 8268 }, { "epoch": 1.1676080203332393, "grad_norm": 4.1285447857942605, "learning_rate": 1.3963026818531924e-05, "loss": 0.7461, "step": 8269 }, { "epoch": 1.167749223383225, "grad_norm": 3.298625227096976, "learning_rate": 1.3961627141863603e-05, "loss": 0.5743, "step": 8270 }, { "epoch": 1.1678904264332108, "grad_norm": 3.01311193286848, "learning_rate": 1.3960227373128779e-05, "loss": 0.4528, "step": 8271 }, { "epoch": 1.1680316294831967, "grad_norm": 3.8081752452121638, "learning_rate": 1.3958827512359979e-05, "loss": 0.6205, "step": 8272 }, { "epoch": 1.1681728325331826, "grad_norm": 3.642622664440386, "learning_rate": 1.3957427559589738e-05, "loss": 0.5857, "step": 8273 }, { "epoch": 1.1683140355831685, "grad_norm": 3.785830079389926, "learning_rate": 1.395602751485059e-05, "loss": 0.6389, "step": 8274 }, { "epoch": 1.1684552386331544, "grad_norm": 3.307034370662792, "learning_rate": 1.3954627378175072e-05, "loss": 0.6378, "step": 8275 }, { "epoch": 1.1685964416831403, "grad_norm": 4.053214506615257, "learning_rate": 1.3953227149595718e-05, "loss": 0.6831, "step": 8276 }, { "epoch": 1.1687376447331261, "grad_norm": 2.9480271065795094, "learning_rate": 1.3951826829145074e-05, "loss": 0.5369, "step": 8277 }, { "epoch": 1.168878847783112, "grad_norm": 3.833672446881275, "learning_rate": 1.3950426416855685e-05, "loss": 0.6331, "step": 8278 }, { "epoch": 1.169020050833098, "grad_norm": 2.958382600890173, "learning_rate": 1.3949025912760088e-05, "loss": 0.576, "step": 8279 }, { "epoch": 1.1691612538830838, "grad_norm": 2.9413391028612366, "learning_rate": 1.3947625316890836e-05, "loss": 0.5332, "step": 8280 }, { "epoch": 1.1693024569330697, "grad_norm": 3.174621601032077, "learning_rate": 1.3946224629280476e-05, "loss": 0.5838, "step": 8281 }, { "epoch": 1.1694436599830556, "grad_norm": 4.260416608202992, "learning_rate": 1.3944823849961557e-05, "loss": 0.7717, "step": 8282 }, { "epoch": 1.1695848630330414, "grad_norm": 3.7520727617527476, "learning_rate": 1.394342297896664e-05, "loss": 0.6443, "step": 8283 }, { "epoch": 1.1697260660830273, "grad_norm": 3.222661245377341, "learning_rate": 1.3942022016328276e-05, "loss": 0.59, "step": 8284 }, { "epoch": 1.1698672691330132, "grad_norm": 3.9948122755584676, "learning_rate": 1.3940620962079018e-05, "loss": 0.6516, "step": 8285 }, { "epoch": 1.170008472182999, "grad_norm": 3.6564796644704414, "learning_rate": 1.3939219816251435e-05, "loss": 0.6581, "step": 8286 }, { "epoch": 1.170149675232985, "grad_norm": 4.228637208210019, "learning_rate": 1.3937818578878081e-05, "loss": 0.6694, "step": 8287 }, { "epoch": 1.1702908782829708, "grad_norm": 4.102501999733961, "learning_rate": 1.3936417249991528e-05, "loss": 0.6356, "step": 8288 }, { "epoch": 1.1704320813329567, "grad_norm": 3.095838545828639, "learning_rate": 1.3935015829624337e-05, "loss": 0.6505, "step": 8289 }, { "epoch": 1.1705732843829426, "grad_norm": 3.531918495720782, "learning_rate": 1.3933614317809076e-05, "loss": 0.63, "step": 8290 }, { "epoch": 1.1707144874329285, "grad_norm": 3.4244590728157265, "learning_rate": 1.3932212714578316e-05, "loss": 0.5943, "step": 8291 }, { "epoch": 1.1708556904829144, "grad_norm": 3.5008219869742043, "learning_rate": 1.3930811019964633e-05, "loss": 0.5981, "step": 8292 }, { "epoch": 1.1709968935329003, "grad_norm": 3.8861870520847828, "learning_rate": 1.39294092340006e-05, "loss": 0.6434, "step": 8293 }, { "epoch": 1.1711380965828861, "grad_norm": 4.202121696914486, "learning_rate": 1.3928007356718792e-05, "loss": 0.7005, "step": 8294 }, { "epoch": 1.171279299632872, "grad_norm": 3.3188395465168905, "learning_rate": 1.3926605388151791e-05, "loss": 0.55, "step": 8295 }, { "epoch": 1.171420502682858, "grad_norm": 4.284812885762325, "learning_rate": 1.3925203328332173e-05, "loss": 0.9057, "step": 8296 }, { "epoch": 1.1715617057328438, "grad_norm": 4.002515883312534, "learning_rate": 1.3923801177292529e-05, "loss": 0.5579, "step": 8297 }, { "epoch": 1.1717029087828297, "grad_norm": 3.4002931228706403, "learning_rate": 1.3922398935065437e-05, "loss": 0.5218, "step": 8298 }, { "epoch": 1.1718441118328156, "grad_norm": 3.264394441548996, "learning_rate": 1.392099660168349e-05, "loss": 0.4535, "step": 8299 }, { "epoch": 1.1719853148828014, "grad_norm": 4.208880127254522, "learning_rate": 1.3919594177179272e-05, "loss": 0.659, "step": 8300 }, { "epoch": 1.1721265179327873, "grad_norm": 3.4911476254059566, "learning_rate": 1.3918191661585384e-05, "loss": 0.5719, "step": 8301 }, { "epoch": 1.1722677209827732, "grad_norm": 4.150622998617397, "learning_rate": 1.3916789054934408e-05, "loss": 0.7169, "step": 8302 }, { "epoch": 1.172408924032759, "grad_norm": 3.573390661528358, "learning_rate": 1.3915386357258948e-05, "loss": 0.6264, "step": 8303 }, { "epoch": 1.172550127082745, "grad_norm": 4.4049483944352215, "learning_rate": 1.39139835685916e-05, "loss": 0.7604, "step": 8304 }, { "epoch": 1.1726913301327309, "grad_norm": 3.2214145451369376, "learning_rate": 1.3912580688964964e-05, "loss": 0.4604, "step": 8305 }, { "epoch": 1.1728325331827167, "grad_norm": 4.309509357272494, "learning_rate": 1.3911177718411643e-05, "loss": 0.6167, "step": 8306 }, { "epoch": 1.1729737362327026, "grad_norm": 4.826575652329133, "learning_rate": 1.3909774656964242e-05, "loss": 0.7904, "step": 8307 }, { "epoch": 1.1731149392826885, "grad_norm": 3.7086228340918796, "learning_rate": 1.3908371504655365e-05, "loss": 0.6548, "step": 8308 }, { "epoch": 1.1732561423326744, "grad_norm": 3.7580826952285853, "learning_rate": 1.3906968261517624e-05, "loss": 0.5661, "step": 8309 }, { "epoch": 1.1733973453826603, "grad_norm": 3.6180821137143724, "learning_rate": 1.3905564927583625e-05, "loss": 0.6274, "step": 8310 }, { "epoch": 1.1735385484326462, "grad_norm": 3.415224934719934, "learning_rate": 1.3904161502885985e-05, "loss": 0.4702, "step": 8311 }, { "epoch": 1.173679751482632, "grad_norm": 3.8132477084220304, "learning_rate": 1.3902757987457318e-05, "loss": 0.5765, "step": 8312 }, { "epoch": 1.173820954532618, "grad_norm": 3.26836043729517, "learning_rate": 1.3901354381330241e-05, "loss": 0.5859, "step": 8313 }, { "epoch": 1.1739621575826038, "grad_norm": 3.1226303203555013, "learning_rate": 1.3899950684537371e-05, "loss": 0.5853, "step": 8314 }, { "epoch": 1.1741033606325897, "grad_norm": 3.4058714289576724, "learning_rate": 1.3898546897111334e-05, "loss": 0.5389, "step": 8315 }, { "epoch": 1.1742445636825756, "grad_norm": 3.378011961602447, "learning_rate": 1.3897143019084749e-05, "loss": 0.6296, "step": 8316 }, { "epoch": 1.1743857667325615, "grad_norm": 3.739935881232564, "learning_rate": 1.3895739050490243e-05, "loss": 0.6554, "step": 8317 }, { "epoch": 1.1745269697825473, "grad_norm": 4.143397982433403, "learning_rate": 1.3894334991360448e-05, "loss": 0.7503, "step": 8318 }, { "epoch": 1.1746681728325332, "grad_norm": 3.466208838799215, "learning_rate": 1.3892930841727986e-05, "loss": 0.6546, "step": 8319 }, { "epoch": 1.174809375882519, "grad_norm": 3.7228977069159286, "learning_rate": 1.3891526601625492e-05, "loss": 0.6596, "step": 8320 }, { "epoch": 1.174950578932505, "grad_norm": 4.362461127909217, "learning_rate": 1.3890122271085601e-05, "loss": 0.7648, "step": 8321 }, { "epoch": 1.1750917819824909, "grad_norm": 3.4653020473009013, "learning_rate": 1.388871785014095e-05, "loss": 0.4865, "step": 8322 }, { "epoch": 1.1752329850324768, "grad_norm": 3.6148768352082623, "learning_rate": 1.3887313338824174e-05, "loss": 0.7391, "step": 8323 }, { "epoch": 1.1753741880824626, "grad_norm": 3.398800860716693, "learning_rate": 1.3885908737167918e-05, "loss": 0.6479, "step": 8324 }, { "epoch": 1.1755153911324485, "grad_norm": 3.4081488909869293, "learning_rate": 1.3884504045204813e-05, "loss": 0.6479, "step": 8325 }, { "epoch": 1.1756565941824344, "grad_norm": 3.756416271515069, "learning_rate": 1.3883099262967521e-05, "loss": 0.5735, "step": 8326 }, { "epoch": 1.1757977972324203, "grad_norm": 4.596490944387628, "learning_rate": 1.3881694390488675e-05, "loss": 0.663, "step": 8327 }, { "epoch": 1.1759390002824062, "grad_norm": 3.884484923109837, "learning_rate": 1.3880289427800925e-05, "loss": 0.5288, "step": 8328 }, { "epoch": 1.176080203332392, "grad_norm": 3.464032328949635, "learning_rate": 1.387888437493693e-05, "loss": 0.5348, "step": 8329 }, { "epoch": 1.176221406382378, "grad_norm": 3.8228028977411976, "learning_rate": 1.3877479231929333e-05, "loss": 0.7019, "step": 8330 }, { "epoch": 1.1763626094323638, "grad_norm": 3.685550372350385, "learning_rate": 1.3876073998810794e-05, "loss": 0.6144, "step": 8331 }, { "epoch": 1.1765038124823497, "grad_norm": 3.001103637796497, "learning_rate": 1.3874668675613972e-05, "loss": 0.511, "step": 8332 }, { "epoch": 1.1766450155323356, "grad_norm": 3.1192372408704223, "learning_rate": 1.3873263262371522e-05, "loss": 0.5263, "step": 8333 }, { "epoch": 1.1767862185823215, "grad_norm": 4.0135242614056, "learning_rate": 1.3871857759116104e-05, "loss": 0.6377, "step": 8334 }, { "epoch": 1.1769274216323073, "grad_norm": 3.2673788682814675, "learning_rate": 1.3870452165880389e-05, "loss": 0.5977, "step": 8335 }, { "epoch": 1.1770686246822932, "grad_norm": 3.6566080543905732, "learning_rate": 1.386904648269703e-05, "loss": 0.564, "step": 8336 }, { "epoch": 1.1772098277322791, "grad_norm": 3.313640091179194, "learning_rate": 1.3867640709598706e-05, "loss": 0.5413, "step": 8337 }, { "epoch": 1.177351030782265, "grad_norm": 4.2114623935040125, "learning_rate": 1.3866234846618083e-05, "loss": 0.7549, "step": 8338 }, { "epoch": 1.1774922338322509, "grad_norm": 3.5471479140207154, "learning_rate": 1.3864828893787832e-05, "loss": 0.4768, "step": 8339 }, { "epoch": 1.1776334368822368, "grad_norm": 3.781268653955553, "learning_rate": 1.3863422851140624e-05, "loss": 0.5236, "step": 8340 }, { "epoch": 1.1777746399322226, "grad_norm": 3.914379208872212, "learning_rate": 1.3862016718709139e-05, "loss": 0.6661, "step": 8341 }, { "epoch": 1.1779158429822085, "grad_norm": 4.127524743268275, "learning_rate": 1.386061049652605e-05, "loss": 0.6678, "step": 8342 }, { "epoch": 1.1780570460321944, "grad_norm": 3.8037868577972054, "learning_rate": 1.3859204184624047e-05, "loss": 0.5609, "step": 8343 }, { "epoch": 1.1781982490821803, "grad_norm": 3.74116522162721, "learning_rate": 1.3857797783035803e-05, "loss": 0.6308, "step": 8344 }, { "epoch": 1.1783394521321662, "grad_norm": 3.656034617196702, "learning_rate": 1.3856391291794001e-05, "loss": 0.5531, "step": 8345 }, { "epoch": 1.178480655182152, "grad_norm": 3.055077367158639, "learning_rate": 1.3854984710931332e-05, "loss": 0.5561, "step": 8346 }, { "epoch": 1.1786218582321377, "grad_norm": 3.708459349123848, "learning_rate": 1.3853578040480486e-05, "loss": 0.5627, "step": 8347 }, { "epoch": 1.1787630612821236, "grad_norm": 3.508671324356655, "learning_rate": 1.3852171280474147e-05, "loss": 0.6078, "step": 8348 }, { "epoch": 1.1789042643321095, "grad_norm": 3.4628667839587526, "learning_rate": 1.3850764430945015e-05, "loss": 0.5488, "step": 8349 }, { "epoch": 1.1790454673820954, "grad_norm": 3.658806655153012, "learning_rate": 1.3849357491925779e-05, "loss": 0.5298, "step": 8350 }, { "epoch": 1.1791866704320813, "grad_norm": 3.414388862097112, "learning_rate": 1.3847950463449134e-05, "loss": 0.5949, "step": 8351 }, { "epoch": 1.1793278734820671, "grad_norm": 4.130044502875767, "learning_rate": 1.3846543345547787e-05, "loss": 0.666, "step": 8352 }, { "epoch": 1.179469076532053, "grad_norm": 3.4830137720688636, "learning_rate": 1.3845136138254431e-05, "loss": 0.6961, "step": 8353 }, { "epoch": 1.179610279582039, "grad_norm": 3.241090487093376, "learning_rate": 1.3843728841601771e-05, "loss": 0.5524, "step": 8354 }, { "epoch": 1.1797514826320248, "grad_norm": 3.3497957314281286, "learning_rate": 1.3842321455622516e-05, "loss": 0.6078, "step": 8355 }, { "epoch": 1.1798926856820107, "grad_norm": 3.2634814275418953, "learning_rate": 1.3840913980349366e-05, "loss": 0.5993, "step": 8356 }, { "epoch": 1.1800338887319965, "grad_norm": 3.103644174151894, "learning_rate": 1.3839506415815037e-05, "loss": 0.5211, "step": 8357 }, { "epoch": 1.1801750917819824, "grad_norm": 4.265917003708214, "learning_rate": 1.3838098762052237e-05, "loss": 0.6296, "step": 8358 }, { "epoch": 1.1803162948319683, "grad_norm": 3.9357980891998707, "learning_rate": 1.3836691019093677e-05, "loss": 0.664, "step": 8359 }, { "epoch": 1.1804574978819542, "grad_norm": 2.867139229762761, "learning_rate": 1.3835283186972077e-05, "loss": 0.4986, "step": 8360 }, { "epoch": 1.18059870093194, "grad_norm": 3.206676009379892, "learning_rate": 1.3833875265720151e-05, "loss": 0.5336, "step": 8361 }, { "epoch": 1.180739903981926, "grad_norm": 3.3862288611033167, "learning_rate": 1.3832467255370618e-05, "loss": 0.5648, "step": 8362 }, { "epoch": 1.1808811070319118, "grad_norm": 3.86519326142991, "learning_rate": 1.3831059155956201e-05, "loss": 0.6319, "step": 8363 }, { "epoch": 1.1810223100818977, "grad_norm": 3.7328184709607375, "learning_rate": 1.3829650967509629e-05, "loss": 0.6697, "step": 8364 }, { "epoch": 1.1811635131318836, "grad_norm": 3.904237600076821, "learning_rate": 1.3828242690063616e-05, "loss": 0.6438, "step": 8365 }, { "epoch": 1.1813047161818695, "grad_norm": 3.8741684075295955, "learning_rate": 1.3826834323650899e-05, "loss": 0.7464, "step": 8366 }, { "epoch": 1.1814459192318554, "grad_norm": 3.601145524124925, "learning_rate": 1.3825425868304206e-05, "loss": 0.6681, "step": 8367 }, { "epoch": 1.1815871222818413, "grad_norm": 3.761479434174838, "learning_rate": 1.3824017324056264e-05, "loss": 0.6427, "step": 8368 }, { "epoch": 1.1817283253318271, "grad_norm": 2.5741328798285563, "learning_rate": 1.3822608690939816e-05, "loss": 0.5188, "step": 8369 }, { "epoch": 1.181869528381813, "grad_norm": 3.559961571546636, "learning_rate": 1.382119996898759e-05, "loss": 0.6745, "step": 8370 }, { "epoch": 1.182010731431799, "grad_norm": 3.389592836087978, "learning_rate": 1.3819791158232327e-05, "loss": 0.6109, "step": 8371 }, { "epoch": 1.1821519344817848, "grad_norm": 3.463514448181128, "learning_rate": 1.3818382258706766e-05, "loss": 0.5515, "step": 8372 }, { "epoch": 1.1822931375317707, "grad_norm": 2.8883469560914508, "learning_rate": 1.3816973270443654e-05, "loss": 0.5424, "step": 8373 }, { "epoch": 1.1824343405817566, "grad_norm": 3.477282999663746, "learning_rate": 1.381556419347573e-05, "loss": 0.6608, "step": 8374 }, { "epoch": 1.1825755436317424, "grad_norm": 3.475698718447275, "learning_rate": 1.3814155027835741e-05, "loss": 0.636, "step": 8375 }, { "epoch": 1.1827167466817283, "grad_norm": 3.0706042479970557, "learning_rate": 1.3812745773556438e-05, "loss": 0.5006, "step": 8376 }, { "epoch": 1.1828579497317142, "grad_norm": 3.7607126458555484, "learning_rate": 1.3811336430670568e-05, "loss": 0.6623, "step": 8377 }, { "epoch": 1.1829991527817, "grad_norm": 3.691589382587743, "learning_rate": 1.3809926999210886e-05, "loss": 0.5675, "step": 8378 }, { "epoch": 1.183140355831686, "grad_norm": 3.4228241870042075, "learning_rate": 1.3808517479210143e-05, "loss": 0.6875, "step": 8379 }, { "epoch": 1.1832815588816719, "grad_norm": 3.2513945103152184, "learning_rate": 1.3807107870701102e-05, "loss": 0.47, "step": 8380 }, { "epoch": 1.1834227619316577, "grad_norm": 3.0989333119496743, "learning_rate": 1.3805698173716522e-05, "loss": 0.5975, "step": 8381 }, { "epoch": 1.1835639649816436, "grad_norm": 3.4783733413830644, "learning_rate": 1.3804288388289152e-05, "loss": 0.6491, "step": 8382 }, { "epoch": 1.1837051680316295, "grad_norm": 3.2500485322399215, "learning_rate": 1.3802878514451767e-05, "loss": 0.538, "step": 8383 }, { "epoch": 1.1838463710816154, "grad_norm": 3.1990363014172996, "learning_rate": 1.3801468552237127e-05, "loss": 0.6089, "step": 8384 }, { "epoch": 1.1839875741316013, "grad_norm": 3.8387696230670163, "learning_rate": 1.3800058501678e-05, "loss": 0.6477, "step": 8385 }, { "epoch": 1.1841287771815872, "grad_norm": 3.5741734122717554, "learning_rate": 1.3798648362807154e-05, "loss": 0.5261, "step": 8386 }, { "epoch": 1.184269980231573, "grad_norm": 4.209473239181189, "learning_rate": 1.3797238135657363e-05, "loss": 0.7371, "step": 8387 }, { "epoch": 1.184411183281559, "grad_norm": 3.546938834048846, "learning_rate": 1.3795827820261392e-05, "loss": 0.5975, "step": 8388 }, { "epoch": 1.1845523863315448, "grad_norm": 3.1707022774178952, "learning_rate": 1.3794417416652027e-05, "loss": 0.5959, "step": 8389 }, { "epoch": 1.1846935893815307, "grad_norm": 4.853779947147142, "learning_rate": 1.379300692486204e-05, "loss": 0.7456, "step": 8390 }, { "epoch": 1.1848347924315166, "grad_norm": 3.9485023388192477, "learning_rate": 1.3791596344924206e-05, "loss": 0.5487, "step": 8391 }, { "epoch": 1.1849759954815025, "grad_norm": 3.119625911392598, "learning_rate": 1.3790185676871312e-05, "loss": 0.5593, "step": 8392 }, { "epoch": 1.1851171985314883, "grad_norm": 5.512215982449515, "learning_rate": 1.3788774920736142e-05, "loss": 0.7187, "step": 8393 }, { "epoch": 1.1852584015814742, "grad_norm": 3.787095604393217, "learning_rate": 1.3787364076551478e-05, "loss": 0.726, "step": 8394 }, { "epoch": 1.18539960463146, "grad_norm": 4.282618517932755, "learning_rate": 1.3785953144350108e-05, "loss": 0.7294, "step": 8395 }, { "epoch": 1.185540807681446, "grad_norm": 3.6887867919269666, "learning_rate": 1.3784542124164821e-05, "loss": 0.7391, "step": 8396 }, { "epoch": 1.1856820107314319, "grad_norm": 3.356123967667483, "learning_rate": 1.3783131016028408e-05, "loss": 0.6809, "step": 8397 }, { "epoch": 1.1858232137814178, "grad_norm": 3.567548466275041, "learning_rate": 1.378171981997367e-05, "loss": 0.6609, "step": 8398 }, { "epoch": 1.1859644168314036, "grad_norm": 3.580124029286584, "learning_rate": 1.378030853603339e-05, "loss": 0.6763, "step": 8399 }, { "epoch": 1.1861056198813895, "grad_norm": 2.9652953956030186, "learning_rate": 1.3778897164240378e-05, "loss": 0.5861, "step": 8400 }, { "epoch": 1.1862468229313754, "grad_norm": 4.469148906017059, "learning_rate": 1.3777485704627424e-05, "loss": 0.8899, "step": 8401 }, { "epoch": 1.1863880259813613, "grad_norm": 4.342840984553117, "learning_rate": 1.3776074157227335e-05, "loss": 0.7652, "step": 8402 }, { "epoch": 1.1865292290313472, "grad_norm": 3.628054985910775, "learning_rate": 1.3774662522072912e-05, "loss": 0.5921, "step": 8403 }, { "epoch": 1.186670432081333, "grad_norm": 4.4083318862956045, "learning_rate": 1.3773250799196964e-05, "loss": 0.8165, "step": 8404 }, { "epoch": 1.186811635131319, "grad_norm": 3.186653268399574, "learning_rate": 1.3771838988632295e-05, "loss": 0.5753, "step": 8405 }, { "epoch": 1.1869528381813046, "grad_norm": 3.9783873710803146, "learning_rate": 1.3770427090411717e-05, "loss": 0.6142, "step": 8406 }, { "epoch": 1.1870940412312905, "grad_norm": 3.827393791442306, "learning_rate": 1.3769015104568046e-05, "loss": 0.6166, "step": 8407 }, { "epoch": 1.1872352442812764, "grad_norm": 3.689853079758535, "learning_rate": 1.3767603031134087e-05, "loss": 0.708, "step": 8408 }, { "epoch": 1.1873764473312622, "grad_norm": 3.9974949816348193, "learning_rate": 1.3766190870142662e-05, "loss": 0.7098, "step": 8409 }, { "epoch": 1.1875176503812481, "grad_norm": 3.6933356777340087, "learning_rate": 1.3764778621626586e-05, "loss": 0.6512, "step": 8410 }, { "epoch": 1.187658853431234, "grad_norm": 4.1230929665490335, "learning_rate": 1.3763366285618685e-05, "loss": 0.7175, "step": 8411 }, { "epoch": 1.18780005648122, "grad_norm": 3.261845796214726, "learning_rate": 1.3761953862151773e-05, "loss": 0.4423, "step": 8412 }, { "epoch": 1.1879412595312058, "grad_norm": 3.663606594033593, "learning_rate": 1.376054135125868e-05, "loss": 0.6396, "step": 8413 }, { "epoch": 1.1880824625811917, "grad_norm": 3.7123692373156394, "learning_rate": 1.3759128752972229e-05, "loss": 0.6191, "step": 8414 }, { "epoch": 1.1882236656311775, "grad_norm": 3.5033470689398576, "learning_rate": 1.3757716067325251e-05, "loss": 0.4708, "step": 8415 }, { "epoch": 1.1883648686811634, "grad_norm": 3.1166529678224175, "learning_rate": 1.375630329435057e-05, "loss": 0.538, "step": 8416 }, { "epoch": 1.1885060717311493, "grad_norm": 3.252307266671689, "learning_rate": 1.3754890434081025e-05, "loss": 0.4973, "step": 8417 }, { "epoch": 1.1886472747811352, "grad_norm": 4.714718749878313, "learning_rate": 1.3753477486549449e-05, "loss": 0.8573, "step": 8418 }, { "epoch": 1.188788477831121, "grad_norm": 3.2500352083878563, "learning_rate": 1.3752064451788678e-05, "loss": 0.6157, "step": 8419 }, { "epoch": 1.188929680881107, "grad_norm": 4.114929593689272, "learning_rate": 1.3750651329831548e-05, "loss": 0.7551, "step": 8420 }, { "epoch": 1.1890708839310928, "grad_norm": 3.2482425839648017, "learning_rate": 1.3749238120710902e-05, "loss": 0.5369, "step": 8421 }, { "epoch": 1.1892120869810787, "grad_norm": 3.3801284425244096, "learning_rate": 1.3747824824459577e-05, "loss": 0.6063, "step": 8422 }, { "epoch": 1.1893532900310646, "grad_norm": 2.9444265574433794, "learning_rate": 1.3746411441110425e-05, "loss": 0.4955, "step": 8423 }, { "epoch": 1.1894944930810505, "grad_norm": 3.398336403997849, "learning_rate": 1.3744997970696291e-05, "loss": 0.6201, "step": 8424 }, { "epoch": 1.1896356961310364, "grad_norm": 4.777759873747928, "learning_rate": 1.3743584413250019e-05, "loss": 0.9248, "step": 8425 }, { "epoch": 1.1897768991810223, "grad_norm": 2.9728537264917287, "learning_rate": 1.3742170768804464e-05, "loss": 0.5553, "step": 8426 }, { "epoch": 1.1899181022310081, "grad_norm": 2.9204095687261016, "learning_rate": 1.3740757037392474e-05, "loss": 0.6315, "step": 8427 }, { "epoch": 1.190059305280994, "grad_norm": 3.7423754302762613, "learning_rate": 1.3739343219046908e-05, "loss": 0.6599, "step": 8428 }, { "epoch": 1.19020050833098, "grad_norm": 4.097185546243689, "learning_rate": 1.373792931380062e-05, "loss": 0.6482, "step": 8429 }, { "epoch": 1.1903417113809658, "grad_norm": 3.477306396290203, "learning_rate": 1.3736515321686468e-05, "loss": 0.6132, "step": 8430 }, { "epoch": 1.1904829144309517, "grad_norm": 4.142551396089924, "learning_rate": 1.3735101242737313e-05, "loss": 0.5596, "step": 8431 }, { "epoch": 1.1906241174809375, "grad_norm": 3.9633752559349382, "learning_rate": 1.373368707698602e-05, "loss": 0.6352, "step": 8432 }, { "epoch": 1.1907653205309234, "grad_norm": 3.3197447769286006, "learning_rate": 1.373227282446545e-05, "loss": 0.6155, "step": 8433 }, { "epoch": 1.1909065235809093, "grad_norm": 4.021473478734827, "learning_rate": 1.3730858485208471e-05, "loss": 0.7521, "step": 8434 }, { "epoch": 1.1910477266308952, "grad_norm": 3.88054815220302, "learning_rate": 1.3729444059247954e-05, "loss": 0.6559, "step": 8435 }, { "epoch": 1.191188929680881, "grad_norm": 3.0387360084160178, "learning_rate": 1.3728029546616769e-05, "loss": 0.5875, "step": 8436 }, { "epoch": 1.191330132730867, "grad_norm": 3.5073141787512743, "learning_rate": 1.3726614947347784e-05, "loss": 0.5851, "step": 8437 }, { "epoch": 1.1914713357808528, "grad_norm": 3.704859812609219, "learning_rate": 1.3725200261473879e-05, "loss": 0.7267, "step": 8438 }, { "epoch": 1.1916125388308387, "grad_norm": 3.472071453049201, "learning_rate": 1.3723785489027926e-05, "loss": 0.6012, "step": 8439 }, { "epoch": 1.1917537418808246, "grad_norm": 3.5612454325212823, "learning_rate": 1.3722370630042809e-05, "loss": 0.6082, "step": 8440 }, { "epoch": 1.1918949449308105, "grad_norm": 3.7361880744180187, "learning_rate": 1.372095568455141e-05, "loss": 0.5201, "step": 8441 }, { "epoch": 1.1920361479807964, "grad_norm": 3.2001903684982502, "learning_rate": 1.3719540652586601e-05, "loss": 0.567, "step": 8442 }, { "epoch": 1.1921773510307823, "grad_norm": 4.065173060412272, "learning_rate": 1.3718125534181277e-05, "loss": 0.6917, "step": 8443 }, { "epoch": 1.1923185540807681, "grad_norm": 3.3613100687907043, "learning_rate": 1.3716710329368319e-05, "loss": 0.58, "step": 8444 }, { "epoch": 1.192459757130754, "grad_norm": 3.0478226485921613, "learning_rate": 1.371529503818062e-05, "loss": 0.5007, "step": 8445 }, { "epoch": 1.19260096018074, "grad_norm": 3.023701824519928, "learning_rate": 1.3713879660651069e-05, "loss": 0.5308, "step": 8446 }, { "epoch": 1.1927421632307258, "grad_norm": 3.532508408549909, "learning_rate": 1.3712464196812558e-05, "loss": 0.6012, "step": 8447 }, { "epoch": 1.1928833662807117, "grad_norm": 4.2241636911007685, "learning_rate": 1.3711048646697979e-05, "loss": 0.7859, "step": 8448 }, { "epoch": 1.1930245693306976, "grad_norm": 3.3416667581693633, "learning_rate": 1.370963301034024e-05, "loss": 0.5934, "step": 8449 }, { "epoch": 1.1931657723806834, "grad_norm": 3.5984709222099576, "learning_rate": 1.3708217287772227e-05, "loss": 0.5452, "step": 8450 }, { "epoch": 1.1933069754306693, "grad_norm": 4.193074495603969, "learning_rate": 1.3706801479026844e-05, "loss": 0.638, "step": 8451 }, { "epoch": 1.1934481784806552, "grad_norm": 3.3811873854617653, "learning_rate": 1.3705385584136997e-05, "loss": 0.6089, "step": 8452 }, { "epoch": 1.193589381530641, "grad_norm": 3.602948868471054, "learning_rate": 1.370396960313559e-05, "loss": 0.637, "step": 8453 }, { "epoch": 1.193730584580627, "grad_norm": 3.389911254884805, "learning_rate": 1.3702553536055529e-05, "loss": 0.5187, "step": 8454 }, { "epoch": 1.1938717876306129, "grad_norm": 4.183621518433871, "learning_rate": 1.3701137382929724e-05, "loss": 0.6934, "step": 8455 }, { "epoch": 1.1940129906805987, "grad_norm": 3.0065174649482067, "learning_rate": 1.3699721143791083e-05, "loss": 0.514, "step": 8456 }, { "epoch": 1.1941541937305846, "grad_norm": 3.5194347376052537, "learning_rate": 1.3698304818672519e-05, "loss": 0.6432, "step": 8457 }, { "epoch": 1.1942953967805705, "grad_norm": 3.5240683293150554, "learning_rate": 1.3696888407606952e-05, "loss": 0.5645, "step": 8458 }, { "epoch": 1.1944365998305564, "grad_norm": 3.263407012944965, "learning_rate": 1.3695471910627292e-05, "loss": 0.5877, "step": 8459 }, { "epoch": 1.1945778028805423, "grad_norm": 3.529119779712467, "learning_rate": 1.369405532776646e-05, "loss": 0.5354, "step": 8460 }, { "epoch": 1.1947190059305282, "grad_norm": 4.1225157981782, "learning_rate": 1.369263865905738e-05, "loss": 0.7623, "step": 8461 }, { "epoch": 1.194860208980514, "grad_norm": 3.248715536346656, "learning_rate": 1.3691221904532972e-05, "loss": 0.5388, "step": 8462 }, { "epoch": 1.1950014120305, "grad_norm": 3.3284582869391737, "learning_rate": 1.368980506422616e-05, "loss": 0.4776, "step": 8463 }, { "epoch": 1.1951426150804858, "grad_norm": 4.160924712694896, "learning_rate": 1.3688388138169873e-05, "loss": 0.7079, "step": 8464 }, { "epoch": 1.1952838181304717, "grad_norm": 2.8692248041175192, "learning_rate": 1.3686971126397035e-05, "loss": 0.4065, "step": 8465 }, { "epoch": 1.1954250211804576, "grad_norm": 3.363327236238631, "learning_rate": 1.3685554028940586e-05, "loss": 0.5936, "step": 8466 }, { "epoch": 1.1955662242304435, "grad_norm": 3.0684011278181718, "learning_rate": 1.368413684583345e-05, "loss": 0.5115, "step": 8467 }, { "epoch": 1.1957074272804293, "grad_norm": 3.217957706838382, "learning_rate": 1.3682719577108566e-05, "loss": 0.5513, "step": 8468 }, { "epoch": 1.1958486303304152, "grad_norm": 2.7918981144234856, "learning_rate": 1.3681302222798867e-05, "loss": 0.4151, "step": 8469 }, { "epoch": 1.195989833380401, "grad_norm": 3.051424640753601, "learning_rate": 1.3679884782937295e-05, "loss": 0.5218, "step": 8470 }, { "epoch": 1.196131036430387, "grad_norm": 5.378519879637901, "learning_rate": 1.3678467257556791e-05, "loss": 0.8515, "step": 8471 }, { "epoch": 1.1962722394803729, "grad_norm": 3.0431780229863667, "learning_rate": 1.3677049646690298e-05, "loss": 0.5409, "step": 8472 }, { "epoch": 1.1964134425303588, "grad_norm": 5.315474202352966, "learning_rate": 1.3675631950370757e-05, "loss": 0.9506, "step": 8473 }, { "epoch": 1.1965546455803446, "grad_norm": 3.8404528632437893, "learning_rate": 1.3674214168631114e-05, "loss": 0.6706, "step": 8474 }, { "epoch": 1.1966958486303305, "grad_norm": 4.064403456633462, "learning_rate": 1.3672796301504325e-05, "loss": 0.6745, "step": 8475 }, { "epoch": 1.1968370516803164, "grad_norm": 3.1738407073378236, "learning_rate": 1.3671378349023333e-05, "loss": 0.5892, "step": 8476 }, { "epoch": 1.1969782547303023, "grad_norm": 3.14070293638681, "learning_rate": 1.3669960311221098e-05, "loss": 0.5858, "step": 8477 }, { "epoch": 1.1971194577802882, "grad_norm": 3.0034149481967467, "learning_rate": 1.3668542188130567e-05, "loss": 0.5801, "step": 8478 }, { "epoch": 1.197260660830274, "grad_norm": 7.694344647774244, "learning_rate": 1.36671239797847e-05, "loss": 0.6505, "step": 8479 }, { "epoch": 1.19740186388026, "grad_norm": 3.443314511662318, "learning_rate": 1.3665705686216457e-05, "loss": 0.6659, "step": 8480 }, { "epoch": 1.1975430669302458, "grad_norm": 3.0368095601011262, "learning_rate": 1.3664287307458794e-05, "loss": 0.5097, "step": 8481 }, { "epoch": 1.1976842699802317, "grad_norm": 3.8884070149836347, "learning_rate": 1.3662868843544676e-05, "loss": 0.6981, "step": 8482 }, { "epoch": 1.1978254730302174, "grad_norm": 4.592943557892394, "learning_rate": 1.3661450294507075e-05, "loss": 0.7482, "step": 8483 }, { "epoch": 1.1979666760802032, "grad_norm": 3.718094572798151, "learning_rate": 1.3660031660378945e-05, "loss": 0.6251, "step": 8484 }, { "epoch": 1.1981078791301891, "grad_norm": 4.204987294158051, "learning_rate": 1.365861294119326e-05, "loss": 0.6811, "step": 8485 }, { "epoch": 1.198249082180175, "grad_norm": 3.829493305470706, "learning_rate": 1.365719413698299e-05, "loss": 0.7043, "step": 8486 }, { "epoch": 1.198390285230161, "grad_norm": 3.198977842889897, "learning_rate": 1.365577524778111e-05, "loss": 0.618, "step": 8487 }, { "epoch": 1.1985314882801468, "grad_norm": 4.745353176567218, "learning_rate": 1.365435627362059e-05, "loss": 0.7534, "step": 8488 }, { "epoch": 1.1986726913301327, "grad_norm": 3.834693528378682, "learning_rate": 1.365293721453441e-05, "loss": 0.5713, "step": 8489 }, { "epoch": 1.1988138943801185, "grad_norm": 3.7669121312559093, "learning_rate": 1.3651518070555545e-05, "loss": 0.6758, "step": 8490 }, { "epoch": 1.1989550974301044, "grad_norm": 3.0903143347236717, "learning_rate": 1.3650098841716978e-05, "loss": 0.5773, "step": 8491 }, { "epoch": 1.1990963004800903, "grad_norm": 3.1038476163699555, "learning_rate": 1.3648679528051689e-05, "loss": 0.5124, "step": 8492 }, { "epoch": 1.1992375035300762, "grad_norm": 3.2073192892049422, "learning_rate": 1.3647260129592666e-05, "loss": 0.5787, "step": 8493 }, { "epoch": 1.199378706580062, "grad_norm": 3.574112406249967, "learning_rate": 1.364584064637289e-05, "loss": 0.6135, "step": 8494 }, { "epoch": 1.199519909630048, "grad_norm": 3.5346483096154757, "learning_rate": 1.3644421078425354e-05, "loss": 0.593, "step": 8495 }, { "epoch": 1.1996611126800338, "grad_norm": 3.3629649950192153, "learning_rate": 1.3643001425783045e-05, "loss": 0.5753, "step": 8496 }, { "epoch": 1.1998023157300197, "grad_norm": 4.230447753081659, "learning_rate": 1.3641581688478955e-05, "loss": 0.8089, "step": 8497 }, { "epoch": 1.1999435187800056, "grad_norm": 3.337523468076622, "learning_rate": 1.3640161866546083e-05, "loss": 0.5784, "step": 8498 }, { "epoch": 1.2000847218299915, "grad_norm": 3.8715684783159485, "learning_rate": 1.3638741960017416e-05, "loss": 0.7553, "step": 8499 }, { "epoch": 1.2002259248799774, "grad_norm": 3.5417246787389978, "learning_rate": 1.3637321968925964e-05, "loss": 0.7248, "step": 8500 }, { "epoch": 1.2003671279299633, "grad_norm": 3.6226594348749845, "learning_rate": 1.3635901893304714e-05, "loss": 0.5876, "step": 8501 }, { "epoch": 1.2005083309799491, "grad_norm": 3.1410478976294396, "learning_rate": 1.3634481733186675e-05, "loss": 0.5346, "step": 8502 }, { "epoch": 1.200649534029935, "grad_norm": 4.1035957021501694, "learning_rate": 1.3633061488604854e-05, "loss": 0.7281, "step": 8503 }, { "epoch": 1.200790737079921, "grad_norm": 3.419767278564955, "learning_rate": 1.3631641159592253e-05, "loss": 0.5522, "step": 8504 }, { "epoch": 1.2009319401299068, "grad_norm": 3.860021115183452, "learning_rate": 1.3630220746181874e-05, "loss": 0.5234, "step": 8505 }, { "epoch": 1.2010731431798927, "grad_norm": 3.479976169822163, "learning_rate": 1.3628800248406738e-05, "loss": 0.6797, "step": 8506 }, { "epoch": 1.2012143462298785, "grad_norm": 3.203569020479435, "learning_rate": 1.3627379666299849e-05, "loss": 0.5456, "step": 8507 }, { "epoch": 1.2013555492798644, "grad_norm": 3.741860491073354, "learning_rate": 1.3625958999894225e-05, "loss": 0.7266, "step": 8508 }, { "epoch": 1.2014967523298503, "grad_norm": 3.742409880158705, "learning_rate": 1.3624538249222879e-05, "loss": 0.6227, "step": 8509 }, { "epoch": 1.2016379553798362, "grad_norm": 3.4866935218126844, "learning_rate": 1.3623117414318827e-05, "loss": 0.5741, "step": 8510 }, { "epoch": 1.201779158429822, "grad_norm": 3.173622142344666, "learning_rate": 1.3621696495215091e-05, "loss": 0.5393, "step": 8511 }, { "epoch": 1.201920361479808, "grad_norm": 3.2582502912214304, "learning_rate": 1.3620275491944695e-05, "loss": 0.5222, "step": 8512 }, { "epoch": 1.2020615645297938, "grad_norm": 3.161994567403225, "learning_rate": 1.361885440454066e-05, "loss": 0.5865, "step": 8513 }, { "epoch": 1.2022027675797797, "grad_norm": 3.1872926167372424, "learning_rate": 1.361743323303601e-05, "loss": 0.4506, "step": 8514 }, { "epoch": 1.2023439706297656, "grad_norm": 3.564685075897357, "learning_rate": 1.3616011977463776e-05, "loss": 0.6667, "step": 8515 }, { "epoch": 1.2024851736797515, "grad_norm": 3.1111746361691446, "learning_rate": 1.3614590637856986e-05, "loss": 0.4603, "step": 8516 }, { "epoch": 1.2026263767297374, "grad_norm": 3.6018899945912692, "learning_rate": 1.3613169214248667e-05, "loss": 0.6262, "step": 8517 }, { "epoch": 1.2027675797797233, "grad_norm": 3.654989321140412, "learning_rate": 1.3611747706671859e-05, "loss": 0.621, "step": 8518 }, { "epoch": 1.2029087828297091, "grad_norm": 4.181405229810992, "learning_rate": 1.3610326115159591e-05, "loss": 0.687, "step": 8519 }, { "epoch": 1.203049985879695, "grad_norm": 4.462507535101452, "learning_rate": 1.3608904439744905e-05, "loss": 0.7367, "step": 8520 }, { "epoch": 1.203191188929681, "grad_norm": 4.1142562973201455, "learning_rate": 1.3607482680460842e-05, "loss": 0.6151, "step": 8521 }, { "epoch": 1.2033323919796668, "grad_norm": 3.4267884097250536, "learning_rate": 1.3606060837340431e-05, "loss": 0.5668, "step": 8522 }, { "epoch": 1.2034735950296527, "grad_norm": 2.6628149207078007, "learning_rate": 1.360463891041673e-05, "loss": 0.4759, "step": 8523 }, { "epoch": 1.2036147980796386, "grad_norm": 3.570926320344101, "learning_rate": 1.3603216899722775e-05, "loss": 0.6769, "step": 8524 }, { "epoch": 1.2037560011296244, "grad_norm": 3.940339814897452, "learning_rate": 1.3601794805291617e-05, "loss": 0.6856, "step": 8525 }, { "epoch": 1.2038972041796103, "grad_norm": 3.42785845120409, "learning_rate": 1.3600372627156304e-05, "loss": 0.5286, "step": 8526 }, { "epoch": 1.2040384072295962, "grad_norm": 3.2527794155224403, "learning_rate": 1.3598950365349884e-05, "loss": 0.6319, "step": 8527 }, { "epoch": 1.204179610279582, "grad_norm": 3.1438447410507355, "learning_rate": 1.3597528019905411e-05, "loss": 0.5353, "step": 8528 }, { "epoch": 1.204320813329568, "grad_norm": 3.505633342545751, "learning_rate": 1.359610559085594e-05, "loss": 0.6547, "step": 8529 }, { "epoch": 1.2044620163795539, "grad_norm": 3.209384542513002, "learning_rate": 1.3594683078234532e-05, "loss": 0.5399, "step": 8530 }, { "epoch": 1.2046032194295397, "grad_norm": 4.227870854079469, "learning_rate": 1.3593260482074238e-05, "loss": 0.6747, "step": 8531 }, { "epoch": 1.2047444224795256, "grad_norm": 3.544573190428461, "learning_rate": 1.3591837802408123e-05, "loss": 0.666, "step": 8532 }, { "epoch": 1.2048856255295115, "grad_norm": 3.50923350732052, "learning_rate": 1.3590415039269251e-05, "loss": 0.5833, "step": 8533 }, { "epoch": 1.2050268285794974, "grad_norm": 3.185623948290316, "learning_rate": 1.3588992192690683e-05, "loss": 0.5503, "step": 8534 }, { "epoch": 1.2051680316294833, "grad_norm": 3.609065163298797, "learning_rate": 1.3587569262705485e-05, "loss": 0.6249, "step": 8535 }, { "epoch": 1.2053092346794692, "grad_norm": 3.256133112240713, "learning_rate": 1.3586146249346728e-05, "loss": 0.5907, "step": 8536 }, { "epoch": 1.205450437729455, "grad_norm": 3.5292369065014224, "learning_rate": 1.3584723152647479e-05, "loss": 0.6071, "step": 8537 }, { "epoch": 1.205591640779441, "grad_norm": 3.6971951607065767, "learning_rate": 1.3583299972640815e-05, "loss": 0.675, "step": 8538 }, { "epoch": 1.2057328438294268, "grad_norm": 4.2606284498679585, "learning_rate": 1.3581876709359804e-05, "loss": 0.6918, "step": 8539 }, { "epoch": 1.2058740468794127, "grad_norm": 3.44284691839895, "learning_rate": 1.3580453362837527e-05, "loss": 0.6868, "step": 8540 }, { "epoch": 1.2060152499293986, "grad_norm": 3.378145807020263, "learning_rate": 1.3579029933107059e-05, "loss": 0.661, "step": 8541 }, { "epoch": 1.2061564529793842, "grad_norm": 3.367405027180481, "learning_rate": 1.3577606420201483e-05, "loss": 0.5004, "step": 8542 }, { "epoch": 1.2062976560293701, "grad_norm": 2.9351763633420647, "learning_rate": 1.3576182824153879e-05, "loss": 0.5275, "step": 8543 }, { "epoch": 1.206438859079356, "grad_norm": 3.48957598898269, "learning_rate": 1.357475914499733e-05, "loss": 0.6836, "step": 8544 }, { "epoch": 1.2065800621293419, "grad_norm": 3.723050565377529, "learning_rate": 1.3573335382764919e-05, "loss": 0.7019, "step": 8545 }, { "epoch": 1.2067212651793278, "grad_norm": 3.6439295754732712, "learning_rate": 1.3571911537489739e-05, "loss": 0.572, "step": 8546 }, { "epoch": 1.2068624682293136, "grad_norm": 3.7028130266524886, "learning_rate": 1.357048760920488e-05, "loss": 0.6723, "step": 8547 }, { "epoch": 1.2070036712792995, "grad_norm": 3.7362060519518128, "learning_rate": 1.3569063597943428e-05, "loss": 0.5774, "step": 8548 }, { "epoch": 1.2071448743292854, "grad_norm": 3.583105759278942, "learning_rate": 1.3567639503738482e-05, "loss": 0.6938, "step": 8549 }, { "epoch": 1.2072860773792713, "grad_norm": 4.361245070989145, "learning_rate": 1.3566215326623131e-05, "loss": 0.6487, "step": 8550 }, { "epoch": 1.2074272804292572, "grad_norm": 3.3493731405979554, "learning_rate": 1.3564791066630478e-05, "loss": 0.5868, "step": 8551 }, { "epoch": 1.207568483479243, "grad_norm": 3.3907147582174693, "learning_rate": 1.3563366723793621e-05, "loss": 0.5766, "step": 8552 }, { "epoch": 1.207709686529229, "grad_norm": 3.3263859590184275, "learning_rate": 1.3561942298145661e-05, "loss": 0.518, "step": 8553 }, { "epoch": 1.2078508895792148, "grad_norm": 3.6789863438351924, "learning_rate": 1.3560517789719696e-05, "loss": 0.681, "step": 8554 }, { "epoch": 1.2079920926292007, "grad_norm": 3.5508380351202833, "learning_rate": 1.355909319854884e-05, "loss": 0.6455, "step": 8555 }, { "epoch": 1.2081332956791866, "grad_norm": 3.310680608142239, "learning_rate": 1.355766852466619e-05, "loss": 0.5233, "step": 8556 }, { "epoch": 1.2082744987291725, "grad_norm": 4.113737558100836, "learning_rate": 1.3556243768104864e-05, "loss": 0.6577, "step": 8557 }, { "epoch": 1.2084157017791584, "grad_norm": 4.4560172983662545, "learning_rate": 1.3554818928897965e-05, "loss": 0.7653, "step": 8558 }, { "epoch": 1.2085569048291442, "grad_norm": 3.9647366298478084, "learning_rate": 1.3553394007078615e-05, "loss": 0.7097, "step": 8559 }, { "epoch": 1.2086981078791301, "grad_norm": 3.1010730419016346, "learning_rate": 1.355196900267992e-05, "loss": 0.6087, "step": 8560 }, { "epoch": 1.208839310929116, "grad_norm": 3.557315267689678, "learning_rate": 1.3550543915735e-05, "loss": 0.5643, "step": 8561 }, { "epoch": 1.208980513979102, "grad_norm": 3.3813415193793754, "learning_rate": 1.3549118746276968e-05, "loss": 0.619, "step": 8562 }, { "epoch": 1.2091217170290878, "grad_norm": 3.2575783157116662, "learning_rate": 1.3547693494338953e-05, "loss": 0.5183, "step": 8563 }, { "epoch": 1.2092629200790737, "grad_norm": 3.4551415663666987, "learning_rate": 1.3546268159954075e-05, "loss": 0.5212, "step": 8564 }, { "epoch": 1.2094041231290595, "grad_norm": 3.7427195230464085, "learning_rate": 1.3544842743155453e-05, "loss": 0.6575, "step": 8565 }, { "epoch": 1.2095453261790454, "grad_norm": 3.537673644437599, "learning_rate": 1.3543417243976217e-05, "loss": 0.5876, "step": 8566 }, { "epoch": 1.2096865292290313, "grad_norm": 3.784301756949045, "learning_rate": 1.3541991662449498e-05, "loss": 0.5814, "step": 8567 }, { "epoch": 1.2098277322790172, "grad_norm": 4.271722913573719, "learning_rate": 1.3540565998608419e-05, "loss": 0.6513, "step": 8568 }, { "epoch": 1.209968935329003, "grad_norm": 3.675029375034489, "learning_rate": 1.3539140252486119e-05, "loss": 0.71, "step": 8569 }, { "epoch": 1.210110138378989, "grad_norm": 3.530925765704981, "learning_rate": 1.3537714424115725e-05, "loss": 0.7401, "step": 8570 }, { "epoch": 1.2102513414289748, "grad_norm": 3.1849349102572746, "learning_rate": 1.3536288513530374e-05, "loss": 0.5438, "step": 8571 }, { "epoch": 1.2103925444789607, "grad_norm": 3.434011398998355, "learning_rate": 1.353486252076321e-05, "loss": 0.5211, "step": 8572 }, { "epoch": 1.2105337475289466, "grad_norm": 3.989834468236496, "learning_rate": 1.353343644584736e-05, "loss": 0.6783, "step": 8573 }, { "epoch": 1.2106749505789325, "grad_norm": 3.61727967478047, "learning_rate": 1.353201028881598e-05, "loss": 0.6017, "step": 8574 }, { "epoch": 1.2108161536289184, "grad_norm": 3.5959256461846962, "learning_rate": 1.3530584049702204e-05, "loss": 0.5322, "step": 8575 }, { "epoch": 1.2109573566789043, "grad_norm": 3.037092612937834, "learning_rate": 1.3529157728539179e-05, "loss": 0.4418, "step": 8576 }, { "epoch": 1.2110985597288901, "grad_norm": 3.0716390423925186, "learning_rate": 1.3527731325360053e-05, "loss": 0.5976, "step": 8577 }, { "epoch": 1.211239762778876, "grad_norm": 3.8894253575639803, "learning_rate": 1.3526304840197976e-05, "loss": 0.5864, "step": 8578 }, { "epoch": 1.211380965828862, "grad_norm": 3.553511383788996, "learning_rate": 1.3524878273086093e-05, "loss": 0.6257, "step": 8579 }, { "epoch": 1.2115221688788478, "grad_norm": 3.7684484684640167, "learning_rate": 1.3523451624057566e-05, "loss": 0.6431, "step": 8580 }, { "epoch": 1.2116633719288337, "grad_norm": 3.4618412854900633, "learning_rate": 1.3522024893145544e-05, "loss": 0.5607, "step": 8581 }, { "epoch": 1.2118045749788195, "grad_norm": 3.8461643334558366, "learning_rate": 1.3520598080383183e-05, "loss": 0.629, "step": 8582 }, { "epoch": 1.2119457780288054, "grad_norm": 4.128154580157784, "learning_rate": 1.3519171185803641e-05, "loss": 0.6334, "step": 8583 }, { "epoch": 1.2120869810787913, "grad_norm": 3.4813518183171093, "learning_rate": 1.3517744209440085e-05, "loss": 0.6604, "step": 8584 }, { "epoch": 1.2122281841287772, "grad_norm": 3.3321193262460027, "learning_rate": 1.3516317151325671e-05, "loss": 0.5559, "step": 8585 }, { "epoch": 1.212369387178763, "grad_norm": 2.6223781781424487, "learning_rate": 1.3514890011493564e-05, "loss": 0.4308, "step": 8586 }, { "epoch": 1.212510590228749, "grad_norm": 3.760956847628895, "learning_rate": 1.3513462789976933e-05, "loss": 0.6591, "step": 8587 }, { "epoch": 1.2126517932787348, "grad_norm": 3.9659461129689024, "learning_rate": 1.3512035486808942e-05, "loss": 0.6586, "step": 8588 }, { "epoch": 1.2127929963287207, "grad_norm": 4.2370735273003595, "learning_rate": 1.3510608102022765e-05, "loss": 0.6172, "step": 8589 }, { "epoch": 1.2129341993787066, "grad_norm": 3.744984364391922, "learning_rate": 1.350918063565157e-05, "loss": 0.6758, "step": 8590 }, { "epoch": 1.2130754024286925, "grad_norm": 3.8578046226243323, "learning_rate": 1.3507753087728532e-05, "loss": 0.6326, "step": 8591 }, { "epoch": 1.2132166054786784, "grad_norm": 3.5473662443160277, "learning_rate": 1.3506325458286829e-05, "loss": 0.6453, "step": 8592 }, { "epoch": 1.2133578085286643, "grad_norm": 3.2785099255278003, "learning_rate": 1.3504897747359636e-05, "loss": 0.5544, "step": 8593 }, { "epoch": 1.2134990115786501, "grad_norm": 3.517138990766305, "learning_rate": 1.3503469954980133e-05, "loss": 0.5606, "step": 8594 }, { "epoch": 1.213640214628636, "grad_norm": 3.0758348976654384, "learning_rate": 1.3502042081181502e-05, "loss": 0.5131, "step": 8595 }, { "epoch": 1.213781417678622, "grad_norm": 3.444880122013575, "learning_rate": 1.3500614125996924e-05, "loss": 0.5971, "step": 8596 }, { "epoch": 1.2139226207286078, "grad_norm": 4.455252170548031, "learning_rate": 1.3499186089459587e-05, "loss": 0.8704, "step": 8597 }, { "epoch": 1.2140638237785937, "grad_norm": 3.4452404700899013, "learning_rate": 1.3497757971602677e-05, "loss": 0.5541, "step": 8598 }, { "epoch": 1.2142050268285796, "grad_norm": 3.7477138541272654, "learning_rate": 1.3496329772459378e-05, "loss": 0.676, "step": 8599 }, { "epoch": 1.2143462298785654, "grad_norm": 3.6574001072858753, "learning_rate": 1.3494901492062889e-05, "loss": 0.6385, "step": 8600 }, { "epoch": 1.2144874329285513, "grad_norm": 3.1330699365348207, "learning_rate": 1.3493473130446398e-05, "loss": 0.5664, "step": 8601 }, { "epoch": 1.2146286359785372, "grad_norm": 3.227589933804556, "learning_rate": 1.34920446876431e-05, "loss": 0.6729, "step": 8602 }, { "epoch": 1.214769839028523, "grad_norm": 3.86666391893981, "learning_rate": 1.3490616163686193e-05, "loss": 0.7414, "step": 8603 }, { "epoch": 1.214911042078509, "grad_norm": 4.239813656644816, "learning_rate": 1.3489187558608871e-05, "loss": 0.675, "step": 8604 }, { "epoch": 1.2150522451284949, "grad_norm": 4.340006516198574, "learning_rate": 1.3487758872444337e-05, "loss": 0.7567, "step": 8605 }, { "epoch": 1.2151934481784807, "grad_norm": 3.5017297322271412, "learning_rate": 1.3486330105225797e-05, "loss": 0.5903, "step": 8606 }, { "epoch": 1.2153346512284666, "grad_norm": 3.3594113946610635, "learning_rate": 1.3484901256986448e-05, "loss": 0.4827, "step": 8607 }, { "epoch": 1.2154758542784525, "grad_norm": 3.170211099506976, "learning_rate": 1.3483472327759496e-05, "loss": 0.5725, "step": 8608 }, { "epoch": 1.2156170573284384, "grad_norm": 2.6579878657503464, "learning_rate": 1.3482043317578154e-05, "loss": 0.477, "step": 8609 }, { "epoch": 1.2157582603784243, "grad_norm": 3.838288270545291, "learning_rate": 1.3480614226475632e-05, "loss": 0.6255, "step": 8610 }, { "epoch": 1.2158994634284102, "grad_norm": 3.4177385721360376, "learning_rate": 1.3479185054485137e-05, "loss": 0.6034, "step": 8611 }, { "epoch": 1.216040666478396, "grad_norm": 3.183650433639726, "learning_rate": 1.3477755801639883e-05, "loss": 0.57, "step": 8612 }, { "epoch": 1.216181869528382, "grad_norm": 3.428732221972703, "learning_rate": 1.3476326467973087e-05, "loss": 0.6101, "step": 8613 }, { "epoch": 1.2163230725783678, "grad_norm": 3.2315133997340793, "learning_rate": 1.3474897053517963e-05, "loss": 0.6142, "step": 8614 }, { "epoch": 1.2164642756283537, "grad_norm": 3.2781712905940084, "learning_rate": 1.3473467558307736e-05, "loss": 0.608, "step": 8615 }, { "epoch": 1.2166054786783396, "grad_norm": 3.241053531102113, "learning_rate": 1.347203798237562e-05, "loss": 0.5365, "step": 8616 }, { "epoch": 1.2167466817283255, "grad_norm": 3.3052234346771403, "learning_rate": 1.347060832575484e-05, "loss": 0.5503, "step": 8617 }, { "epoch": 1.2168878847783113, "grad_norm": 3.5386127987796647, "learning_rate": 1.3469178588478621e-05, "loss": 0.6891, "step": 8618 }, { "epoch": 1.217029087828297, "grad_norm": 3.5660582576540243, "learning_rate": 1.3467748770580193e-05, "loss": 0.6116, "step": 8619 }, { "epoch": 1.2171702908782829, "grad_norm": 3.0463352710576705, "learning_rate": 1.346631887209278e-05, "loss": 0.4931, "step": 8620 }, { "epoch": 1.2173114939282688, "grad_norm": 3.3812911964143435, "learning_rate": 1.3464888893049612e-05, "loss": 0.6282, "step": 8621 }, { "epoch": 1.2174526969782546, "grad_norm": 3.674696147908224, "learning_rate": 1.3463458833483923e-05, "loss": 0.5547, "step": 8622 }, { "epoch": 1.2175939000282405, "grad_norm": 3.957241833318662, "learning_rate": 1.3462028693428951e-05, "loss": 0.7094, "step": 8623 }, { "epoch": 1.2177351030782264, "grad_norm": 3.2563181493888242, "learning_rate": 1.3460598472917923e-05, "loss": 0.6156, "step": 8624 }, { "epoch": 1.2178763061282123, "grad_norm": 3.4703480223227197, "learning_rate": 1.3459168171984079e-05, "loss": 0.5381, "step": 8625 }, { "epoch": 1.2180175091781982, "grad_norm": 3.891617948874913, "learning_rate": 1.3457737790660663e-05, "loss": 0.5109, "step": 8626 }, { "epoch": 1.218158712228184, "grad_norm": 3.009528709851622, "learning_rate": 1.3456307328980915e-05, "loss": 0.4959, "step": 8627 }, { "epoch": 1.21829991527817, "grad_norm": 3.5403681733648624, "learning_rate": 1.3454876786978076e-05, "loss": 0.5796, "step": 8628 }, { "epoch": 1.2184411183281558, "grad_norm": 3.3127842765928857, "learning_rate": 1.3453446164685394e-05, "loss": 0.5861, "step": 8629 }, { "epoch": 1.2185823213781417, "grad_norm": 3.4940941279201168, "learning_rate": 1.3452015462136113e-05, "loss": 0.5456, "step": 8630 }, { "epoch": 1.2187235244281276, "grad_norm": 4.136366741650121, "learning_rate": 1.3450584679363483e-05, "loss": 0.569, "step": 8631 }, { "epoch": 1.2188647274781135, "grad_norm": 4.069479425265647, "learning_rate": 1.3449153816400758e-05, "loss": 0.6491, "step": 8632 }, { "epoch": 1.2190059305280994, "grad_norm": 3.3728440082937694, "learning_rate": 1.3447722873281186e-05, "loss": 0.56, "step": 8633 }, { "epoch": 1.2191471335780852, "grad_norm": 2.875231976442875, "learning_rate": 1.3446291850038021e-05, "loss": 0.4815, "step": 8634 }, { "epoch": 1.2192883366280711, "grad_norm": 3.900931577749558, "learning_rate": 1.3444860746704525e-05, "loss": 0.5908, "step": 8635 }, { "epoch": 1.219429539678057, "grad_norm": 3.834368671341001, "learning_rate": 1.3443429563313952e-05, "loss": 0.6266, "step": 8636 }, { "epoch": 1.219570742728043, "grad_norm": 3.783421321294592, "learning_rate": 1.3441998299899564e-05, "loss": 0.6338, "step": 8637 }, { "epoch": 1.2197119457780288, "grad_norm": 4.438207149848002, "learning_rate": 1.344056695649462e-05, "loss": 0.5973, "step": 8638 }, { "epoch": 1.2198531488280147, "grad_norm": 3.761510830021975, "learning_rate": 1.3439135533132389e-05, "loss": 0.7268, "step": 8639 }, { "epoch": 1.2199943518780005, "grad_norm": 3.2762204672586477, "learning_rate": 1.343770402984613e-05, "loss": 0.6784, "step": 8640 }, { "epoch": 1.2201355549279864, "grad_norm": 3.3557494198415863, "learning_rate": 1.3436272446669117e-05, "loss": 0.68, "step": 8641 }, { "epoch": 1.2202767579779723, "grad_norm": 3.2897323047668423, "learning_rate": 1.3434840783634611e-05, "loss": 0.5514, "step": 8642 }, { "epoch": 1.2204179610279582, "grad_norm": 3.4295679602253593, "learning_rate": 1.3433409040775894e-05, "loss": 0.5515, "step": 8643 }, { "epoch": 1.220559164077944, "grad_norm": 3.315950819195173, "learning_rate": 1.3431977218126234e-05, "loss": 0.5317, "step": 8644 }, { "epoch": 1.22070036712793, "grad_norm": 3.6681723092155263, "learning_rate": 1.3430545315718901e-05, "loss": 0.7062, "step": 8645 }, { "epoch": 1.2208415701779158, "grad_norm": 4.183329008022825, "learning_rate": 1.3429113333587181e-05, "loss": 0.7053, "step": 8646 }, { "epoch": 1.2209827732279017, "grad_norm": 4.496857321741503, "learning_rate": 1.3427681271764343e-05, "loss": 0.7416, "step": 8647 }, { "epoch": 1.2211239762778876, "grad_norm": 3.9468481737996073, "learning_rate": 1.3426249130283677e-05, "loss": 0.6425, "step": 8648 }, { "epoch": 1.2212651793278735, "grad_norm": 4.115070319931377, "learning_rate": 1.342481690917846e-05, "loss": 0.7116, "step": 8649 }, { "epoch": 1.2214063823778594, "grad_norm": 3.3309160562230207, "learning_rate": 1.3423384608481978e-05, "loss": 0.5448, "step": 8650 }, { "epoch": 1.2215475854278453, "grad_norm": 3.0595835494512036, "learning_rate": 1.3421952228227513e-05, "loss": 0.541, "step": 8651 }, { "epoch": 1.2216887884778311, "grad_norm": 3.481262881395974, "learning_rate": 1.342051976844836e-05, "loss": 0.6375, "step": 8652 }, { "epoch": 1.221829991527817, "grad_norm": 3.104621306616388, "learning_rate": 1.34190872291778e-05, "loss": 0.49, "step": 8653 }, { "epoch": 1.221971194577803, "grad_norm": 3.9152543364027554, "learning_rate": 1.3417654610449131e-05, "loss": 0.7503, "step": 8654 }, { "epoch": 1.2221123976277888, "grad_norm": 6.502413455851264, "learning_rate": 1.3416221912295646e-05, "loss": 0.629, "step": 8655 }, { "epoch": 1.2222536006777747, "grad_norm": 2.8990964619407915, "learning_rate": 1.341478913475064e-05, "loss": 0.4511, "step": 8656 }, { "epoch": 1.2223948037277605, "grad_norm": 3.6167372976067598, "learning_rate": 1.3413356277847407e-05, "loss": 0.6126, "step": 8657 }, { "epoch": 1.2225360067777464, "grad_norm": 4.033882756022542, "learning_rate": 1.341192334161925e-05, "loss": 0.6139, "step": 8658 }, { "epoch": 1.2226772098277323, "grad_norm": 3.2063581644267227, "learning_rate": 1.3410490326099466e-05, "loss": 0.614, "step": 8659 }, { "epoch": 1.2228184128777182, "grad_norm": 3.8950727745007354, "learning_rate": 1.3409057231321363e-05, "loss": 0.7888, "step": 8660 }, { "epoch": 1.222959615927704, "grad_norm": 3.9035403668366033, "learning_rate": 1.3407624057318241e-05, "loss": 0.6694, "step": 8661 }, { "epoch": 1.22310081897769, "grad_norm": 3.979011833176761, "learning_rate": 1.3406190804123405e-05, "loss": 0.7471, "step": 8662 }, { "epoch": 1.2232420220276758, "grad_norm": 3.348418197190203, "learning_rate": 1.3404757471770168e-05, "loss": 0.5958, "step": 8663 }, { "epoch": 1.2233832250776617, "grad_norm": 3.5997083603236204, "learning_rate": 1.3403324060291837e-05, "loss": 0.5985, "step": 8664 }, { "epoch": 1.2235244281276476, "grad_norm": 3.19066917317145, "learning_rate": 1.3401890569721725e-05, "loss": 0.6062, "step": 8665 }, { "epoch": 1.2236656311776335, "grad_norm": 4.130663238812506, "learning_rate": 1.3400457000093145e-05, "loss": 0.7731, "step": 8666 }, { "epoch": 1.2238068342276194, "grad_norm": 3.7333617085083532, "learning_rate": 1.3399023351439416e-05, "loss": 0.6675, "step": 8667 }, { "epoch": 1.2239480372776053, "grad_norm": 3.698225902897182, "learning_rate": 1.3397589623793845e-05, "loss": 0.7208, "step": 8668 }, { "epoch": 1.2240892403275911, "grad_norm": 3.4375251457035714, "learning_rate": 1.3396155817189767e-05, "loss": 0.6355, "step": 8669 }, { "epoch": 1.224230443377577, "grad_norm": 3.9830572879603148, "learning_rate": 1.3394721931660488e-05, "loss": 0.6773, "step": 8670 }, { "epoch": 1.224371646427563, "grad_norm": 3.154221504442012, "learning_rate": 1.3393287967239339e-05, "loss": 0.6301, "step": 8671 }, { "epoch": 1.2245128494775488, "grad_norm": 3.5714632920178193, "learning_rate": 1.3391853923959644e-05, "loss": 0.5697, "step": 8672 }, { "epoch": 1.2246540525275347, "grad_norm": 3.274560478021891, "learning_rate": 1.3390419801854729e-05, "loss": 0.5433, "step": 8673 }, { "epoch": 1.2247952555775206, "grad_norm": 5.170403187765152, "learning_rate": 1.3388985600957922e-05, "loss": 0.9513, "step": 8674 }, { "epoch": 1.2249364586275064, "grad_norm": 3.409885881919999, "learning_rate": 1.3387551321302553e-05, "loss": 0.5147, "step": 8675 }, { "epoch": 1.2250776616774923, "grad_norm": 3.814537936970504, "learning_rate": 1.3386116962921951e-05, "loss": 0.6673, "step": 8676 }, { "epoch": 1.2252188647274782, "grad_norm": 2.906133058884447, "learning_rate": 1.3384682525849458e-05, "loss": 0.4589, "step": 8677 }, { "epoch": 1.225360067777464, "grad_norm": 3.7565480488160925, "learning_rate": 1.3383248010118404e-05, "loss": 0.6455, "step": 8678 }, { "epoch": 1.2255012708274498, "grad_norm": 3.9281259081223596, "learning_rate": 1.3381813415762124e-05, "loss": 0.6238, "step": 8679 }, { "epoch": 1.2256424738774356, "grad_norm": 3.9086429734862085, "learning_rate": 1.3380378742813964e-05, "loss": 0.6002, "step": 8680 }, { "epoch": 1.2257836769274215, "grad_norm": 3.6605463214179226, "learning_rate": 1.337894399130726e-05, "loss": 0.6169, "step": 8681 }, { "epoch": 1.2259248799774074, "grad_norm": 3.0267969696340815, "learning_rate": 1.337750916127536e-05, "loss": 0.6623, "step": 8682 }, { "epoch": 1.2260660830273933, "grad_norm": 3.212405455609593, "learning_rate": 1.3376074252751604e-05, "loss": 0.5239, "step": 8683 }, { "epoch": 1.2262072860773792, "grad_norm": 3.506126059898003, "learning_rate": 1.3374639265769337e-05, "loss": 0.5975, "step": 8684 }, { "epoch": 1.226348489127365, "grad_norm": 3.6403411215180337, "learning_rate": 1.3373204200361914e-05, "loss": 0.7259, "step": 8685 }, { "epoch": 1.226489692177351, "grad_norm": 3.226312766692689, "learning_rate": 1.3371769056562683e-05, "loss": 0.6025, "step": 8686 }, { "epoch": 1.2266308952273368, "grad_norm": 3.189727790563823, "learning_rate": 1.3370333834404994e-05, "loss": 0.5978, "step": 8687 }, { "epoch": 1.2267720982773227, "grad_norm": 3.676676836942921, "learning_rate": 1.3368898533922202e-05, "loss": 0.5658, "step": 8688 }, { "epoch": 1.2269133013273086, "grad_norm": 3.9842953077049836, "learning_rate": 1.3367463155147661e-05, "loss": 0.6029, "step": 8689 }, { "epoch": 1.2270545043772945, "grad_norm": 3.1985296668742627, "learning_rate": 1.3366027698114734e-05, "loss": 0.5362, "step": 8690 }, { "epoch": 1.2271957074272803, "grad_norm": 3.7663589956570283, "learning_rate": 1.3364592162856777e-05, "loss": 0.511, "step": 8691 }, { "epoch": 1.2273369104772662, "grad_norm": 3.9798869481993355, "learning_rate": 1.3363156549407151e-05, "loss": 0.6465, "step": 8692 }, { "epoch": 1.2274781135272521, "grad_norm": 3.600116782901093, "learning_rate": 1.3361720857799218e-05, "loss": 0.5678, "step": 8693 }, { "epoch": 1.227619316577238, "grad_norm": 4.353373280940051, "learning_rate": 1.3360285088066343e-05, "loss": 0.7273, "step": 8694 }, { "epoch": 1.2277605196272239, "grad_norm": 3.1038583367283104, "learning_rate": 1.3358849240241897e-05, "loss": 0.4743, "step": 8695 }, { "epoch": 1.2279017226772098, "grad_norm": 4.419471958944962, "learning_rate": 1.3357413314359242e-05, "loss": 0.8118, "step": 8696 }, { "epoch": 1.2280429257271956, "grad_norm": 3.3226239160925144, "learning_rate": 1.3355977310451754e-05, "loss": 0.5518, "step": 8697 }, { "epoch": 1.2281841287771815, "grad_norm": 3.2776457193499704, "learning_rate": 1.3354541228552802e-05, "loss": 0.5904, "step": 8698 }, { "epoch": 1.2283253318271674, "grad_norm": 3.4803200465895094, "learning_rate": 1.3353105068695759e-05, "loss": 0.6501, "step": 8699 }, { "epoch": 1.2284665348771533, "grad_norm": 3.107582201927797, "learning_rate": 1.3351668830914004e-05, "loss": 0.5286, "step": 8700 }, { "epoch": 1.2286077379271392, "grad_norm": 3.5479816711720042, "learning_rate": 1.3350232515240913e-05, "loss": 0.4949, "step": 8701 }, { "epoch": 1.228748940977125, "grad_norm": 3.365327904351478, "learning_rate": 1.3348796121709862e-05, "loss": 0.5708, "step": 8702 }, { "epoch": 1.228890144027111, "grad_norm": 3.247106685718305, "learning_rate": 1.334735965035424e-05, "loss": 0.519, "step": 8703 }, { "epoch": 1.2290313470770968, "grad_norm": 4.4212674430501915, "learning_rate": 1.3345923101207423e-05, "loss": 0.779, "step": 8704 }, { "epoch": 1.2291725501270827, "grad_norm": 3.2772878319155017, "learning_rate": 1.3344486474302798e-05, "loss": 0.4847, "step": 8705 }, { "epoch": 1.2293137531770686, "grad_norm": 5.255088256681144, "learning_rate": 1.334304976967375e-05, "loss": 0.8484, "step": 8706 }, { "epoch": 1.2294549562270545, "grad_norm": 3.723380392294394, "learning_rate": 1.3341612987353671e-05, "loss": 0.6545, "step": 8707 }, { "epoch": 1.2295961592770404, "grad_norm": 3.225034887113844, "learning_rate": 1.334017612737595e-05, "loss": 0.5339, "step": 8708 }, { "epoch": 1.2297373623270262, "grad_norm": 3.1661664658221116, "learning_rate": 1.333873918977398e-05, "loss": 0.5411, "step": 8709 }, { "epoch": 1.2298785653770121, "grad_norm": 3.3400107518287, "learning_rate": 1.333730217458115e-05, "loss": 0.5636, "step": 8710 }, { "epoch": 1.230019768426998, "grad_norm": 3.478840793294304, "learning_rate": 1.3335865081830858e-05, "loss": 0.6096, "step": 8711 }, { "epoch": 1.2301609714769839, "grad_norm": 4.130375300777989, "learning_rate": 1.3334427911556506e-05, "loss": 0.6898, "step": 8712 }, { "epoch": 1.2303021745269698, "grad_norm": 3.4396497978684946, "learning_rate": 1.3332990663791486e-05, "loss": 0.5467, "step": 8713 }, { "epoch": 1.2304433775769557, "grad_norm": 3.2920673045485866, "learning_rate": 1.3331553338569204e-05, "loss": 0.61, "step": 8714 }, { "epoch": 1.2305845806269415, "grad_norm": 3.8797046174351846, "learning_rate": 1.333011593592306e-05, "loss": 0.6901, "step": 8715 }, { "epoch": 1.2307257836769274, "grad_norm": 3.3470547566034514, "learning_rate": 1.3328678455886461e-05, "loss": 0.5391, "step": 8716 }, { "epoch": 1.2308669867269133, "grad_norm": 3.1900555088897358, "learning_rate": 1.332724089849281e-05, "loss": 0.5123, "step": 8717 }, { "epoch": 1.2310081897768992, "grad_norm": 3.5290729950105026, "learning_rate": 1.3325803263775521e-05, "loss": 0.5649, "step": 8718 }, { "epoch": 1.231149392826885, "grad_norm": 4.112362098580588, "learning_rate": 1.3324365551767997e-05, "loss": 0.8645, "step": 8719 }, { "epoch": 1.231290595876871, "grad_norm": 3.6469944009374675, "learning_rate": 1.3322927762503656e-05, "loss": 0.5721, "step": 8720 }, { "epoch": 1.2314317989268568, "grad_norm": 2.8580873561702735, "learning_rate": 1.3321489896015908e-05, "loss": 0.4982, "step": 8721 }, { "epoch": 1.2315730019768427, "grad_norm": 2.9387662426394745, "learning_rate": 1.3320051952338166e-05, "loss": 0.4603, "step": 8722 }, { "epoch": 1.2317142050268286, "grad_norm": 3.242165368209716, "learning_rate": 1.3318613931503854e-05, "loss": 0.5198, "step": 8723 }, { "epoch": 1.2318554080768145, "grad_norm": 3.6759919511289336, "learning_rate": 1.3317175833546387e-05, "loss": 0.5658, "step": 8724 }, { "epoch": 1.2319966111268004, "grad_norm": 3.36715226945166, "learning_rate": 1.3315737658499188e-05, "loss": 0.5706, "step": 8725 }, { "epoch": 1.2321378141767863, "grad_norm": 3.6596440183135432, "learning_rate": 1.3314299406395677e-05, "loss": 0.5343, "step": 8726 }, { "epoch": 1.2322790172267721, "grad_norm": 3.4085440896636463, "learning_rate": 1.3312861077269277e-05, "loss": 0.5438, "step": 8727 }, { "epoch": 1.232420220276758, "grad_norm": 2.7259026125836234, "learning_rate": 1.3311422671153416e-05, "loss": 0.433, "step": 8728 }, { "epoch": 1.232561423326744, "grad_norm": 3.045497847759873, "learning_rate": 1.3309984188081524e-05, "loss": 0.5057, "step": 8729 }, { "epoch": 1.2327026263767298, "grad_norm": 4.313640047954047, "learning_rate": 1.3308545628087029e-05, "loss": 0.7062, "step": 8730 }, { "epoch": 1.2328438294267157, "grad_norm": 3.5517214546211786, "learning_rate": 1.330710699120336e-05, "loss": 0.6463, "step": 8731 }, { "epoch": 1.2329850324767015, "grad_norm": 4.843156263412101, "learning_rate": 1.3305668277463954e-05, "loss": 0.7249, "step": 8732 }, { "epoch": 1.2331262355266874, "grad_norm": 3.38763999286509, "learning_rate": 1.3304229486902248e-05, "loss": 0.5147, "step": 8733 }, { "epoch": 1.2332674385766733, "grad_norm": 3.3919326613363774, "learning_rate": 1.3302790619551673e-05, "loss": 0.5824, "step": 8734 }, { "epoch": 1.2334086416266592, "grad_norm": 3.8602501109615517, "learning_rate": 1.330135167544567e-05, "loss": 0.6411, "step": 8735 }, { "epoch": 1.233549844676645, "grad_norm": 3.2244372865228206, "learning_rate": 1.329991265461768e-05, "loss": 0.5034, "step": 8736 }, { "epoch": 1.233691047726631, "grad_norm": 2.80761104287388, "learning_rate": 1.3298473557101146e-05, "loss": 0.4364, "step": 8737 }, { "epoch": 1.2338322507766168, "grad_norm": 3.5905259617924354, "learning_rate": 1.3297034382929512e-05, "loss": 0.6025, "step": 8738 }, { "epoch": 1.2339734538266027, "grad_norm": 3.6553094866454776, "learning_rate": 1.3295595132136218e-05, "loss": 0.5634, "step": 8739 }, { "epoch": 1.2341146568765886, "grad_norm": 3.2519846132149066, "learning_rate": 1.329415580475472e-05, "loss": 0.5509, "step": 8740 }, { "epoch": 1.2342558599265745, "grad_norm": 4.134821385810212, "learning_rate": 1.3292716400818467e-05, "loss": 0.7049, "step": 8741 }, { "epoch": 1.2343970629765604, "grad_norm": 3.753889702249401, "learning_rate": 1.32912769203609e-05, "loss": 0.6503, "step": 8742 }, { "epoch": 1.2345382660265463, "grad_norm": 3.677663140687254, "learning_rate": 1.3289837363415484e-05, "loss": 0.6368, "step": 8743 }, { "epoch": 1.2346794690765321, "grad_norm": 3.9913719450959055, "learning_rate": 1.3288397730015666e-05, "loss": 0.6309, "step": 8744 }, { "epoch": 1.234820672126518, "grad_norm": 3.2337630960176824, "learning_rate": 1.3286958020194902e-05, "loss": 0.5586, "step": 8745 }, { "epoch": 1.234961875176504, "grad_norm": 2.661810933518996, "learning_rate": 1.328551823398666e-05, "loss": 0.4363, "step": 8746 }, { "epoch": 1.2351030782264898, "grad_norm": 3.7328450656648733, "learning_rate": 1.328407837142439e-05, "loss": 0.7094, "step": 8747 }, { "epoch": 1.2352442812764757, "grad_norm": 4.120270693464386, "learning_rate": 1.3282638432541553e-05, "loss": 0.6827, "step": 8748 }, { "epoch": 1.2353854843264616, "grad_norm": 3.358718703789723, "learning_rate": 1.3281198417371621e-05, "loss": 0.5792, "step": 8749 }, { "epoch": 1.2355266873764474, "grad_norm": 3.138071982901729, "learning_rate": 1.3279758325948054e-05, "loss": 0.5307, "step": 8750 }, { "epoch": 1.2356678904264333, "grad_norm": 3.200415191627236, "learning_rate": 1.3278318158304319e-05, "loss": 0.5219, "step": 8751 }, { "epoch": 1.2358090934764192, "grad_norm": 4.262555451277976, "learning_rate": 1.3276877914473887e-05, "loss": 0.6127, "step": 8752 }, { "epoch": 1.235950296526405, "grad_norm": 3.177761466802031, "learning_rate": 1.3275437594490228e-05, "loss": 0.4921, "step": 8753 }, { "epoch": 1.236091499576391, "grad_norm": 3.760803237939309, "learning_rate": 1.3273997198386814e-05, "loss": 0.6384, "step": 8754 }, { "epoch": 1.2362327026263766, "grad_norm": 3.8895198110196523, "learning_rate": 1.3272556726197116e-05, "loss": 0.6395, "step": 8755 }, { "epoch": 1.2363739056763625, "grad_norm": 4.684518618921429, "learning_rate": 1.3271116177954615e-05, "loss": 0.7126, "step": 8756 }, { "epoch": 1.2365151087263484, "grad_norm": 3.259053709408109, "learning_rate": 1.3269675553692787e-05, "loss": 0.5349, "step": 8757 }, { "epoch": 1.2366563117763343, "grad_norm": 3.6834726522198395, "learning_rate": 1.3268234853445113e-05, "loss": 0.5776, "step": 8758 }, { "epoch": 1.2367975148263202, "grad_norm": 4.370903924005992, "learning_rate": 1.3266794077245066e-05, "loss": 0.5604, "step": 8759 }, { "epoch": 1.236938717876306, "grad_norm": 3.639574699027349, "learning_rate": 1.3265353225126143e-05, "loss": 0.679, "step": 8760 }, { "epoch": 1.237079920926292, "grad_norm": 4.176948697697074, "learning_rate": 1.3263912297121817e-05, "loss": 0.8044, "step": 8761 }, { "epoch": 1.2372211239762778, "grad_norm": 3.8825169345936, "learning_rate": 1.3262471293265577e-05, "loss": 0.6151, "step": 8762 }, { "epoch": 1.2373623270262637, "grad_norm": 5.074461967211695, "learning_rate": 1.3261030213590919e-05, "loss": 0.9318, "step": 8763 }, { "epoch": 1.2375035300762496, "grad_norm": 3.54426080375801, "learning_rate": 1.3259589058131323e-05, "loss": 0.7533, "step": 8764 }, { "epoch": 1.2376447331262355, "grad_norm": 3.443067838183573, "learning_rate": 1.3258147826920285e-05, "loss": 0.6384, "step": 8765 }, { "epoch": 1.2377859361762213, "grad_norm": 3.0886851911528463, "learning_rate": 1.32567065199913e-05, "loss": 0.5565, "step": 8766 }, { "epoch": 1.2379271392262072, "grad_norm": 3.3323239150652078, "learning_rate": 1.3255265137377864e-05, "loss": 0.5616, "step": 8767 }, { "epoch": 1.2380683422761931, "grad_norm": 3.5836439975078753, "learning_rate": 1.3253823679113466e-05, "loss": 0.5795, "step": 8768 }, { "epoch": 1.238209545326179, "grad_norm": 3.167550862654838, "learning_rate": 1.3252382145231616e-05, "loss": 0.6186, "step": 8769 }, { "epoch": 1.2383507483761649, "grad_norm": 3.438833487710581, "learning_rate": 1.3250940535765808e-05, "loss": 0.5841, "step": 8770 }, { "epoch": 1.2384919514261508, "grad_norm": 3.4687520871239297, "learning_rate": 1.3249498850749547e-05, "loss": 0.5865, "step": 8771 }, { "epoch": 1.2386331544761366, "grad_norm": 3.508387611014922, "learning_rate": 1.3248057090216336e-05, "loss": 0.6311, "step": 8772 }, { "epoch": 1.2387743575261225, "grad_norm": 3.572248319273451, "learning_rate": 1.3246615254199679e-05, "loss": 0.5752, "step": 8773 }, { "epoch": 1.2389155605761084, "grad_norm": 3.6925340662810626, "learning_rate": 1.3245173342733084e-05, "loss": 0.6232, "step": 8774 }, { "epoch": 1.2390567636260943, "grad_norm": 2.935864365319856, "learning_rate": 1.3243731355850069e-05, "loss": 0.4893, "step": 8775 }, { "epoch": 1.2391979666760802, "grad_norm": 3.1348486320544975, "learning_rate": 1.3242289293584132e-05, "loss": 0.5436, "step": 8776 }, { "epoch": 1.239339169726066, "grad_norm": 3.1965456469447164, "learning_rate": 1.3240847155968792e-05, "loss": 0.5385, "step": 8777 }, { "epoch": 1.239480372776052, "grad_norm": 4.188913897730263, "learning_rate": 1.3239404943037566e-05, "loss": 0.6784, "step": 8778 }, { "epoch": 1.2396215758260378, "grad_norm": 3.6895657965985853, "learning_rate": 1.3237962654823965e-05, "loss": 0.5945, "step": 8779 }, { "epoch": 1.2397627788760237, "grad_norm": 3.6331855567298397, "learning_rate": 1.3236520291361516e-05, "loss": 0.531, "step": 8780 }, { "epoch": 1.2399039819260096, "grad_norm": 3.5912946304073654, "learning_rate": 1.3235077852683731e-05, "loss": 0.7203, "step": 8781 }, { "epoch": 1.2400451849759955, "grad_norm": 3.236463501239738, "learning_rate": 1.3233635338824132e-05, "loss": 0.5622, "step": 8782 }, { "epoch": 1.2401863880259814, "grad_norm": 2.9431432015729344, "learning_rate": 1.3232192749816243e-05, "loss": 0.4843, "step": 8783 }, { "epoch": 1.2403275910759672, "grad_norm": 4.00352661968204, "learning_rate": 1.3230750085693599e-05, "loss": 0.7451, "step": 8784 }, { "epoch": 1.2404687941259531, "grad_norm": 4.456548997837541, "learning_rate": 1.3229307346489706e-05, "loss": 0.6909, "step": 8785 }, { "epoch": 1.240609997175939, "grad_norm": 3.6956437535631115, "learning_rate": 1.3227864532238113e-05, "loss": 0.6299, "step": 8786 }, { "epoch": 1.2407512002259249, "grad_norm": 4.2098723305151395, "learning_rate": 1.3226421642972338e-05, "loss": 0.7161, "step": 8787 }, { "epoch": 1.2408924032759108, "grad_norm": 3.539657835746912, "learning_rate": 1.3224978678725921e-05, "loss": 0.5809, "step": 8788 }, { "epoch": 1.2410336063258967, "grad_norm": 3.3095940675461524, "learning_rate": 1.322353563953239e-05, "loss": 0.5769, "step": 8789 }, { "epoch": 1.2411748093758825, "grad_norm": 4.149141414449005, "learning_rate": 1.3222092525425286e-05, "loss": 0.5538, "step": 8790 }, { "epoch": 1.2413160124258684, "grad_norm": 3.965040769406472, "learning_rate": 1.3220649336438137e-05, "loss": 0.6906, "step": 8791 }, { "epoch": 1.2414572154758543, "grad_norm": 2.9669938479299613, "learning_rate": 1.3219206072604496e-05, "loss": 0.5257, "step": 8792 }, { "epoch": 1.2415984185258402, "grad_norm": 3.7567511804677114, "learning_rate": 1.3217762733957888e-05, "loss": 0.5202, "step": 8793 }, { "epoch": 1.241739621575826, "grad_norm": 3.601470149270778, "learning_rate": 1.321631932053187e-05, "loss": 0.6698, "step": 8794 }, { "epoch": 1.241880824625812, "grad_norm": 3.937632431517342, "learning_rate": 1.3214875832359976e-05, "loss": 0.6854, "step": 8795 }, { "epoch": 1.2420220276757978, "grad_norm": 3.498409729477378, "learning_rate": 1.3213432269475759e-05, "loss": 0.6512, "step": 8796 }, { "epoch": 1.2421632307257837, "grad_norm": 3.486828253123663, "learning_rate": 1.3211988631912763e-05, "loss": 0.5526, "step": 8797 }, { "epoch": 1.2423044337757696, "grad_norm": 3.2226967640097093, "learning_rate": 1.3210544919704539e-05, "loss": 0.5399, "step": 8798 }, { "epoch": 1.2424456368257555, "grad_norm": 3.5689335021443376, "learning_rate": 1.3209101132884634e-05, "loss": 0.5354, "step": 8799 }, { "epoch": 1.2425868398757414, "grad_norm": 3.404096080261258, "learning_rate": 1.3207657271486607e-05, "loss": 0.605, "step": 8800 }, { "epoch": 1.2427280429257272, "grad_norm": 3.5728517955633263, "learning_rate": 1.3206213335544014e-05, "loss": 0.5884, "step": 8801 }, { "epoch": 1.2428692459757131, "grad_norm": 4.2201097792867195, "learning_rate": 1.3204769325090403e-05, "loss": 0.6789, "step": 8802 }, { "epoch": 1.243010449025699, "grad_norm": 3.604481167022113, "learning_rate": 1.3203325240159337e-05, "loss": 0.583, "step": 8803 }, { "epoch": 1.243151652075685, "grad_norm": 3.5253743176520067, "learning_rate": 1.3201881080784378e-05, "loss": 0.6013, "step": 8804 }, { "epoch": 1.2432928551256708, "grad_norm": 3.712498563349242, "learning_rate": 1.3200436846999085e-05, "loss": 0.6944, "step": 8805 }, { "epoch": 1.2434340581756567, "grad_norm": 3.087602266306328, "learning_rate": 1.3198992538837021e-05, "loss": 0.5121, "step": 8806 }, { "epoch": 1.2435752612256425, "grad_norm": 2.8669561716366956, "learning_rate": 1.3197548156331752e-05, "loss": 0.5541, "step": 8807 }, { "epoch": 1.2437164642756284, "grad_norm": 3.963777886816572, "learning_rate": 1.3196103699516844e-05, "loss": 0.6355, "step": 8808 }, { "epoch": 1.2438576673256143, "grad_norm": 3.37288524112615, "learning_rate": 1.319465916842587e-05, "loss": 0.5522, "step": 8809 }, { "epoch": 1.2439988703756002, "grad_norm": 4.6162432659403985, "learning_rate": 1.3193214563092392e-05, "loss": 0.5549, "step": 8810 }, { "epoch": 1.244140073425586, "grad_norm": 4.011670435701752, "learning_rate": 1.319176988354999e-05, "loss": 0.6446, "step": 8811 }, { "epoch": 1.244281276475572, "grad_norm": 3.969847613268934, "learning_rate": 1.319032512983223e-05, "loss": 0.6532, "step": 8812 }, { "epoch": 1.2444224795255578, "grad_norm": 3.554827968653602, "learning_rate": 1.3188880301972696e-05, "loss": 0.6085, "step": 8813 }, { "epoch": 1.2445636825755437, "grad_norm": 3.009536506260182, "learning_rate": 1.318743540000496e-05, "loss": 0.4958, "step": 8814 }, { "epoch": 1.2447048856255294, "grad_norm": 3.448484926164053, "learning_rate": 1.3185990423962602e-05, "loss": 0.6078, "step": 8815 }, { "epoch": 1.2448460886755153, "grad_norm": 3.287058916227956, "learning_rate": 1.3184545373879201e-05, "loss": 0.5298, "step": 8816 }, { "epoch": 1.2449872917255012, "grad_norm": 3.6185668494815717, "learning_rate": 1.3183100249788342e-05, "loss": 0.579, "step": 8817 }, { "epoch": 1.245128494775487, "grad_norm": 3.7719687525944257, "learning_rate": 1.318165505172361e-05, "loss": 0.4656, "step": 8818 }, { "epoch": 1.245269697825473, "grad_norm": 4.0415331449559835, "learning_rate": 1.3180209779718584e-05, "loss": 0.7276, "step": 8819 }, { "epoch": 1.2454109008754588, "grad_norm": 3.5860333791680965, "learning_rate": 1.3178764433806858e-05, "loss": 0.6693, "step": 8820 }, { "epoch": 1.2455521039254447, "grad_norm": 3.2978923818028405, "learning_rate": 1.3177319014022021e-05, "loss": 0.5923, "step": 8821 }, { "epoch": 1.2456933069754306, "grad_norm": 3.1079156425258083, "learning_rate": 1.3175873520397659e-05, "loss": 0.4998, "step": 8822 }, { "epoch": 1.2458345100254165, "grad_norm": 3.437350757217801, "learning_rate": 1.3174427952967373e-05, "loss": 0.7065, "step": 8823 }, { "epoch": 1.2459757130754023, "grad_norm": 4.273112073343518, "learning_rate": 1.3172982311764749e-05, "loss": 0.6009, "step": 8824 }, { "epoch": 1.2461169161253882, "grad_norm": 4.230711585042246, "learning_rate": 1.3171536596823385e-05, "loss": 0.6747, "step": 8825 }, { "epoch": 1.246258119175374, "grad_norm": 2.9691244557211474, "learning_rate": 1.3170090808176883e-05, "loss": 0.5021, "step": 8826 }, { "epoch": 1.24639932222536, "grad_norm": 3.7183292395900396, "learning_rate": 1.316864494585884e-05, "loss": 0.6001, "step": 8827 }, { "epoch": 1.2465405252753459, "grad_norm": 2.8241661285580064, "learning_rate": 1.316719900990285e-05, "loss": 0.4634, "step": 8828 }, { "epoch": 1.2466817283253318, "grad_norm": 3.2483391687347853, "learning_rate": 1.3165753000342532e-05, "loss": 0.5398, "step": 8829 }, { "epoch": 1.2468229313753176, "grad_norm": 3.7439766374704155, "learning_rate": 1.3164306917211475e-05, "loss": 0.6913, "step": 8830 }, { "epoch": 1.2469641344253035, "grad_norm": 3.1873693922947566, "learning_rate": 1.3162860760543297e-05, "loss": 0.4709, "step": 8831 }, { "epoch": 1.2471053374752894, "grad_norm": 4.192412531159845, "learning_rate": 1.3161414530371598e-05, "loss": 0.6238, "step": 8832 }, { "epoch": 1.2472465405252753, "grad_norm": 4.112658388139034, "learning_rate": 1.3159968226729992e-05, "loss": 0.7094, "step": 8833 }, { "epoch": 1.2473877435752612, "grad_norm": 2.928363409720141, "learning_rate": 1.3158521849652087e-05, "loss": 0.5165, "step": 8834 }, { "epoch": 1.247528946625247, "grad_norm": 5.240800274105885, "learning_rate": 1.31570753991715e-05, "loss": 0.8118, "step": 8835 }, { "epoch": 1.247670149675233, "grad_norm": 4.067856663369048, "learning_rate": 1.3155628875321842e-05, "loss": 0.6378, "step": 8836 }, { "epoch": 1.2478113527252188, "grad_norm": 3.788071631396618, "learning_rate": 1.3154182278136734e-05, "loss": 0.6113, "step": 8837 }, { "epoch": 1.2479525557752047, "grad_norm": 3.435652464183366, "learning_rate": 1.315273560764979e-05, "loss": 0.5113, "step": 8838 }, { "epoch": 1.2480937588251906, "grad_norm": 4.775649081429604, "learning_rate": 1.3151288863894633e-05, "loss": 0.7843, "step": 8839 }, { "epoch": 1.2482349618751765, "grad_norm": 3.2997238814226844, "learning_rate": 1.3149842046904885e-05, "loss": 0.5308, "step": 8840 }, { "epoch": 1.2483761649251623, "grad_norm": 3.4772409889865665, "learning_rate": 1.3148395156714169e-05, "loss": 0.596, "step": 8841 }, { "epoch": 1.2485173679751482, "grad_norm": 4.199031420248525, "learning_rate": 1.3146948193356105e-05, "loss": 0.6357, "step": 8842 }, { "epoch": 1.2486585710251341, "grad_norm": 3.1522251625643833, "learning_rate": 1.314550115686433e-05, "loss": 0.4538, "step": 8843 }, { "epoch": 1.24879977407512, "grad_norm": 3.6964605650874423, "learning_rate": 1.3144054047272462e-05, "loss": 0.6175, "step": 8844 }, { "epoch": 1.2489409771251059, "grad_norm": 2.9756356761730323, "learning_rate": 1.3142606864614136e-05, "loss": 0.5128, "step": 8845 }, { "epoch": 1.2490821801750918, "grad_norm": 4.847521576357099, "learning_rate": 1.3141159608922984e-05, "loss": 0.7898, "step": 8846 }, { "epoch": 1.2492233832250776, "grad_norm": 3.19555466080851, "learning_rate": 1.313971228023264e-05, "loss": 0.5153, "step": 8847 }, { "epoch": 1.2493645862750635, "grad_norm": 3.18323702431375, "learning_rate": 1.3138264878576738e-05, "loss": 0.516, "step": 8848 }, { "epoch": 1.2495057893250494, "grad_norm": 3.6818333512778305, "learning_rate": 1.3136817403988918e-05, "loss": 0.563, "step": 8849 }, { "epoch": 1.2496469923750353, "grad_norm": 3.1073702214895444, "learning_rate": 1.3135369856502813e-05, "loss": 0.5175, "step": 8850 }, { "epoch": 1.2497881954250212, "grad_norm": 3.517799896132647, "learning_rate": 1.3133922236152066e-05, "loss": 0.5859, "step": 8851 }, { "epoch": 1.249929398475007, "grad_norm": 3.6545191569357085, "learning_rate": 1.3132474542970324e-05, "loss": 0.5811, "step": 8852 }, { "epoch": 1.250070601524993, "grad_norm": 3.6830604709977317, "learning_rate": 1.313102677699122e-05, "loss": 0.649, "step": 8853 }, { "epoch": 1.2502118045749788, "grad_norm": 3.4289558038114882, "learning_rate": 1.3129578938248411e-05, "loss": 0.5374, "step": 8854 }, { "epoch": 1.2503530076249647, "grad_norm": 3.1954982437347472, "learning_rate": 1.3128131026775539e-05, "loss": 0.6011, "step": 8855 }, { "epoch": 1.2504942106749506, "grad_norm": 4.578644899640924, "learning_rate": 1.3126683042606251e-05, "loss": 0.7552, "step": 8856 }, { "epoch": 1.2506354137249365, "grad_norm": 3.487276572802947, "learning_rate": 1.31252349857742e-05, "loss": 0.6086, "step": 8857 }, { "epoch": 1.2507766167749224, "grad_norm": 3.555008217422142, "learning_rate": 1.3123786856313036e-05, "loss": 0.6226, "step": 8858 }, { "epoch": 1.2509178198249082, "grad_norm": 3.371465182688844, "learning_rate": 1.3122338654256414e-05, "loss": 0.5301, "step": 8859 }, { "epoch": 1.2510590228748941, "grad_norm": 3.890419092880434, "learning_rate": 1.3120890379637996e-05, "loss": 0.6638, "step": 8860 }, { "epoch": 1.25120022592488, "grad_norm": 2.7699914645186228, "learning_rate": 1.311944203249143e-05, "loss": 0.4545, "step": 8861 }, { "epoch": 1.2513414289748659, "grad_norm": 3.4554507306461693, "learning_rate": 1.3117993612850377e-05, "loss": 0.62, "step": 8862 }, { "epoch": 1.2514826320248518, "grad_norm": 2.966992611244665, "learning_rate": 1.3116545120748501e-05, "loss": 0.4996, "step": 8863 }, { "epoch": 1.2516238350748377, "grad_norm": 3.505279991720964, "learning_rate": 1.3115096556219462e-05, "loss": 0.6551, "step": 8864 }, { "epoch": 1.2517650381248235, "grad_norm": 3.760279503684472, "learning_rate": 1.311364791929692e-05, "loss": 0.7873, "step": 8865 }, { "epoch": 1.2519062411748094, "grad_norm": 3.484042485733211, "learning_rate": 1.3112199210014552e-05, "loss": 0.5954, "step": 8866 }, { "epoch": 1.2520474442247953, "grad_norm": 4.648987153093613, "learning_rate": 1.3110750428406013e-05, "loss": 0.8175, "step": 8867 }, { "epoch": 1.2521886472747812, "grad_norm": 3.3030767246552495, "learning_rate": 1.3109301574504978e-05, "loss": 0.646, "step": 8868 }, { "epoch": 1.252329850324767, "grad_norm": 3.929273037475889, "learning_rate": 1.310785264834512e-05, "loss": 0.6821, "step": 8869 }, { "epoch": 1.252471053374753, "grad_norm": 3.204361847352328, "learning_rate": 1.3106403649960109e-05, "loss": 0.5157, "step": 8870 }, { "epoch": 1.2526122564247388, "grad_norm": 3.82218263509139, "learning_rate": 1.3104954579383616e-05, "loss": 0.704, "step": 8871 }, { "epoch": 1.2527534594747247, "grad_norm": 3.2679830467993356, "learning_rate": 1.310350543664932e-05, "loss": 0.5674, "step": 8872 }, { "epoch": 1.2528946625247106, "grad_norm": 3.6296984233646734, "learning_rate": 1.3102056221790899e-05, "loss": 0.5782, "step": 8873 }, { "epoch": 1.2530358655746965, "grad_norm": 4.03258890304909, "learning_rate": 1.310060693484203e-05, "loss": 0.6735, "step": 8874 }, { "epoch": 1.2531770686246824, "grad_norm": 3.737356866454328, "learning_rate": 1.3099157575836397e-05, "loss": 0.6064, "step": 8875 }, { "epoch": 1.2533182716746682, "grad_norm": 4.532391642812697, "learning_rate": 1.3097708144807679e-05, "loss": 0.6881, "step": 8876 }, { "epoch": 1.2534594747246541, "grad_norm": 4.405534329667572, "learning_rate": 1.3096258641789561e-05, "loss": 0.7272, "step": 8877 }, { "epoch": 1.25360067777464, "grad_norm": 2.9499957772359098, "learning_rate": 1.3094809066815731e-05, "loss": 0.5234, "step": 8878 }, { "epoch": 1.253741880824626, "grad_norm": 3.2731531983735125, "learning_rate": 1.309335941991987e-05, "loss": 0.6693, "step": 8879 }, { "epoch": 1.2538830838746118, "grad_norm": 3.0469863548731264, "learning_rate": 1.3091909701135676e-05, "loss": 0.549, "step": 8880 }, { "epoch": 1.2540242869245977, "grad_norm": 2.9534372891883174, "learning_rate": 1.3090459910496837e-05, "loss": 0.558, "step": 8881 }, { "epoch": 1.2541654899745835, "grad_norm": 2.678718538473889, "learning_rate": 1.3089010048037043e-05, "loss": 0.5322, "step": 8882 }, { "epoch": 1.2543066930245694, "grad_norm": 4.535064901333173, "learning_rate": 1.3087560113789988e-05, "loss": 0.6839, "step": 8883 }, { "epoch": 1.2544478960745553, "grad_norm": 3.735818564746955, "learning_rate": 1.3086110107789371e-05, "loss": 0.6573, "step": 8884 }, { "epoch": 1.2545890991245412, "grad_norm": 3.848881031250211, "learning_rate": 1.3084660030068886e-05, "loss": 0.6112, "step": 8885 }, { "epoch": 1.254730302174527, "grad_norm": 4.448083422792564, "learning_rate": 1.3083209880662237e-05, "loss": 0.7231, "step": 8886 }, { "epoch": 1.254871505224513, "grad_norm": 3.328377895698151, "learning_rate": 1.3081759659603121e-05, "loss": 0.5734, "step": 8887 }, { "epoch": 1.2550127082744988, "grad_norm": 3.8545191273647936, "learning_rate": 1.3080309366925239e-05, "loss": 0.6591, "step": 8888 }, { "epoch": 1.2551539113244847, "grad_norm": 2.993041001391005, "learning_rate": 1.3078859002662301e-05, "loss": 0.5193, "step": 8889 }, { "epoch": 1.2552951143744706, "grad_norm": 3.478173442070076, "learning_rate": 1.3077408566848009e-05, "loss": 0.5155, "step": 8890 }, { "epoch": 1.2554363174244565, "grad_norm": 3.491073873243466, "learning_rate": 1.307595805951607e-05, "loss": 0.676, "step": 8891 }, { "epoch": 1.2555775204744424, "grad_norm": 3.8598432580956823, "learning_rate": 1.3074507480700192e-05, "loss": 0.6301, "step": 8892 }, { "epoch": 1.2557187235244283, "grad_norm": 3.868848972892207, "learning_rate": 1.307305683043409e-05, "loss": 0.7152, "step": 8893 }, { "epoch": 1.2558599265744141, "grad_norm": 4.748159980676383, "learning_rate": 1.3071606108751475e-05, "loss": 0.7932, "step": 8894 }, { "epoch": 1.2560011296244, "grad_norm": 3.756027891269936, "learning_rate": 1.307015531568606e-05, "loss": 0.5866, "step": 8895 }, { "epoch": 1.2561423326743857, "grad_norm": 3.936574219668811, "learning_rate": 1.3068704451271561e-05, "loss": 0.6473, "step": 8896 }, { "epoch": 1.2562835357243716, "grad_norm": 3.061095836867893, "learning_rate": 1.3067253515541696e-05, "loss": 0.7049, "step": 8897 }, { "epoch": 1.2564247387743575, "grad_norm": 3.718967192780105, "learning_rate": 1.3065802508530186e-05, "loss": 0.657, "step": 8898 }, { "epoch": 1.2565659418243433, "grad_norm": 3.406167284703346, "learning_rate": 1.3064351430270746e-05, "loss": 0.6181, "step": 8899 }, { "epoch": 1.2567071448743292, "grad_norm": 3.5380274967198018, "learning_rate": 1.3062900280797104e-05, "loss": 0.6316, "step": 8900 }, { "epoch": 1.256848347924315, "grad_norm": 3.5325974670148925, "learning_rate": 1.306144906014298e-05, "loss": 0.6437, "step": 8901 }, { "epoch": 1.256989550974301, "grad_norm": 3.8264641770226255, "learning_rate": 1.3059997768342108e-05, "loss": 0.6781, "step": 8902 }, { "epoch": 1.2571307540242869, "grad_norm": 4.095870638503094, "learning_rate": 1.3058546405428203e-05, "loss": 0.8185, "step": 8903 }, { "epoch": 1.2572719570742728, "grad_norm": 3.853095788380736, "learning_rate": 1.3057094971435005e-05, "loss": 0.6963, "step": 8904 }, { "epoch": 1.2574131601242586, "grad_norm": 3.7104930205725157, "learning_rate": 1.3055643466396234e-05, "loss": 0.6659, "step": 8905 }, { "epoch": 1.2575543631742445, "grad_norm": 2.8770009189194274, "learning_rate": 1.3054191890345633e-05, "loss": 0.4687, "step": 8906 }, { "epoch": 1.2576955662242304, "grad_norm": 4.742575777138068, "learning_rate": 1.3052740243316932e-05, "loss": 0.8066, "step": 8907 }, { "epoch": 1.2578367692742163, "grad_norm": 3.2747676847445075, "learning_rate": 1.3051288525343866e-05, "loss": 0.5463, "step": 8908 }, { "epoch": 1.2579779723242022, "grad_norm": 3.9685279704623864, "learning_rate": 1.304983673646017e-05, "loss": 0.6982, "step": 8909 }, { "epoch": 1.258119175374188, "grad_norm": 3.5930840915197435, "learning_rate": 1.3048384876699588e-05, "loss": 0.6242, "step": 8910 }, { "epoch": 1.258260378424174, "grad_norm": 3.9491200168461176, "learning_rate": 1.3046932946095856e-05, "loss": 0.6113, "step": 8911 }, { "epoch": 1.2584015814741598, "grad_norm": 3.317973328250457, "learning_rate": 1.304548094468272e-05, "loss": 0.5419, "step": 8912 }, { "epoch": 1.2585427845241457, "grad_norm": 3.5908218535080647, "learning_rate": 1.304402887249392e-05, "loss": 0.6088, "step": 8913 }, { "epoch": 1.2586839875741316, "grad_norm": 3.8069201078017856, "learning_rate": 1.3042576729563203e-05, "loss": 0.5494, "step": 8914 }, { "epoch": 1.2588251906241175, "grad_norm": 3.9781355984075826, "learning_rate": 1.3041124515924324e-05, "loss": 0.7008, "step": 8915 }, { "epoch": 1.2589663936741033, "grad_norm": 3.8632152272993405, "learning_rate": 1.3039672231611015e-05, "loss": 0.6686, "step": 8916 }, { "epoch": 1.2591075967240892, "grad_norm": 3.793096450272382, "learning_rate": 1.303821987665704e-05, "loss": 0.5002, "step": 8917 }, { "epoch": 1.2592487997740751, "grad_norm": 2.752665495213168, "learning_rate": 1.3036767451096148e-05, "loss": 0.4297, "step": 8918 }, { "epoch": 1.259390002824061, "grad_norm": 3.369686424265232, "learning_rate": 1.3035314954962092e-05, "loss": 0.5652, "step": 8919 }, { "epoch": 1.2595312058740469, "grad_norm": 4.240867842961195, "learning_rate": 1.3033862388288628e-05, "loss": 0.5564, "step": 8920 }, { "epoch": 1.2596724089240328, "grad_norm": 3.2356365250206243, "learning_rate": 1.3032409751109509e-05, "loss": 0.5441, "step": 8921 }, { "epoch": 1.2598136119740186, "grad_norm": 3.8468139636304537, "learning_rate": 1.30309570434585e-05, "loss": 0.6802, "step": 8922 }, { "epoch": 1.2599548150240045, "grad_norm": 3.949462364771497, "learning_rate": 1.3029504265369356e-05, "loss": 0.5011, "step": 8923 }, { "epoch": 1.2600960180739904, "grad_norm": 2.8576362413757326, "learning_rate": 1.3028051416875845e-05, "loss": 0.4717, "step": 8924 }, { "epoch": 1.2602372211239763, "grad_norm": 4.894327819831198, "learning_rate": 1.3026598498011721e-05, "loss": 0.7425, "step": 8925 }, { "epoch": 1.2603784241739622, "grad_norm": 3.6381770204971158, "learning_rate": 1.302514550881076e-05, "loss": 0.6288, "step": 8926 }, { "epoch": 1.260519627223948, "grad_norm": 3.431979565034582, "learning_rate": 1.3023692449306721e-05, "loss": 0.59, "step": 8927 }, { "epoch": 1.260660830273934, "grad_norm": 3.7887198884522464, "learning_rate": 1.3022239319533376e-05, "loss": 0.5832, "step": 8928 }, { "epoch": 1.2608020333239198, "grad_norm": 4.005135336528074, "learning_rate": 1.3020786119524495e-05, "loss": 0.6775, "step": 8929 }, { "epoch": 1.2609432363739057, "grad_norm": 4.111172688075792, "learning_rate": 1.3019332849313851e-05, "loss": 0.6188, "step": 8930 }, { "epoch": 1.2610844394238916, "grad_norm": 3.4482134691506063, "learning_rate": 1.301787950893521e-05, "loss": 0.5794, "step": 8931 }, { "epoch": 1.2612256424738775, "grad_norm": 3.13623252154556, "learning_rate": 1.301642609842236e-05, "loss": 0.5171, "step": 8932 }, { "epoch": 1.2613668455238634, "grad_norm": 4.240374093672408, "learning_rate": 1.3014972617809062e-05, "loss": 0.652, "step": 8933 }, { "epoch": 1.2615080485738492, "grad_norm": 3.650117196717995, "learning_rate": 1.3013519067129108e-05, "loss": 0.5957, "step": 8934 }, { "epoch": 1.2616492516238351, "grad_norm": 3.56709103857253, "learning_rate": 1.3012065446416272e-05, "loss": 0.5668, "step": 8935 }, { "epoch": 1.261790454673821, "grad_norm": 3.338455964870943, "learning_rate": 1.3010611755704334e-05, "loss": 0.5544, "step": 8936 }, { "epoch": 1.2619316577238069, "grad_norm": 4.320475233328176, "learning_rate": 1.3009157995027079e-05, "loss": 0.6603, "step": 8937 }, { "epoch": 1.2620728607737928, "grad_norm": 3.6989088346249526, "learning_rate": 1.3007704164418294e-05, "loss": 0.6197, "step": 8938 }, { "epoch": 1.2622140638237787, "grad_norm": 3.9258328393111457, "learning_rate": 1.3006250263911759e-05, "loss": 0.5633, "step": 8939 }, { "epoch": 1.2623552668737645, "grad_norm": 3.472715183538632, "learning_rate": 1.3004796293541269e-05, "loss": 0.5269, "step": 8940 }, { "epoch": 1.2624964699237504, "grad_norm": 3.3614711803971766, "learning_rate": 1.3003342253340613e-05, "loss": 0.6048, "step": 8941 }, { "epoch": 1.2626376729737363, "grad_norm": 3.668901304355269, "learning_rate": 1.3001888143343578e-05, "loss": 0.62, "step": 8942 }, { "epoch": 1.2627788760237222, "grad_norm": 3.8102469144223168, "learning_rate": 1.3000433963583956e-05, "loss": 0.6805, "step": 8943 }, { "epoch": 1.262920079073708, "grad_norm": 3.5014917754074895, "learning_rate": 1.2998979714095547e-05, "loss": 0.5967, "step": 8944 }, { "epoch": 1.263061282123694, "grad_norm": 3.4865129052020296, "learning_rate": 1.2997525394912148e-05, "loss": 0.5413, "step": 8945 }, { "epoch": 1.2632024851736798, "grad_norm": 3.6064554367241075, "learning_rate": 1.2996071006067548e-05, "loss": 0.6061, "step": 8946 }, { "epoch": 1.2633436882236655, "grad_norm": 3.262145240240631, "learning_rate": 1.2994616547595555e-05, "loss": 0.4854, "step": 8947 }, { "epoch": 1.2634848912736514, "grad_norm": 3.674252208660445, "learning_rate": 1.2993162019529963e-05, "loss": 0.6715, "step": 8948 }, { "epoch": 1.2636260943236373, "grad_norm": 3.8027909909672575, "learning_rate": 1.2991707421904582e-05, "loss": 0.5725, "step": 8949 }, { "epoch": 1.2637672973736231, "grad_norm": 3.296533472974303, "learning_rate": 1.2990252754753212e-05, "loss": 0.4854, "step": 8950 }, { "epoch": 1.263908500423609, "grad_norm": 3.8955632055364267, "learning_rate": 1.2988798018109655e-05, "loss": 0.6311, "step": 8951 }, { "epoch": 1.264049703473595, "grad_norm": 3.3606954751147717, "learning_rate": 1.2987343212007728e-05, "loss": 0.5515, "step": 8952 }, { "epoch": 1.2641909065235808, "grad_norm": 3.315289844199876, "learning_rate": 1.2985888336481236e-05, "loss": 0.6271, "step": 8953 }, { "epoch": 1.2643321095735667, "grad_norm": 5.377517995348369, "learning_rate": 1.2984433391563984e-05, "loss": 0.4782, "step": 8954 }, { "epoch": 1.2644733126235526, "grad_norm": 3.7162518336751615, "learning_rate": 1.2982978377289792e-05, "loss": 0.558, "step": 8955 }, { "epoch": 1.2646145156735384, "grad_norm": 3.0820007244190912, "learning_rate": 1.2981523293692468e-05, "loss": 0.4931, "step": 8956 }, { "epoch": 1.2647557187235243, "grad_norm": 3.64601854661631, "learning_rate": 1.2980068140805833e-05, "loss": 0.65, "step": 8957 }, { "epoch": 1.2648969217735102, "grad_norm": 3.2652312135192028, "learning_rate": 1.2978612918663702e-05, "loss": 0.6184, "step": 8958 }, { "epoch": 1.265038124823496, "grad_norm": 3.678370650149595, "learning_rate": 1.2977157627299888e-05, "loss": 0.6475, "step": 8959 }, { "epoch": 1.265179327873482, "grad_norm": 3.2042596889359665, "learning_rate": 1.297570226674822e-05, "loss": 0.6045, "step": 8960 }, { "epoch": 1.2653205309234679, "grad_norm": 3.962067951560162, "learning_rate": 1.2974246837042518e-05, "loss": 0.7248, "step": 8961 }, { "epoch": 1.2654617339734537, "grad_norm": 3.9980907105036403, "learning_rate": 1.2972791338216601e-05, "loss": 0.6598, "step": 8962 }, { "epoch": 1.2656029370234396, "grad_norm": 3.4544667163195646, "learning_rate": 1.2971335770304301e-05, "loss": 0.5786, "step": 8963 }, { "epoch": 1.2657441400734255, "grad_norm": 3.93353711886771, "learning_rate": 1.2969880133339437e-05, "loss": 0.6544, "step": 8964 }, { "epoch": 1.2658853431234114, "grad_norm": 4.238039114949604, "learning_rate": 1.2968424427355842e-05, "loss": 0.7479, "step": 8965 }, { "epoch": 1.2660265461733973, "grad_norm": 3.145038751687595, "learning_rate": 1.2966968652387348e-05, "loss": 0.4958, "step": 8966 }, { "epoch": 1.2661677492233832, "grad_norm": 4.183474409543424, "learning_rate": 1.2965512808467782e-05, "loss": 0.7165, "step": 8967 }, { "epoch": 1.266308952273369, "grad_norm": 3.6759148043059167, "learning_rate": 1.2964056895630976e-05, "loss": 0.6784, "step": 8968 }, { "epoch": 1.266450155323355, "grad_norm": 3.383925566923806, "learning_rate": 1.2962600913910773e-05, "loss": 0.5114, "step": 8969 }, { "epoch": 1.2665913583733408, "grad_norm": 3.404124685983445, "learning_rate": 1.2961144863341e-05, "loss": 0.6093, "step": 8970 }, { "epoch": 1.2667325614233267, "grad_norm": 3.0597263951938305, "learning_rate": 1.2959688743955502e-05, "loss": 0.5727, "step": 8971 }, { "epoch": 1.2668737644733126, "grad_norm": 3.400041641777541, "learning_rate": 1.2958232555788115e-05, "loss": 0.5637, "step": 8972 }, { "epoch": 1.2670149675232985, "grad_norm": 3.4521489421962275, "learning_rate": 1.2956776298872682e-05, "loss": 0.5491, "step": 8973 }, { "epoch": 1.2671561705732843, "grad_norm": 2.8287607076633767, "learning_rate": 1.2955319973243043e-05, "loss": 0.5293, "step": 8974 }, { "epoch": 1.2672973736232702, "grad_norm": 3.6401511014648316, "learning_rate": 1.2953863578933045e-05, "loss": 0.5701, "step": 8975 }, { "epoch": 1.267438576673256, "grad_norm": 3.6630020153107825, "learning_rate": 1.2952407115976531e-05, "loss": 0.6483, "step": 8976 }, { "epoch": 1.267579779723242, "grad_norm": 4.065382577807174, "learning_rate": 1.2950950584407353e-05, "loss": 0.6482, "step": 8977 }, { "epoch": 1.2677209827732279, "grad_norm": 4.326769035670086, "learning_rate": 1.2949493984259355e-05, "loss": 0.6315, "step": 8978 }, { "epoch": 1.2678621858232137, "grad_norm": 4.025754721347293, "learning_rate": 1.2948037315566395e-05, "loss": 0.6701, "step": 8979 }, { "epoch": 1.2680033888731996, "grad_norm": 3.5624685657430715, "learning_rate": 1.294658057836232e-05, "loss": 0.6098, "step": 8980 }, { "epoch": 1.2681445919231855, "grad_norm": 3.9543817995148505, "learning_rate": 1.2945123772680983e-05, "loss": 0.734, "step": 8981 }, { "epoch": 1.2682857949731714, "grad_norm": 5.084742692533595, "learning_rate": 1.294366689855624e-05, "loss": 0.5913, "step": 8982 }, { "epoch": 1.2684269980231573, "grad_norm": 3.4026778394307646, "learning_rate": 1.2942209956021953e-05, "loss": 0.559, "step": 8983 }, { "epoch": 1.2685682010731432, "grad_norm": 3.6524786099681075, "learning_rate": 1.2940752945111977e-05, "loss": 0.5666, "step": 8984 }, { "epoch": 1.268709404123129, "grad_norm": 3.298860884514903, "learning_rate": 1.293929586586017e-05, "loss": 0.606, "step": 8985 }, { "epoch": 1.268850607173115, "grad_norm": 3.9303412608682318, "learning_rate": 1.2937838718300398e-05, "loss": 0.6429, "step": 8986 }, { "epoch": 1.2689918102231008, "grad_norm": 3.497938671828123, "learning_rate": 1.2936381502466524e-05, "loss": 0.6578, "step": 8987 }, { "epoch": 1.2691330132730867, "grad_norm": 3.758198179817365, "learning_rate": 1.293492421839241e-05, "loss": 0.6359, "step": 8988 }, { "epoch": 1.2692742163230726, "grad_norm": 4.032020562309251, "learning_rate": 1.2933466866111925e-05, "loss": 0.6627, "step": 8989 }, { "epoch": 1.2694154193730585, "grad_norm": 3.0089064763758873, "learning_rate": 1.293200944565894e-05, "loss": 0.533, "step": 8990 }, { "epoch": 1.2695566224230443, "grad_norm": 3.2256158405256214, "learning_rate": 1.2930551957067318e-05, "loss": 0.6249, "step": 8991 }, { "epoch": 1.2696978254730302, "grad_norm": 3.7546933467337946, "learning_rate": 1.2929094400370936e-05, "loss": 0.5833, "step": 8992 }, { "epoch": 1.2698390285230161, "grad_norm": 3.181103764849049, "learning_rate": 1.2927636775603663e-05, "loss": 0.4576, "step": 8993 }, { "epoch": 1.269980231573002, "grad_norm": 4.827523025444046, "learning_rate": 1.2926179082799377e-05, "loss": 0.8276, "step": 8994 }, { "epoch": 1.2701214346229879, "grad_norm": 3.195125233736593, "learning_rate": 1.2924721321991953e-05, "loss": 0.5184, "step": 8995 }, { "epoch": 1.2702626376729738, "grad_norm": 4.325255856387945, "learning_rate": 1.292326349321527e-05, "loss": 0.803, "step": 8996 }, { "epoch": 1.2704038407229596, "grad_norm": 3.2589357281435327, "learning_rate": 1.2921805596503203e-05, "loss": 0.5662, "step": 8997 }, { "epoch": 1.2705450437729455, "grad_norm": 3.1653797195248234, "learning_rate": 1.2920347631889637e-05, "loss": 0.5886, "step": 8998 }, { "epoch": 1.2706862468229314, "grad_norm": 3.290514737918978, "learning_rate": 1.2918889599408453e-05, "loss": 0.5716, "step": 8999 }, { "epoch": 1.2708274498729173, "grad_norm": 3.685175409717053, "learning_rate": 1.2917431499093538e-05, "loss": 0.5248, "step": 9000 }, { "epoch": 1.2709686529229032, "grad_norm": 3.404081731664567, "learning_rate": 1.2915973330978772e-05, "loss": 0.5311, "step": 9001 }, { "epoch": 1.271109855972889, "grad_norm": 3.8071207311969104, "learning_rate": 1.2914515095098043e-05, "loss": 0.6472, "step": 9002 }, { "epoch": 1.271251059022875, "grad_norm": 3.470880049957385, "learning_rate": 1.2913056791485246e-05, "loss": 0.5813, "step": 9003 }, { "epoch": 1.2713922620728608, "grad_norm": 3.377128356843363, "learning_rate": 1.291159842017427e-05, "loss": 0.5266, "step": 9004 }, { "epoch": 1.2715334651228467, "grad_norm": 3.7512896580711477, "learning_rate": 1.2910139981198997e-05, "loss": 0.6997, "step": 9005 }, { "epoch": 1.2716746681728326, "grad_norm": 3.838302242630251, "learning_rate": 1.290868147459333e-05, "loss": 0.6298, "step": 9006 }, { "epoch": 1.2718158712228185, "grad_norm": 3.374726034915733, "learning_rate": 1.2907222900391164e-05, "loss": 0.5631, "step": 9007 }, { "epoch": 1.2719570742728044, "grad_norm": 3.3580965615188574, "learning_rate": 1.290576425862639e-05, "loss": 0.657, "step": 9008 }, { "epoch": 1.2720982773227902, "grad_norm": 4.777898492589735, "learning_rate": 1.2904305549332911e-05, "loss": 0.7193, "step": 9009 }, { "epoch": 1.2722394803727761, "grad_norm": 3.6071499667920084, "learning_rate": 1.2902846772544625e-05, "loss": 0.5979, "step": 9010 }, { "epoch": 1.272380683422762, "grad_norm": 3.1814571763100483, "learning_rate": 1.2901387928295432e-05, "loss": 0.5846, "step": 9011 }, { "epoch": 1.2725218864727479, "grad_norm": 3.705722505407158, "learning_rate": 1.2899929016619241e-05, "loss": 0.6566, "step": 9012 }, { "epoch": 1.2726630895227338, "grad_norm": 3.5763143552016503, "learning_rate": 1.2898470037549951e-05, "loss": 0.6745, "step": 9013 }, { "epoch": 1.2728042925727197, "grad_norm": 4.519392776891794, "learning_rate": 1.2897010991121465e-05, "loss": 0.5467, "step": 9014 }, { "epoch": 1.2729454956227055, "grad_norm": 3.497465990029531, "learning_rate": 1.2895551877367697e-05, "loss": 0.5674, "step": 9015 }, { "epoch": 1.2730866986726914, "grad_norm": 3.273343653483845, "learning_rate": 1.2894092696322554e-05, "loss": 0.5137, "step": 9016 }, { "epoch": 1.2732279017226773, "grad_norm": 3.0075411063392696, "learning_rate": 1.2892633448019949e-05, "loss": 0.5048, "step": 9017 }, { "epoch": 1.2733691047726632, "grad_norm": 3.522618824981709, "learning_rate": 1.2891174132493792e-05, "loss": 0.5525, "step": 9018 }, { "epoch": 1.273510307822649, "grad_norm": 3.5560233088935638, "learning_rate": 1.2889714749777991e-05, "loss": 0.6299, "step": 9019 }, { "epoch": 1.273651510872635, "grad_norm": 3.6976794483143283, "learning_rate": 1.288825529990647e-05, "loss": 0.5337, "step": 9020 }, { "epoch": 1.2737927139226208, "grad_norm": 4.089550088145935, "learning_rate": 1.2886795782913146e-05, "loss": 0.6999, "step": 9021 }, { "epoch": 1.2739339169726067, "grad_norm": 2.963432204737656, "learning_rate": 1.2885336198831933e-05, "loss": 0.4643, "step": 9022 }, { "epoch": 1.2740751200225926, "grad_norm": 3.805806762094766, "learning_rate": 1.2883876547696752e-05, "loss": 0.5956, "step": 9023 }, { "epoch": 1.2742163230725785, "grad_norm": 3.699386348450627, "learning_rate": 1.2882416829541526e-05, "loss": 0.5453, "step": 9024 }, { "epoch": 1.2743575261225644, "grad_norm": 3.982022060237142, "learning_rate": 1.2880957044400178e-05, "loss": 0.6503, "step": 9025 }, { "epoch": 1.2744987291725502, "grad_norm": 3.735658804595784, "learning_rate": 1.2879497192306634e-05, "loss": 0.6317, "step": 9026 }, { "epoch": 1.2746399322225361, "grad_norm": 2.9418784850845006, "learning_rate": 1.2878037273294815e-05, "loss": 0.4802, "step": 9027 }, { "epoch": 1.274781135272522, "grad_norm": 3.484275421998208, "learning_rate": 1.2876577287398653e-05, "loss": 0.6382, "step": 9028 }, { "epoch": 1.274922338322508, "grad_norm": 3.3357819990388866, "learning_rate": 1.2875117234652078e-05, "loss": 0.576, "step": 9029 }, { "epoch": 1.2750635413724938, "grad_norm": 3.8040315235486735, "learning_rate": 1.2873657115089022e-05, "loss": 0.5232, "step": 9030 }, { "epoch": 1.2752047444224797, "grad_norm": 3.7550747136052554, "learning_rate": 1.2872196928743415e-05, "loss": 0.619, "step": 9031 }, { "epoch": 1.2753459474724653, "grad_norm": 4.150598432962877, "learning_rate": 1.2870736675649192e-05, "loss": 0.6395, "step": 9032 }, { "epoch": 1.2754871505224512, "grad_norm": 4.946742700535504, "learning_rate": 1.2869276355840288e-05, "loss": 0.8017, "step": 9033 }, { "epoch": 1.275628353572437, "grad_norm": 3.35923903874186, "learning_rate": 1.286781596935064e-05, "loss": 0.5291, "step": 9034 }, { "epoch": 1.275769556622423, "grad_norm": 4.194240938547909, "learning_rate": 1.2866355516214189e-05, "loss": 0.7017, "step": 9035 }, { "epoch": 1.2759107596724089, "grad_norm": 4.221129370964834, "learning_rate": 1.286489499646487e-05, "loss": 0.7873, "step": 9036 }, { "epoch": 1.2760519627223947, "grad_norm": 3.946002483998895, "learning_rate": 1.286343441013663e-05, "loss": 0.6566, "step": 9037 }, { "epoch": 1.2761931657723806, "grad_norm": 3.8080867091565125, "learning_rate": 1.2861973757263416e-05, "loss": 0.6312, "step": 9038 }, { "epoch": 1.2763343688223665, "grad_norm": 3.511239423643281, "learning_rate": 1.2860513037879163e-05, "loss": 0.5464, "step": 9039 }, { "epoch": 1.2764755718723524, "grad_norm": 3.6746233485778075, "learning_rate": 1.2859052252017824e-05, "loss": 0.5864, "step": 9040 }, { "epoch": 1.2766167749223383, "grad_norm": 3.311392569297727, "learning_rate": 1.2857591399713343e-05, "loss": 0.6239, "step": 9041 }, { "epoch": 1.2767579779723242, "grad_norm": 4.298390151375422, "learning_rate": 1.2856130480999673e-05, "loss": 0.7454, "step": 9042 }, { "epoch": 1.27689918102231, "grad_norm": 3.1506816768221753, "learning_rate": 1.2854669495910766e-05, "loss": 0.5701, "step": 9043 }, { "epoch": 1.277040384072296, "grad_norm": 2.916827332024249, "learning_rate": 1.2853208444480572e-05, "loss": 0.4372, "step": 9044 }, { "epoch": 1.2771815871222818, "grad_norm": 3.6272905943783345, "learning_rate": 1.2851747326743042e-05, "loss": 0.6946, "step": 9045 }, { "epoch": 1.2773227901722677, "grad_norm": 3.8019574062878827, "learning_rate": 1.2850286142732141e-05, "loss": 0.6799, "step": 9046 }, { "epoch": 1.2774639932222536, "grad_norm": 2.5473892670713205, "learning_rate": 1.284882489248182e-05, "loss": 0.4538, "step": 9047 }, { "epoch": 1.2776051962722395, "grad_norm": 3.1407306516241196, "learning_rate": 1.2847363576026037e-05, "loss": 0.5421, "step": 9048 }, { "epoch": 1.2777463993222253, "grad_norm": 3.879403219381558, "learning_rate": 1.2845902193398754e-05, "loss": 0.6871, "step": 9049 }, { "epoch": 1.2778876023722112, "grad_norm": 3.159963253562537, "learning_rate": 1.2844440744633934e-05, "loss": 0.5396, "step": 9050 }, { "epoch": 1.278028805422197, "grad_norm": 4.778117223539895, "learning_rate": 1.2842979229765542e-05, "loss": 0.8533, "step": 9051 }, { "epoch": 1.278170008472183, "grad_norm": 3.0857270106133434, "learning_rate": 1.2841517648827538e-05, "loss": 0.5128, "step": 9052 }, { "epoch": 1.2783112115221689, "grad_norm": 4.069958689198129, "learning_rate": 1.284005600185389e-05, "loss": 0.8294, "step": 9053 }, { "epoch": 1.2784524145721547, "grad_norm": 3.0031579853538606, "learning_rate": 1.2838594288878567e-05, "loss": 0.482, "step": 9054 }, { "epoch": 1.2785936176221406, "grad_norm": 3.7158121329497287, "learning_rate": 1.283713250993554e-05, "loss": 0.5901, "step": 9055 }, { "epoch": 1.2787348206721265, "grad_norm": 4.463494988961276, "learning_rate": 1.2835670665058779e-05, "loss": 0.6481, "step": 9056 }, { "epoch": 1.2788760237221124, "grad_norm": 3.415631213538582, "learning_rate": 1.2834208754282255e-05, "loss": 0.6852, "step": 9057 }, { "epoch": 1.2790172267720983, "grad_norm": 4.882143141877675, "learning_rate": 1.2832746777639947e-05, "loss": 0.6252, "step": 9058 }, { "epoch": 1.2791584298220842, "grad_norm": 3.3786757878391533, "learning_rate": 1.2831284735165822e-05, "loss": 0.6443, "step": 9059 }, { "epoch": 1.27929963287207, "grad_norm": 3.3911009722904453, "learning_rate": 1.2829822626893867e-05, "loss": 0.6144, "step": 9060 }, { "epoch": 1.279440835922056, "grad_norm": 3.345531011189538, "learning_rate": 1.2828360452858055e-05, "loss": 0.6042, "step": 9061 }, { "epoch": 1.2795820389720418, "grad_norm": 2.9206185831109264, "learning_rate": 1.2826898213092365e-05, "loss": 0.4402, "step": 9062 }, { "epoch": 1.2797232420220277, "grad_norm": 3.8689221550079136, "learning_rate": 1.2825435907630785e-05, "loss": 0.6051, "step": 9063 }, { "epoch": 1.2798644450720136, "grad_norm": 4.023273198520075, "learning_rate": 1.2823973536507295e-05, "loss": 0.5673, "step": 9064 }, { "epoch": 1.2800056481219995, "grad_norm": 3.7825713302075843, "learning_rate": 1.2822511099755875e-05, "loss": 0.5499, "step": 9065 }, { "epoch": 1.2801468511719853, "grad_norm": 3.6008399540236224, "learning_rate": 1.282104859741052e-05, "loss": 0.5746, "step": 9066 }, { "epoch": 1.2802880542219712, "grad_norm": 4.178942750261326, "learning_rate": 1.281958602950521e-05, "loss": 0.6172, "step": 9067 }, { "epoch": 1.280429257271957, "grad_norm": 3.6290002368569265, "learning_rate": 1.2818123396073942e-05, "loss": 0.5913, "step": 9068 }, { "epoch": 1.280570460321943, "grad_norm": 4.014921813384591, "learning_rate": 1.2816660697150702e-05, "loss": 0.6248, "step": 9069 }, { "epoch": 1.2807116633719289, "grad_norm": 3.3017953832358713, "learning_rate": 1.2815197932769486e-05, "loss": 0.5486, "step": 9070 }, { "epoch": 1.2808528664219148, "grad_norm": 3.1221616580412603, "learning_rate": 1.2813735102964281e-05, "loss": 0.5434, "step": 9071 }, { "epoch": 1.2809940694719006, "grad_norm": 4.1161508168803405, "learning_rate": 1.2812272207769092e-05, "loss": 0.6951, "step": 9072 }, { "epoch": 1.2811352725218865, "grad_norm": 3.822547768492929, "learning_rate": 1.281080924721791e-05, "loss": 0.7153, "step": 9073 }, { "epoch": 1.2812764755718724, "grad_norm": 4.32537764969359, "learning_rate": 1.2809346221344734e-05, "loss": 0.7702, "step": 9074 }, { "epoch": 1.2814176786218583, "grad_norm": 3.82402094964537, "learning_rate": 1.2807883130183565e-05, "loss": 0.7318, "step": 9075 }, { "epoch": 1.2815588816718442, "grad_norm": 3.8540534164318228, "learning_rate": 1.2806419973768407e-05, "loss": 0.6531, "step": 9076 }, { "epoch": 1.28170008472183, "grad_norm": 3.1890766880969954, "learning_rate": 1.280495675213326e-05, "loss": 0.5015, "step": 9077 }, { "epoch": 1.281841287771816, "grad_norm": 3.642016654278224, "learning_rate": 1.280349346531213e-05, "loss": 0.6078, "step": 9078 }, { "epoch": 1.2819824908218018, "grad_norm": 4.863419029139658, "learning_rate": 1.2802030113339016e-05, "loss": 0.9011, "step": 9079 }, { "epoch": 1.2821236938717877, "grad_norm": 3.721077428242094, "learning_rate": 1.2800566696247943e-05, "loss": 0.6479, "step": 9080 }, { "epoch": 1.2822648969217736, "grad_norm": 3.832086077215903, "learning_rate": 1.2799103214072902e-05, "loss": 0.5806, "step": 9081 }, { "epoch": 1.2824060999717595, "grad_norm": 3.8781451452458637, "learning_rate": 1.2797639666847912e-05, "loss": 0.657, "step": 9082 }, { "epoch": 1.2825473030217451, "grad_norm": 3.6559326162855412, "learning_rate": 1.2796176054606987e-05, "loss": 0.6045, "step": 9083 }, { "epoch": 1.282688506071731, "grad_norm": 3.635084095363264, "learning_rate": 1.2794712377384135e-05, "loss": 0.4881, "step": 9084 }, { "epoch": 1.282829709121717, "grad_norm": 3.6237142931407873, "learning_rate": 1.2793248635213378e-05, "loss": 0.6144, "step": 9085 }, { "epoch": 1.2829709121717028, "grad_norm": 3.7376196314230072, "learning_rate": 1.2791784828128727e-05, "loss": 0.5813, "step": 9086 }, { "epoch": 1.2831121152216887, "grad_norm": 4.337537177584381, "learning_rate": 1.2790320956164201e-05, "loss": 0.7707, "step": 9087 }, { "epoch": 1.2832533182716745, "grad_norm": 3.8976983203461235, "learning_rate": 1.2788857019353821e-05, "loss": 0.6553, "step": 9088 }, { "epoch": 1.2833945213216604, "grad_norm": 3.551887662402904, "learning_rate": 1.2787393017731611e-05, "loss": 0.6227, "step": 9089 }, { "epoch": 1.2835357243716463, "grad_norm": 4.93692179457243, "learning_rate": 1.278592895133159e-05, "loss": 0.587, "step": 9090 }, { "epoch": 1.2836769274216322, "grad_norm": 3.3096639248984863, "learning_rate": 1.278446482018778e-05, "loss": 0.6119, "step": 9091 }, { "epoch": 1.283818130471618, "grad_norm": 3.5142085273817085, "learning_rate": 1.2783000624334214e-05, "loss": 0.5337, "step": 9092 }, { "epoch": 1.283959333521604, "grad_norm": 3.561899909836545, "learning_rate": 1.2781536363804916e-05, "loss": 0.6693, "step": 9093 }, { "epoch": 1.2841005365715898, "grad_norm": 3.134394405928417, "learning_rate": 1.2780072038633913e-05, "loss": 0.5643, "step": 9094 }, { "epoch": 1.2842417396215757, "grad_norm": 3.7054041950743053, "learning_rate": 1.2778607648855234e-05, "loss": 0.6389, "step": 9095 }, { "epoch": 1.2843829426715616, "grad_norm": 3.1891706803151396, "learning_rate": 1.2777143194502915e-05, "loss": 0.6225, "step": 9096 }, { "epoch": 1.2845241457215475, "grad_norm": 3.7730931467218536, "learning_rate": 1.277567867561099e-05, "loss": 0.6012, "step": 9097 }, { "epoch": 1.2846653487715334, "grad_norm": 3.5183364941382917, "learning_rate": 1.2774214092213489e-05, "loss": 0.6552, "step": 9098 }, { "epoch": 1.2848065518215193, "grad_norm": 3.874592950812235, "learning_rate": 1.2772749444344448e-05, "loss": 0.5055, "step": 9099 }, { "epoch": 1.2849477548715051, "grad_norm": 3.255264845768958, "learning_rate": 1.2771284732037912e-05, "loss": 0.6486, "step": 9100 }, { "epoch": 1.285088957921491, "grad_norm": 3.877770314912597, "learning_rate": 1.2769819955327913e-05, "loss": 0.6116, "step": 9101 }, { "epoch": 1.285230160971477, "grad_norm": 3.612740206367866, "learning_rate": 1.2768355114248493e-05, "loss": 0.5899, "step": 9102 }, { "epoch": 1.2853713640214628, "grad_norm": 3.2431648960275097, "learning_rate": 1.27668902088337e-05, "loss": 0.5624, "step": 9103 }, { "epoch": 1.2855125670714487, "grad_norm": 3.2387335645328728, "learning_rate": 1.2765425239117572e-05, "loss": 0.5617, "step": 9104 }, { "epoch": 1.2856537701214346, "grad_norm": 3.236788266527372, "learning_rate": 1.2763960205134154e-05, "loss": 0.478, "step": 9105 }, { "epoch": 1.2857949731714204, "grad_norm": 3.750706132297012, "learning_rate": 1.2762495106917496e-05, "loss": 0.6337, "step": 9106 }, { "epoch": 1.2859361762214063, "grad_norm": 4.048985172899322, "learning_rate": 1.2761029944501646e-05, "loss": 0.6661, "step": 9107 }, { "epoch": 1.2860773792713922, "grad_norm": 3.8888052252130394, "learning_rate": 1.2759564717920649e-05, "loss": 0.5669, "step": 9108 }, { "epoch": 1.286218582321378, "grad_norm": 2.915494251355418, "learning_rate": 1.2758099427208561e-05, "loss": 0.5163, "step": 9109 }, { "epoch": 1.286359785371364, "grad_norm": 4.285217129704498, "learning_rate": 1.2756634072399434e-05, "loss": 0.6707, "step": 9110 }, { "epoch": 1.2865009884213499, "grad_norm": 3.512441839670131, "learning_rate": 1.275516865352732e-05, "loss": 0.6144, "step": 9111 }, { "epoch": 1.2866421914713357, "grad_norm": 3.2951732486540566, "learning_rate": 1.2753703170626279e-05, "loss": 0.5363, "step": 9112 }, { "epoch": 1.2867833945213216, "grad_norm": 2.729260865312269, "learning_rate": 1.2752237623730365e-05, "loss": 0.4523, "step": 9113 }, { "epoch": 1.2869245975713075, "grad_norm": 4.1942233778185445, "learning_rate": 1.2750772012873637e-05, "loss": 0.6665, "step": 9114 }, { "epoch": 1.2870658006212934, "grad_norm": 3.7468447817708204, "learning_rate": 1.2749306338090155e-05, "loss": 0.5828, "step": 9115 }, { "epoch": 1.2872070036712793, "grad_norm": 3.7043649426765146, "learning_rate": 1.2747840599413981e-05, "loss": 0.5242, "step": 9116 }, { "epoch": 1.2873482067212652, "grad_norm": 3.5891298501327373, "learning_rate": 1.2746374796879177e-05, "loss": 0.6133, "step": 9117 }, { "epoch": 1.287489409771251, "grad_norm": 4.172448338957914, "learning_rate": 1.274490893051981e-05, "loss": 0.6337, "step": 9118 }, { "epoch": 1.287630612821237, "grad_norm": 4.017832984296004, "learning_rate": 1.2743443000369947e-05, "loss": 0.7282, "step": 9119 }, { "epoch": 1.2877718158712228, "grad_norm": 3.138434748723435, "learning_rate": 1.274197700646365e-05, "loss": 0.5969, "step": 9120 }, { "epoch": 1.2879130189212087, "grad_norm": 3.2319800607921234, "learning_rate": 1.2740510948834995e-05, "loss": 0.5734, "step": 9121 }, { "epoch": 1.2880542219711946, "grad_norm": 3.5038270449090345, "learning_rate": 1.2739044827518043e-05, "loss": 0.6379, "step": 9122 }, { "epoch": 1.2881954250211805, "grad_norm": 3.4222816116387684, "learning_rate": 1.273757864254688e-05, "loss": 0.6449, "step": 9123 }, { "epoch": 1.2883366280711663, "grad_norm": 4.2249335261196235, "learning_rate": 1.2736112393955567e-05, "loss": 0.5842, "step": 9124 }, { "epoch": 1.2884778311211522, "grad_norm": 3.461448309290748, "learning_rate": 1.273464608177818e-05, "loss": 0.5505, "step": 9125 }, { "epoch": 1.288619034171138, "grad_norm": 3.9934403814322406, "learning_rate": 1.2733179706048805e-05, "loss": 0.5846, "step": 9126 }, { "epoch": 1.288760237221124, "grad_norm": 3.4278466965991843, "learning_rate": 1.2731713266801515e-05, "loss": 0.5523, "step": 9127 }, { "epoch": 1.2889014402711099, "grad_norm": 3.8982328172273304, "learning_rate": 1.2730246764070385e-05, "loss": 0.6666, "step": 9128 }, { "epoch": 1.2890426433210957, "grad_norm": 3.2968420413702773, "learning_rate": 1.2728780197889502e-05, "loss": 0.4832, "step": 9129 }, { "epoch": 1.2891838463710816, "grad_norm": 2.749232950974371, "learning_rate": 1.2727313568292942e-05, "loss": 0.408, "step": 9130 }, { "epoch": 1.2893250494210675, "grad_norm": 3.4603073480395588, "learning_rate": 1.2725846875314796e-05, "loss": 0.703, "step": 9131 }, { "epoch": 1.2894662524710534, "grad_norm": 3.303766060637883, "learning_rate": 1.2724380118989144e-05, "loss": 0.5756, "step": 9132 }, { "epoch": 1.2896074555210393, "grad_norm": 3.56451770569294, "learning_rate": 1.2722913299350072e-05, "loss": 0.5995, "step": 9133 }, { "epoch": 1.2897486585710252, "grad_norm": 4.361508179535617, "learning_rate": 1.2721446416431676e-05, "loss": 0.7199, "step": 9134 }, { "epoch": 1.289889861621011, "grad_norm": 3.6834623748311115, "learning_rate": 1.271997947026804e-05, "loss": 0.5984, "step": 9135 }, { "epoch": 1.290031064670997, "grad_norm": 3.3743725090140515, "learning_rate": 1.271851246089325e-05, "loss": 0.543, "step": 9136 }, { "epoch": 1.2901722677209828, "grad_norm": 3.0500344111849484, "learning_rate": 1.271704538834141e-05, "loss": 0.5708, "step": 9137 }, { "epoch": 1.2903134707709687, "grad_norm": 2.9109490139650513, "learning_rate": 1.2715578252646608e-05, "loss": 0.4338, "step": 9138 }, { "epoch": 1.2904546738209546, "grad_norm": 3.0943686708591365, "learning_rate": 1.2714111053842934e-05, "loss": 0.5301, "step": 9139 }, { "epoch": 1.2905958768709405, "grad_norm": 3.4863447653048922, "learning_rate": 1.2712643791964501e-05, "loss": 0.7131, "step": 9140 }, { "epoch": 1.2907370799209263, "grad_norm": 3.041717308363255, "learning_rate": 1.2711176467045392e-05, "loss": 0.547, "step": 9141 }, { "epoch": 1.2908782829709122, "grad_norm": 3.4656368821398758, "learning_rate": 1.2709709079119712e-05, "loss": 0.6446, "step": 9142 }, { "epoch": 1.291019486020898, "grad_norm": 3.947719585980999, "learning_rate": 1.2708241628221567e-05, "loss": 0.5806, "step": 9143 }, { "epoch": 1.291160689070884, "grad_norm": 3.2697595091787504, "learning_rate": 1.2706774114385054e-05, "loss": 0.5626, "step": 9144 }, { "epoch": 1.2913018921208699, "grad_norm": 4.377270108459941, "learning_rate": 1.270530653764428e-05, "loss": 0.6703, "step": 9145 }, { "epoch": 1.2914430951708558, "grad_norm": 4.202886227512236, "learning_rate": 1.270383889803335e-05, "loss": 0.6021, "step": 9146 }, { "epoch": 1.2915842982208416, "grad_norm": 3.7745778135809793, "learning_rate": 1.2702371195586373e-05, "loss": 0.6119, "step": 9147 }, { "epoch": 1.2917255012708275, "grad_norm": 3.009342510453179, "learning_rate": 1.2700903430337456e-05, "loss": 0.4995, "step": 9148 }, { "epoch": 1.2918667043208134, "grad_norm": 3.900364454571682, "learning_rate": 1.2699435602320709e-05, "loss": 0.7363, "step": 9149 }, { "epoch": 1.2920079073707993, "grad_norm": 2.727029227241306, "learning_rate": 1.2697967711570243e-05, "loss": 0.4824, "step": 9150 }, { "epoch": 1.2921491104207852, "grad_norm": 3.9514071111954103, "learning_rate": 1.2696499758120173e-05, "loss": 0.6633, "step": 9151 }, { "epoch": 1.292290313470771, "grad_norm": 3.9017390315751688, "learning_rate": 1.2695031742004617e-05, "loss": 0.6377, "step": 9152 }, { "epoch": 1.292431516520757, "grad_norm": 3.7045476575352314, "learning_rate": 1.2693563663257684e-05, "loss": 0.6663, "step": 9153 }, { "epoch": 1.2925727195707428, "grad_norm": 3.923140023803046, "learning_rate": 1.2692095521913494e-05, "loss": 0.7567, "step": 9154 }, { "epoch": 1.2927139226207287, "grad_norm": 3.2456361405594985, "learning_rate": 1.2690627318006171e-05, "loss": 0.5535, "step": 9155 }, { "epoch": 1.2928551256707146, "grad_norm": 4.517974292635989, "learning_rate": 1.2689159051569824e-05, "loss": 0.7345, "step": 9156 }, { "epoch": 1.2929963287207005, "grad_norm": 5.341343923925911, "learning_rate": 1.2687690722638588e-05, "loss": 0.8875, "step": 9157 }, { "epoch": 1.2931375317706864, "grad_norm": 3.649420995069708, "learning_rate": 1.268622233124658e-05, "loss": 0.6293, "step": 9158 }, { "epoch": 1.2932787348206722, "grad_norm": 4.110481779041412, "learning_rate": 1.2684753877427922e-05, "loss": 0.9144, "step": 9159 }, { "epoch": 1.2934199378706581, "grad_norm": 2.830973213421061, "learning_rate": 1.2683285361216745e-05, "loss": 0.4555, "step": 9160 }, { "epoch": 1.293561140920644, "grad_norm": 3.402516639997098, "learning_rate": 1.2681816782647176e-05, "loss": 0.5348, "step": 9161 }, { "epoch": 1.2937023439706299, "grad_norm": 2.7525871910766337, "learning_rate": 1.268034814175334e-05, "loss": 0.4001, "step": 9162 }, { "epoch": 1.2938435470206158, "grad_norm": 4.019184744946877, "learning_rate": 1.2678879438569373e-05, "loss": 0.6721, "step": 9163 }, { "epoch": 1.2939847500706017, "grad_norm": 3.9479831659107307, "learning_rate": 1.2677410673129406e-05, "loss": 0.6557, "step": 9164 }, { "epoch": 1.2941259531205875, "grad_norm": 3.162731753157139, "learning_rate": 1.267594184546757e-05, "loss": 0.5178, "step": 9165 }, { "epoch": 1.2942671561705734, "grad_norm": 3.367463749031155, "learning_rate": 1.2674472955618001e-05, "loss": 0.5941, "step": 9166 }, { "epoch": 1.2944083592205593, "grad_norm": 3.5275740142043266, "learning_rate": 1.2673004003614835e-05, "loss": 0.5644, "step": 9167 }, { "epoch": 1.294549562270545, "grad_norm": 3.7123388426804573, "learning_rate": 1.2671534989492209e-05, "loss": 0.6172, "step": 9168 }, { "epoch": 1.2946907653205308, "grad_norm": 3.4731283051624553, "learning_rate": 1.2670065913284268e-05, "loss": 0.5004, "step": 9169 }, { "epoch": 1.2948319683705167, "grad_norm": 3.9616396078989093, "learning_rate": 1.2668596775025143e-05, "loss": 0.5446, "step": 9170 }, { "epoch": 1.2949731714205026, "grad_norm": 3.198145166284306, "learning_rate": 1.2667127574748985e-05, "loss": 0.5033, "step": 9171 }, { "epoch": 1.2951143744704885, "grad_norm": 3.6393317998630703, "learning_rate": 1.2665658312489932e-05, "loss": 0.6497, "step": 9172 }, { "epoch": 1.2952555775204744, "grad_norm": 3.4451994435918323, "learning_rate": 1.2664188988282134e-05, "loss": 0.6279, "step": 9173 }, { "epoch": 1.2953967805704603, "grad_norm": 3.0543916530372415, "learning_rate": 1.2662719602159732e-05, "loss": 0.5374, "step": 9174 }, { "epoch": 1.2955379836204461, "grad_norm": 3.995415561467427, "learning_rate": 1.2661250154156876e-05, "loss": 0.6717, "step": 9175 }, { "epoch": 1.295679186670432, "grad_norm": 3.3638788769976866, "learning_rate": 1.2659780644307718e-05, "loss": 0.5538, "step": 9176 }, { "epoch": 1.295820389720418, "grad_norm": 3.754253857857848, "learning_rate": 1.2658311072646403e-05, "loss": 0.5385, "step": 9177 }, { "epoch": 1.2959615927704038, "grad_norm": 3.747825199381283, "learning_rate": 1.2656841439207093e-05, "loss": 0.7271, "step": 9178 }, { "epoch": 1.2961027958203897, "grad_norm": 4.380742937847543, "learning_rate": 1.2655371744023927e-05, "loss": 0.7049, "step": 9179 }, { "epoch": 1.2962439988703756, "grad_norm": 3.0658416465473817, "learning_rate": 1.2653901987131074e-05, "loss": 0.4793, "step": 9180 }, { "epoch": 1.2963852019203614, "grad_norm": 3.7528761518649056, "learning_rate": 1.2652432168562686e-05, "loss": 0.6789, "step": 9181 }, { "epoch": 1.2965264049703473, "grad_norm": 4.1413207492653, "learning_rate": 1.2650962288352916e-05, "loss": 0.7074, "step": 9182 }, { "epoch": 1.2966676080203332, "grad_norm": 3.346290887816353, "learning_rate": 1.264949234653593e-05, "loss": 0.4921, "step": 9183 }, { "epoch": 1.296808811070319, "grad_norm": 3.0799745957867244, "learning_rate": 1.2648022343145885e-05, "loss": 0.5031, "step": 9184 }, { "epoch": 1.296950014120305, "grad_norm": 4.614755748817048, "learning_rate": 1.2646552278216944e-05, "loss": 0.6786, "step": 9185 }, { "epoch": 1.2970912171702909, "grad_norm": 3.9530908809668714, "learning_rate": 1.2645082151783272e-05, "loss": 0.7993, "step": 9186 }, { "epoch": 1.2972324202202767, "grad_norm": 3.799183263641042, "learning_rate": 1.264361196387903e-05, "loss": 0.5602, "step": 9187 }, { "epoch": 1.2973736232702626, "grad_norm": 3.735304362747171, "learning_rate": 1.2642141714538391e-05, "loss": 0.5967, "step": 9188 }, { "epoch": 1.2975148263202485, "grad_norm": 3.35732043957424, "learning_rate": 1.2640671403795519e-05, "loss": 0.6186, "step": 9189 }, { "epoch": 1.2976560293702344, "grad_norm": 3.4756560697122523, "learning_rate": 1.2639201031684584e-05, "loss": 0.6093, "step": 9190 }, { "epoch": 1.2977972324202203, "grad_norm": 3.4846579296438693, "learning_rate": 1.2637730598239756e-05, "loss": 0.6051, "step": 9191 }, { "epoch": 1.2979384354702062, "grad_norm": 3.241275930240864, "learning_rate": 1.2636260103495209e-05, "loss": 0.5586, "step": 9192 }, { "epoch": 1.298079638520192, "grad_norm": 3.0768233031250234, "learning_rate": 1.2634789547485114e-05, "loss": 0.4239, "step": 9193 }, { "epoch": 1.298220841570178, "grad_norm": 3.1098997348543573, "learning_rate": 1.2633318930243647e-05, "loss": 0.5366, "step": 9194 }, { "epoch": 1.2983620446201638, "grad_norm": 3.9755980212009345, "learning_rate": 1.2631848251804992e-05, "loss": 0.6447, "step": 9195 }, { "epoch": 1.2985032476701497, "grad_norm": 3.9644059849144475, "learning_rate": 1.2630377512203314e-05, "loss": 0.7631, "step": 9196 }, { "epoch": 1.2986444507201356, "grad_norm": 3.22762014733013, "learning_rate": 1.26289067114728e-05, "loss": 0.5078, "step": 9197 }, { "epoch": 1.2987856537701215, "grad_norm": 2.6559142763370303, "learning_rate": 1.2627435849647629e-05, "loss": 0.4428, "step": 9198 }, { "epoch": 1.2989268568201073, "grad_norm": 4.249009344261088, "learning_rate": 1.2625964926761984e-05, "loss": 0.6148, "step": 9199 }, { "epoch": 1.2990680598700932, "grad_norm": 3.423248820356446, "learning_rate": 1.262449394285005e-05, "loss": 0.5887, "step": 9200 }, { "epoch": 1.299209262920079, "grad_norm": 4.102646208948321, "learning_rate": 1.2623022897946008e-05, "loss": 0.6299, "step": 9201 }, { "epoch": 1.299350465970065, "grad_norm": 3.187380673913972, "learning_rate": 1.2621551792084045e-05, "loss": 0.5114, "step": 9202 }, { "epoch": 1.2994916690200509, "grad_norm": 3.753509560109683, "learning_rate": 1.2620080625298355e-05, "loss": 0.5615, "step": 9203 }, { "epoch": 1.2996328720700367, "grad_norm": 3.608135438769864, "learning_rate": 1.2618609397623121e-05, "loss": 0.6428, "step": 9204 }, { "epoch": 1.2997740751200226, "grad_norm": 3.3244461554486846, "learning_rate": 1.2617138109092533e-05, "loss": 0.4924, "step": 9205 }, { "epoch": 1.2999152781700085, "grad_norm": 3.524588630677718, "learning_rate": 1.2615666759740788e-05, "loss": 0.5365, "step": 9206 }, { "epoch": 1.3000564812199944, "grad_norm": 3.762455366243628, "learning_rate": 1.2614195349602076e-05, "loss": 0.6073, "step": 9207 }, { "epoch": 1.3001976842699803, "grad_norm": 4.182315209182354, "learning_rate": 1.2612723878710594e-05, "loss": 0.5632, "step": 9208 }, { "epoch": 1.3003388873199662, "grad_norm": 3.427077512354995, "learning_rate": 1.2611252347100538e-05, "loss": 0.5562, "step": 9209 }, { "epoch": 1.300480090369952, "grad_norm": 4.103940312370206, "learning_rate": 1.2609780754806105e-05, "loss": 0.6116, "step": 9210 }, { "epoch": 1.300621293419938, "grad_norm": 3.5965243412444896, "learning_rate": 1.2608309101861491e-05, "loss": 0.5912, "step": 9211 }, { "epoch": 1.3007624964699238, "grad_norm": 3.547172590395529, "learning_rate": 1.2606837388300905e-05, "loss": 0.6037, "step": 9212 }, { "epoch": 1.3009036995199097, "grad_norm": 3.3507083359123646, "learning_rate": 1.260536561415854e-05, "loss": 0.5831, "step": 9213 }, { "epoch": 1.3010449025698956, "grad_norm": 3.3867300675965653, "learning_rate": 1.2603893779468604e-05, "loss": 0.5604, "step": 9214 }, { "epoch": 1.3011861056198815, "grad_norm": 3.8621106543325463, "learning_rate": 1.2602421884265304e-05, "loss": 0.7, "step": 9215 }, { "epoch": 1.3013273086698673, "grad_norm": 3.3095964738495267, "learning_rate": 1.2600949928582839e-05, "loss": 0.5302, "step": 9216 }, { "epoch": 1.3014685117198532, "grad_norm": 3.230380849409727, "learning_rate": 1.2599477912455425e-05, "loss": 0.5565, "step": 9217 }, { "epoch": 1.301609714769839, "grad_norm": 3.9254749244011684, "learning_rate": 1.2598005835917263e-05, "loss": 0.6512, "step": 9218 }, { "epoch": 1.3017509178198248, "grad_norm": 3.148232914471939, "learning_rate": 1.2596533699002568e-05, "loss": 0.5626, "step": 9219 }, { "epoch": 1.3018921208698107, "grad_norm": 4.263337419451311, "learning_rate": 1.2595061501745556e-05, "loss": 0.6361, "step": 9220 }, { "epoch": 1.3020333239197965, "grad_norm": 2.611021097978707, "learning_rate": 1.2593589244180431e-05, "loss": 0.4608, "step": 9221 }, { "epoch": 1.3021745269697824, "grad_norm": 3.668163325454977, "learning_rate": 1.2592116926341411e-05, "loss": 0.6532, "step": 9222 }, { "epoch": 1.3023157300197683, "grad_norm": 4.155956454463068, "learning_rate": 1.2590644548262715e-05, "loss": 0.6765, "step": 9223 }, { "epoch": 1.3024569330697542, "grad_norm": 3.545076840440909, "learning_rate": 1.258917210997856e-05, "loss": 0.609, "step": 9224 }, { "epoch": 1.30259813611974, "grad_norm": 4.126756638443326, "learning_rate": 1.2587699611523162e-05, "loss": 0.7125, "step": 9225 }, { "epoch": 1.302739339169726, "grad_norm": 3.633539083708262, "learning_rate": 1.2586227052930742e-05, "loss": 0.7023, "step": 9226 }, { "epoch": 1.3028805422197118, "grad_norm": 3.9478574104552884, "learning_rate": 1.2584754434235524e-05, "loss": 0.6645, "step": 9227 }, { "epoch": 1.3030217452696977, "grad_norm": 3.2569735468147294, "learning_rate": 1.2583281755471726e-05, "loss": 0.5555, "step": 9228 }, { "epoch": 1.3031629483196836, "grad_norm": 3.90011549331362, "learning_rate": 1.2581809016673581e-05, "loss": 0.6465, "step": 9229 }, { "epoch": 1.3033041513696695, "grad_norm": 3.060315708706385, "learning_rate": 1.2580336217875303e-05, "loss": 0.5537, "step": 9230 }, { "epoch": 1.3034453544196554, "grad_norm": 4.196938659517449, "learning_rate": 1.2578863359111129e-05, "loss": 0.6245, "step": 9231 }, { "epoch": 1.3035865574696412, "grad_norm": 4.37189937633729, "learning_rate": 1.2577390440415284e-05, "loss": 0.6887, "step": 9232 }, { "epoch": 1.3037277605196271, "grad_norm": 3.666958479537359, "learning_rate": 1.2575917461821998e-05, "loss": 0.6331, "step": 9233 }, { "epoch": 1.303868963569613, "grad_norm": 3.4969768392521114, "learning_rate": 1.2574444423365503e-05, "loss": 0.5452, "step": 9234 }, { "epoch": 1.304010166619599, "grad_norm": 5.156653102349881, "learning_rate": 1.2572971325080033e-05, "loss": 0.6953, "step": 9235 }, { "epoch": 1.3041513696695848, "grad_norm": 2.811542306016432, "learning_rate": 1.2571498166999816e-05, "loss": 0.4188, "step": 9236 }, { "epoch": 1.3042925727195707, "grad_norm": 2.7427089789373986, "learning_rate": 1.2570024949159097e-05, "loss": 0.3697, "step": 9237 }, { "epoch": 1.3044337757695565, "grad_norm": 4.156514052944761, "learning_rate": 1.2568551671592106e-05, "loss": 0.6854, "step": 9238 }, { "epoch": 1.3045749788195424, "grad_norm": 3.0204778903150302, "learning_rate": 1.2567078334333083e-05, "loss": 0.5371, "step": 9239 }, { "epoch": 1.3047161818695283, "grad_norm": 3.185404891906232, "learning_rate": 1.2565604937416267e-05, "loss": 0.5115, "step": 9240 }, { "epoch": 1.3048573849195142, "grad_norm": 3.0288109667028373, "learning_rate": 1.2564131480875905e-05, "loss": 0.465, "step": 9241 }, { "epoch": 1.3049985879695, "grad_norm": 3.1489080873078956, "learning_rate": 1.256265796474623e-05, "loss": 0.5965, "step": 9242 }, { "epoch": 1.305139791019486, "grad_norm": 3.1535011919011833, "learning_rate": 1.2561184389061491e-05, "loss": 0.5258, "step": 9243 }, { "epoch": 1.3052809940694718, "grad_norm": 4.106568400784383, "learning_rate": 1.2559710753855937e-05, "loss": 0.6012, "step": 9244 }, { "epoch": 1.3054221971194577, "grad_norm": 2.919580928387219, "learning_rate": 1.2558237059163805e-05, "loss": 0.5206, "step": 9245 }, { "epoch": 1.3055634001694436, "grad_norm": 3.6481958167009703, "learning_rate": 1.2556763305019353e-05, "loss": 0.6156, "step": 9246 }, { "epoch": 1.3057046032194295, "grad_norm": 3.825560218440628, "learning_rate": 1.2555289491456822e-05, "loss": 0.6533, "step": 9247 }, { "epoch": 1.3058458062694154, "grad_norm": 3.415095554042437, "learning_rate": 1.255381561851047e-05, "loss": 0.5388, "step": 9248 }, { "epoch": 1.3059870093194013, "grad_norm": 3.323744061224734, "learning_rate": 1.2552341686214544e-05, "loss": 0.5352, "step": 9249 }, { "epoch": 1.3061282123693871, "grad_norm": 3.9175071418402685, "learning_rate": 1.2550867694603302e-05, "loss": 0.6332, "step": 9250 }, { "epoch": 1.306269415419373, "grad_norm": 3.6822575051325055, "learning_rate": 1.2549393643710994e-05, "loss": 0.6342, "step": 9251 }, { "epoch": 1.306410618469359, "grad_norm": 3.87837085093127, "learning_rate": 1.2547919533571879e-05, "loss": 0.552, "step": 9252 }, { "epoch": 1.3065518215193448, "grad_norm": 3.629248648469618, "learning_rate": 1.2546445364220214e-05, "loss": 0.5717, "step": 9253 }, { "epoch": 1.3066930245693307, "grad_norm": 4.312863166428563, "learning_rate": 1.2544971135690263e-05, "loss": 0.7042, "step": 9254 }, { "epoch": 1.3068342276193166, "grad_norm": 4.736792516158595, "learning_rate": 1.2543496848016278e-05, "loss": 0.723, "step": 9255 }, { "epoch": 1.3069754306693024, "grad_norm": 3.3879786809211945, "learning_rate": 1.2542022501232522e-05, "loss": 0.5851, "step": 9256 }, { "epoch": 1.3071166337192883, "grad_norm": 3.1562943805964165, "learning_rate": 1.2540548095373266e-05, "loss": 0.5518, "step": 9257 }, { "epoch": 1.3072578367692742, "grad_norm": 4.253654238411628, "learning_rate": 1.2539073630472768e-05, "loss": 0.7056, "step": 9258 }, { "epoch": 1.30739903981926, "grad_norm": 3.591993407644833, "learning_rate": 1.2537599106565295e-05, "loss": 0.6348, "step": 9259 }, { "epoch": 1.307540242869246, "grad_norm": 4.236035072447704, "learning_rate": 1.2536124523685114e-05, "loss": 0.7128, "step": 9260 }, { "epoch": 1.3076814459192319, "grad_norm": 3.114823931997039, "learning_rate": 1.2534649881866494e-05, "loss": 0.5183, "step": 9261 }, { "epoch": 1.3078226489692177, "grad_norm": 2.817984018560341, "learning_rate": 1.2533175181143704e-05, "loss": 0.3875, "step": 9262 }, { "epoch": 1.3079638520192036, "grad_norm": 3.8205400884775593, "learning_rate": 1.253170042155102e-05, "loss": 0.597, "step": 9263 }, { "epoch": 1.3081050550691895, "grad_norm": 3.1081068623405463, "learning_rate": 1.2530225603122713e-05, "loss": 0.5262, "step": 9264 }, { "epoch": 1.3082462581191754, "grad_norm": 3.3608363563632233, "learning_rate": 1.252875072589305e-05, "loss": 0.678, "step": 9265 }, { "epoch": 1.3083874611691613, "grad_norm": 3.1112980915399957, "learning_rate": 1.2527275789896315e-05, "loss": 0.5677, "step": 9266 }, { "epoch": 1.3085286642191472, "grad_norm": 3.5240580591515442, "learning_rate": 1.2525800795166783e-05, "loss": 0.5247, "step": 9267 }, { "epoch": 1.308669867269133, "grad_norm": 4.298886661569598, "learning_rate": 1.2524325741738732e-05, "loss": 0.6677, "step": 9268 }, { "epoch": 1.308811070319119, "grad_norm": 3.5933587103455475, "learning_rate": 1.2522850629646439e-05, "loss": 0.558, "step": 9269 }, { "epoch": 1.3089522733691048, "grad_norm": 3.7607720814874903, "learning_rate": 1.2521375458924187e-05, "loss": 0.6326, "step": 9270 }, { "epoch": 1.3090934764190907, "grad_norm": 3.2957694209007817, "learning_rate": 1.2519900229606261e-05, "loss": 0.4539, "step": 9271 }, { "epoch": 1.3092346794690766, "grad_norm": 3.774286912841949, "learning_rate": 1.2518424941726939e-05, "loss": 0.504, "step": 9272 }, { "epoch": 1.3093758825190625, "grad_norm": 3.1098028550188657, "learning_rate": 1.251694959532051e-05, "loss": 0.5128, "step": 9273 }, { "epoch": 1.3095170855690483, "grad_norm": 3.0696725368822406, "learning_rate": 1.2515474190421258e-05, "loss": 0.4844, "step": 9274 }, { "epoch": 1.3096582886190342, "grad_norm": 2.988075346302314, "learning_rate": 1.2513998727063475e-05, "loss": 0.5383, "step": 9275 }, { "epoch": 1.30979949166902, "grad_norm": 3.682406744798175, "learning_rate": 1.2512523205281444e-05, "loss": 0.6115, "step": 9276 }, { "epoch": 1.309940694719006, "grad_norm": 3.8131474272886754, "learning_rate": 1.251104762510946e-05, "loss": 0.5554, "step": 9277 }, { "epoch": 1.3100818977689919, "grad_norm": 3.8331445825039947, "learning_rate": 1.2509571986581814e-05, "loss": 0.6753, "step": 9278 }, { "epoch": 1.3102231008189777, "grad_norm": 3.852097333624089, "learning_rate": 1.2508096289732799e-05, "loss": 0.7472, "step": 9279 }, { "epoch": 1.3103643038689636, "grad_norm": 3.5665765023083065, "learning_rate": 1.2506620534596711e-05, "loss": 0.5941, "step": 9280 }, { "epoch": 1.3105055069189495, "grad_norm": 2.9934552756948736, "learning_rate": 1.2505144721207843e-05, "loss": 0.4984, "step": 9281 }, { "epoch": 1.3106467099689354, "grad_norm": 3.512662329711256, "learning_rate": 1.250366884960049e-05, "loss": 0.4728, "step": 9282 }, { "epoch": 1.3107879130189213, "grad_norm": 3.9225558091451442, "learning_rate": 1.2502192919808958e-05, "loss": 0.5928, "step": 9283 }, { "epoch": 1.3109291160689072, "grad_norm": 3.3510030423618358, "learning_rate": 1.2500716931867543e-05, "loss": 0.5689, "step": 9284 }, { "epoch": 1.311070319118893, "grad_norm": 4.2268187826796515, "learning_rate": 1.2499240885810546e-05, "loss": 0.7237, "step": 9285 }, { "epoch": 1.311211522168879, "grad_norm": 3.680876323663695, "learning_rate": 1.249776478167227e-05, "loss": 0.605, "step": 9286 }, { "epoch": 1.3113527252188648, "grad_norm": 4.002694119795089, "learning_rate": 1.249628861948702e-05, "loss": 0.6596, "step": 9287 }, { "epoch": 1.3114939282688507, "grad_norm": 3.1645928399905494, "learning_rate": 1.24948123992891e-05, "loss": 0.5867, "step": 9288 }, { "epoch": 1.3116351313188366, "grad_norm": 3.8299127374595243, "learning_rate": 1.2493336121112818e-05, "loss": 0.7085, "step": 9289 }, { "epoch": 1.3117763343688225, "grad_norm": 3.255832351809516, "learning_rate": 1.2491859784992477e-05, "loss": 0.6707, "step": 9290 }, { "epoch": 1.3119175374188083, "grad_norm": 3.7400629711008397, "learning_rate": 1.2490383390962395e-05, "loss": 0.5892, "step": 9291 }, { "epoch": 1.3120587404687942, "grad_norm": 2.9668270687012286, "learning_rate": 1.248890693905688e-05, "loss": 0.5873, "step": 9292 }, { "epoch": 1.31219994351878, "grad_norm": 3.719476391783897, "learning_rate": 1.2487430429310239e-05, "loss": 0.5367, "step": 9293 }, { "epoch": 1.312341146568766, "grad_norm": 4.012833342172237, "learning_rate": 1.248595386175679e-05, "loss": 0.5228, "step": 9294 }, { "epoch": 1.3124823496187519, "grad_norm": 3.5696478412825616, "learning_rate": 1.2484477236430847e-05, "loss": 0.5924, "step": 9295 }, { "epoch": 1.3126235526687378, "grad_norm": 3.815641626023018, "learning_rate": 1.2483000553366727e-05, "loss": 0.6269, "step": 9296 }, { "epoch": 1.3127647557187236, "grad_norm": 4.166165230315048, "learning_rate": 1.2481523812598746e-05, "loss": 0.8467, "step": 9297 }, { "epoch": 1.3129059587687095, "grad_norm": 3.705751989958468, "learning_rate": 1.2480047014161223e-05, "loss": 0.6672, "step": 9298 }, { "epoch": 1.3130471618186954, "grad_norm": 3.405347242669965, "learning_rate": 1.2478570158088477e-05, "loss": 0.5666, "step": 9299 }, { "epoch": 1.3131883648686813, "grad_norm": 4.0849396138275935, "learning_rate": 1.247709324441483e-05, "loss": 0.7035, "step": 9300 }, { "epoch": 1.3133295679186672, "grad_norm": 3.4379999750701145, "learning_rate": 1.247561627317461e-05, "loss": 0.5881, "step": 9301 }, { "epoch": 1.313470770968653, "grad_norm": 3.6419292726317734, "learning_rate": 1.2474139244402134e-05, "loss": 0.6041, "step": 9302 }, { "epoch": 1.313611974018639, "grad_norm": 3.16572552319187, "learning_rate": 1.2472662158131732e-05, "loss": 0.5277, "step": 9303 }, { "epoch": 1.3137531770686246, "grad_norm": 3.6123126059706836, "learning_rate": 1.2471185014397728e-05, "loss": 0.6163, "step": 9304 }, { "epoch": 1.3138943801186105, "grad_norm": 3.757437168569089, "learning_rate": 1.2469707813234455e-05, "loss": 0.642, "step": 9305 }, { "epoch": 1.3140355831685964, "grad_norm": 3.196902626158747, "learning_rate": 1.2468230554676235e-05, "loss": 0.4455, "step": 9306 }, { "epoch": 1.3141767862185822, "grad_norm": 3.557264732379261, "learning_rate": 1.2466753238757406e-05, "loss": 0.5001, "step": 9307 }, { "epoch": 1.3143179892685681, "grad_norm": 4.336686034052295, "learning_rate": 1.2465275865512291e-05, "loss": 0.5803, "step": 9308 }, { "epoch": 1.314459192318554, "grad_norm": 3.302825718200357, "learning_rate": 1.2463798434975239e-05, "loss": 0.5924, "step": 9309 }, { "epoch": 1.31460039536854, "grad_norm": 3.6257427112141762, "learning_rate": 1.2462320947180565e-05, "loss": 0.5996, "step": 9310 }, { "epoch": 1.3147415984185258, "grad_norm": 4.410392126008087, "learning_rate": 1.2460843402162624e-05, "loss": 0.7127, "step": 9311 }, { "epoch": 1.3148828014685117, "grad_norm": 3.2330243897979747, "learning_rate": 1.2459365799955741e-05, "loss": 0.6044, "step": 9312 }, { "epoch": 1.3150240045184975, "grad_norm": 3.9013937369360043, "learning_rate": 1.245788814059426e-05, "loss": 0.6431, "step": 9313 }, { "epoch": 1.3151652075684834, "grad_norm": 3.529792051402255, "learning_rate": 1.245641042411252e-05, "loss": 0.5846, "step": 9314 }, { "epoch": 1.3153064106184693, "grad_norm": 3.410021676331754, "learning_rate": 1.2454932650544862e-05, "loss": 0.52, "step": 9315 }, { "epoch": 1.3154476136684552, "grad_norm": 3.2379182049059816, "learning_rate": 1.2453454819925627e-05, "loss": 0.5478, "step": 9316 }, { "epoch": 1.315588816718441, "grad_norm": 3.509887683378275, "learning_rate": 1.2451976932289168e-05, "loss": 0.4775, "step": 9317 }, { "epoch": 1.315730019768427, "grad_norm": 3.6648330664363553, "learning_rate": 1.245049898766982e-05, "loss": 0.4969, "step": 9318 }, { "epoch": 1.3158712228184128, "grad_norm": 3.612899750807376, "learning_rate": 1.2449020986101934e-05, "loss": 0.6447, "step": 9319 }, { "epoch": 1.3160124258683987, "grad_norm": 5.165230371738194, "learning_rate": 1.2447542927619857e-05, "loss": 0.8195, "step": 9320 }, { "epoch": 1.3161536289183846, "grad_norm": 4.036267354645072, "learning_rate": 1.2446064812257941e-05, "loss": 0.5093, "step": 9321 }, { "epoch": 1.3162948319683705, "grad_norm": 2.7688796737880823, "learning_rate": 1.2444586640050536e-05, "loss": 0.4783, "step": 9322 }, { "epoch": 1.3164360350183564, "grad_norm": 3.896023635680067, "learning_rate": 1.2443108411031992e-05, "loss": 0.6337, "step": 9323 }, { "epoch": 1.3165772380683423, "grad_norm": 3.853454582969259, "learning_rate": 1.2441630125236664e-05, "loss": 0.6244, "step": 9324 }, { "epoch": 1.3167184411183281, "grad_norm": 3.337199941590131, "learning_rate": 1.2440151782698904e-05, "loss": 0.5576, "step": 9325 }, { "epoch": 1.316859644168314, "grad_norm": 3.3421757059944754, "learning_rate": 1.2438673383453073e-05, "loss": 0.5519, "step": 9326 }, { "epoch": 1.3170008472183, "grad_norm": 3.2967566547495375, "learning_rate": 1.2437194927533524e-05, "loss": 0.619, "step": 9327 }, { "epoch": 1.3171420502682858, "grad_norm": 3.6234533068333548, "learning_rate": 1.243571641497462e-05, "loss": 0.7383, "step": 9328 }, { "epoch": 1.3172832533182717, "grad_norm": 3.775982829211028, "learning_rate": 1.2434237845810714e-05, "loss": 0.5902, "step": 9329 }, { "epoch": 1.3174244563682576, "grad_norm": 3.699851892501107, "learning_rate": 1.2432759220076177e-05, "loss": 0.6836, "step": 9330 }, { "epoch": 1.3175656594182434, "grad_norm": 4.226177458401757, "learning_rate": 1.2431280537805363e-05, "loss": 0.6984, "step": 9331 }, { "epoch": 1.3177068624682293, "grad_norm": 5.503905857685413, "learning_rate": 1.242980179903264e-05, "loss": 0.6524, "step": 9332 }, { "epoch": 1.3178480655182152, "grad_norm": 4.182109172160288, "learning_rate": 1.242832300379237e-05, "loss": 0.6601, "step": 9333 }, { "epoch": 1.317989268568201, "grad_norm": 3.4885976871269135, "learning_rate": 1.2426844152118926e-05, "loss": 0.7145, "step": 9334 }, { "epoch": 1.318130471618187, "grad_norm": 3.4704760759425204, "learning_rate": 1.2425365244046674e-05, "loss": 0.6912, "step": 9335 }, { "epoch": 1.3182716746681729, "grad_norm": 3.52727431863363, "learning_rate": 1.2423886279609975e-05, "loss": 0.6495, "step": 9336 }, { "epoch": 1.3184128777181587, "grad_norm": 3.8004598991392124, "learning_rate": 1.242240725884321e-05, "loss": 0.6967, "step": 9337 }, { "epoch": 1.3185540807681446, "grad_norm": 3.4945221242998454, "learning_rate": 1.2420928181780745e-05, "loss": 0.6797, "step": 9338 }, { "epoch": 1.3186952838181305, "grad_norm": 4.048181658191856, "learning_rate": 1.2419449048456955e-05, "loss": 0.6253, "step": 9339 }, { "epoch": 1.3188364868681164, "grad_norm": 3.2396788521223834, "learning_rate": 1.2417969858906214e-05, "loss": 0.5436, "step": 9340 }, { "epoch": 1.3189776899181023, "grad_norm": 3.2680796320000067, "learning_rate": 1.2416490613162896e-05, "loss": 0.5327, "step": 9341 }, { "epoch": 1.3191188929680882, "grad_norm": 3.715863324615412, "learning_rate": 1.241501131126138e-05, "loss": 0.5818, "step": 9342 }, { "epoch": 1.319260096018074, "grad_norm": 3.33465078891875, "learning_rate": 1.241353195323605e-05, "loss": 0.55, "step": 9343 }, { "epoch": 1.31940129906806, "grad_norm": 3.3057332986866985, "learning_rate": 1.2412052539121273e-05, "loss": 0.5576, "step": 9344 }, { "epoch": 1.3195425021180458, "grad_norm": 3.520822933687587, "learning_rate": 1.2410573068951437e-05, "loss": 0.5273, "step": 9345 }, { "epoch": 1.3196837051680317, "grad_norm": 3.5341433291135864, "learning_rate": 1.2409093542760925e-05, "loss": 0.5676, "step": 9346 }, { "epoch": 1.3198249082180176, "grad_norm": 3.535026490042776, "learning_rate": 1.2407613960584121e-05, "loss": 0.593, "step": 9347 }, { "epoch": 1.3199661112680035, "grad_norm": 5.254598342605096, "learning_rate": 1.240613432245541e-05, "loss": 0.4673, "step": 9348 }, { "epoch": 1.3201073143179893, "grad_norm": 3.6432390362174587, "learning_rate": 1.2404654628409172e-05, "loss": 0.6045, "step": 9349 }, { "epoch": 1.3202485173679752, "grad_norm": 3.640047014244559, "learning_rate": 1.2403174878479802e-05, "loss": 0.6163, "step": 9350 }, { "epoch": 1.320389720417961, "grad_norm": 3.816127366732024, "learning_rate": 1.2401695072701683e-05, "loss": 0.632, "step": 9351 }, { "epoch": 1.320530923467947, "grad_norm": 3.708569188697407, "learning_rate": 1.240021521110921e-05, "loss": 0.7233, "step": 9352 }, { "epoch": 1.3206721265179329, "grad_norm": 4.148344250944895, "learning_rate": 1.239873529373677e-05, "loss": 0.7323, "step": 9353 }, { "epoch": 1.3208133295679187, "grad_norm": 3.220730686155303, "learning_rate": 1.239725532061876e-05, "loss": 0.5624, "step": 9354 }, { "epoch": 1.3209545326179044, "grad_norm": 3.5095879124962464, "learning_rate": 1.239577529178957e-05, "loss": 0.6158, "step": 9355 }, { "epoch": 1.3210957356678903, "grad_norm": 4.038595904247723, "learning_rate": 1.2394295207283598e-05, "loss": 0.6899, "step": 9356 }, { "epoch": 1.3212369387178762, "grad_norm": 4.568254446886248, "learning_rate": 1.239281506713524e-05, "loss": 0.7219, "step": 9357 }, { "epoch": 1.321378141767862, "grad_norm": 3.5765069412575334, "learning_rate": 1.239133487137889e-05, "loss": 0.548, "step": 9358 }, { "epoch": 1.321519344817848, "grad_norm": 4.517824425508421, "learning_rate": 1.2389854620048952e-05, "loss": 0.7614, "step": 9359 }, { "epoch": 1.3216605478678338, "grad_norm": 4.046679223340215, "learning_rate": 1.2388374313179828e-05, "loss": 0.6196, "step": 9360 }, { "epoch": 1.3218017509178197, "grad_norm": 3.7118062157675604, "learning_rate": 1.2386893950805914e-05, "loss": 0.6179, "step": 9361 }, { "epoch": 1.3219429539678056, "grad_norm": 3.70906559079968, "learning_rate": 1.2385413532961612e-05, "loss": 0.6028, "step": 9362 }, { "epoch": 1.3220841570177915, "grad_norm": 3.6649247131006786, "learning_rate": 1.2383933059681332e-05, "loss": 0.6613, "step": 9363 }, { "epoch": 1.3222253600677774, "grad_norm": 4.025558316977827, "learning_rate": 1.2382452530999479e-05, "loss": 0.5858, "step": 9364 }, { "epoch": 1.3223665631177632, "grad_norm": 3.43461804909093, "learning_rate": 1.2380971946950458e-05, "loss": 0.5644, "step": 9365 }, { "epoch": 1.3225077661677491, "grad_norm": 3.6261507436426985, "learning_rate": 1.2379491307568676e-05, "loss": 0.7124, "step": 9366 }, { "epoch": 1.322648969217735, "grad_norm": 3.6563503868976412, "learning_rate": 1.2378010612888544e-05, "loss": 0.6896, "step": 9367 }, { "epoch": 1.3227901722677209, "grad_norm": 3.631086581687956, "learning_rate": 1.2376529862944472e-05, "loss": 0.5961, "step": 9368 }, { "epoch": 1.3229313753177068, "grad_norm": 3.418877946589534, "learning_rate": 1.2375049057770874e-05, "loss": 0.5615, "step": 9369 }, { "epoch": 1.3230725783676927, "grad_norm": 3.258397362848523, "learning_rate": 1.2373568197402157e-05, "loss": 0.557, "step": 9370 }, { "epoch": 1.3232137814176785, "grad_norm": 3.789938580363218, "learning_rate": 1.2372087281872745e-05, "loss": 0.6114, "step": 9371 }, { "epoch": 1.3233549844676644, "grad_norm": 3.2990273647341706, "learning_rate": 1.2370606311217047e-05, "loss": 0.5651, "step": 9372 }, { "epoch": 1.3234961875176503, "grad_norm": 3.120162170597407, "learning_rate": 1.2369125285469482e-05, "loss": 0.5675, "step": 9373 }, { "epoch": 1.3236373905676362, "grad_norm": 3.459996318656619, "learning_rate": 1.2367644204664468e-05, "loss": 0.6314, "step": 9374 }, { "epoch": 1.323778593617622, "grad_norm": 3.6902647229533283, "learning_rate": 1.2366163068836427e-05, "loss": 0.5108, "step": 9375 }, { "epoch": 1.323919796667608, "grad_norm": 3.3778885651381723, "learning_rate": 1.2364681878019776e-05, "loss": 0.5746, "step": 9376 }, { "epoch": 1.3240609997175938, "grad_norm": 3.242667512537071, "learning_rate": 1.2363200632248942e-05, "loss": 0.6375, "step": 9377 }, { "epoch": 1.3242022027675797, "grad_norm": 3.377010724744004, "learning_rate": 1.2361719331558346e-05, "loss": 0.594, "step": 9378 }, { "epoch": 1.3243434058175656, "grad_norm": 3.574507917565269, "learning_rate": 1.2360237975982408e-05, "loss": 0.5372, "step": 9379 }, { "epoch": 1.3244846088675515, "grad_norm": 3.963388790323442, "learning_rate": 1.2358756565555563e-05, "loss": 0.7209, "step": 9380 }, { "epoch": 1.3246258119175374, "grad_norm": 3.39928205935657, "learning_rate": 1.2357275100312234e-05, "loss": 0.5888, "step": 9381 }, { "epoch": 1.3247670149675232, "grad_norm": 4.166551988019832, "learning_rate": 1.2355793580286848e-05, "loss": 0.7082, "step": 9382 }, { "epoch": 1.3249082180175091, "grad_norm": 4.374973444731073, "learning_rate": 1.2354312005513838e-05, "loss": 0.6568, "step": 9383 }, { "epoch": 1.325049421067495, "grad_norm": 3.218107840652149, "learning_rate": 1.2352830376027634e-05, "loss": 0.5173, "step": 9384 }, { "epoch": 1.325190624117481, "grad_norm": 3.27036034240229, "learning_rate": 1.2351348691862665e-05, "loss": 0.517, "step": 9385 }, { "epoch": 1.3253318271674668, "grad_norm": 3.14250587262911, "learning_rate": 1.2349866953053372e-05, "loss": 0.4996, "step": 9386 }, { "epoch": 1.3254730302174527, "grad_norm": 3.6981472072319552, "learning_rate": 1.2348385159634181e-05, "loss": 0.5938, "step": 9387 }, { "epoch": 1.3256142332674385, "grad_norm": 3.1320548093918226, "learning_rate": 1.2346903311639537e-05, "loss": 0.4986, "step": 9388 }, { "epoch": 1.3257554363174244, "grad_norm": 3.666485313871525, "learning_rate": 1.2345421409103872e-05, "loss": 0.6317, "step": 9389 }, { "epoch": 1.3258966393674103, "grad_norm": 3.2467727923996463, "learning_rate": 1.2343939452061628e-05, "loss": 0.4593, "step": 9390 }, { "epoch": 1.3260378424173962, "grad_norm": 3.4692872007049953, "learning_rate": 1.2342457440547241e-05, "loss": 0.5348, "step": 9391 }, { "epoch": 1.326179045467382, "grad_norm": 3.7259483955097905, "learning_rate": 1.2340975374595157e-05, "loss": 0.6006, "step": 9392 }, { "epoch": 1.326320248517368, "grad_norm": 3.5740612258052273, "learning_rate": 1.2339493254239814e-05, "loss": 0.574, "step": 9393 }, { "epoch": 1.3264614515673538, "grad_norm": 2.9524646374962082, "learning_rate": 1.2338011079515661e-05, "loss": 0.4963, "step": 9394 }, { "epoch": 1.3266026546173397, "grad_norm": 3.76750563848066, "learning_rate": 1.2336528850457138e-05, "loss": 0.6411, "step": 9395 }, { "epoch": 1.3267438576673256, "grad_norm": 2.9736231327102804, "learning_rate": 1.2335046567098694e-05, "loss": 0.5136, "step": 9396 }, { "epoch": 1.3268850607173115, "grad_norm": 3.668291127339204, "learning_rate": 1.2333564229474778e-05, "loss": 0.6207, "step": 9397 }, { "epoch": 1.3270262637672974, "grad_norm": 3.75462918508023, "learning_rate": 1.2332081837619836e-05, "loss": 0.5616, "step": 9398 }, { "epoch": 1.3271674668172833, "grad_norm": 3.393014992745285, "learning_rate": 1.233059939156832e-05, "loss": 0.5711, "step": 9399 }, { "epoch": 1.3273086698672691, "grad_norm": 3.6450057366952993, "learning_rate": 1.2329116891354677e-05, "loss": 0.5315, "step": 9400 }, { "epoch": 1.327449872917255, "grad_norm": 3.2572139309360395, "learning_rate": 1.2327634337013366e-05, "loss": 0.5645, "step": 9401 }, { "epoch": 1.327591075967241, "grad_norm": 3.1348991158688206, "learning_rate": 1.2326151728578839e-05, "loss": 0.5049, "step": 9402 }, { "epoch": 1.3277322790172268, "grad_norm": 3.418606562274722, "learning_rate": 1.2324669066085549e-05, "loss": 0.5024, "step": 9403 }, { "epoch": 1.3278734820672127, "grad_norm": 3.256608069213126, "learning_rate": 1.2323186349567955e-05, "loss": 0.6353, "step": 9404 }, { "epoch": 1.3280146851171986, "grad_norm": 3.913407544923666, "learning_rate": 1.232170357906051e-05, "loss": 0.6601, "step": 9405 }, { "epoch": 1.3281558881671844, "grad_norm": 3.024532204786768, "learning_rate": 1.232022075459768e-05, "loss": 0.5178, "step": 9406 }, { "epoch": 1.3282970912171703, "grad_norm": 3.6906275823672074, "learning_rate": 1.2318737876213922e-05, "loss": 0.6135, "step": 9407 }, { "epoch": 1.3284382942671562, "grad_norm": 3.8646666897048476, "learning_rate": 1.2317254943943695e-05, "loss": 0.7091, "step": 9408 }, { "epoch": 1.328579497317142, "grad_norm": 6.992988266133434, "learning_rate": 1.2315771957821466e-05, "loss": 0.7157, "step": 9409 }, { "epoch": 1.328720700367128, "grad_norm": 3.4661409033845025, "learning_rate": 1.2314288917881696e-05, "loss": 0.5798, "step": 9410 }, { "epoch": 1.3288619034171139, "grad_norm": 4.215371370410614, "learning_rate": 1.2312805824158852e-05, "loss": 0.6174, "step": 9411 }, { "epoch": 1.3290031064670997, "grad_norm": 3.1690829513304655, "learning_rate": 1.2311322676687398e-05, "loss": 0.6081, "step": 9412 }, { "epoch": 1.3291443095170856, "grad_norm": 3.4157954008135674, "learning_rate": 1.2309839475501801e-05, "loss": 0.6279, "step": 9413 }, { "epoch": 1.3292855125670715, "grad_norm": 3.1755921780632526, "learning_rate": 1.2308356220636535e-05, "loss": 0.5089, "step": 9414 }, { "epoch": 1.3294267156170574, "grad_norm": 3.4491796843346245, "learning_rate": 1.2306872912126068e-05, "loss": 0.5568, "step": 9415 }, { "epoch": 1.3295679186670433, "grad_norm": 3.612064152234278, "learning_rate": 1.230538955000487e-05, "loss": 0.5153, "step": 9416 }, { "epoch": 1.3297091217170292, "grad_norm": 3.6379477506253455, "learning_rate": 1.2303906134307413e-05, "loss": 0.5774, "step": 9417 }, { "epoch": 1.329850324767015, "grad_norm": 4.182307417877903, "learning_rate": 1.2302422665068174e-05, "loss": 0.6527, "step": 9418 }, { "epoch": 1.329991527817001, "grad_norm": 3.468247971401187, "learning_rate": 1.2300939142321626e-05, "loss": 0.6052, "step": 9419 }, { "epoch": 1.3301327308669868, "grad_norm": 3.3609426284591595, "learning_rate": 1.2299455566102248e-05, "loss": 0.5844, "step": 9420 }, { "epoch": 1.3302739339169727, "grad_norm": 5.227694973434564, "learning_rate": 1.2297971936444512e-05, "loss": 0.7641, "step": 9421 }, { "epoch": 1.3304151369669586, "grad_norm": 4.062006907220968, "learning_rate": 1.2296488253382902e-05, "loss": 0.6743, "step": 9422 }, { "epoch": 1.3305563400169444, "grad_norm": 4.446921186139767, "learning_rate": 1.2295004516951898e-05, "loss": 0.6239, "step": 9423 }, { "epoch": 1.3306975430669303, "grad_norm": 3.2336327037120913, "learning_rate": 1.229352072718598e-05, "loss": 0.6276, "step": 9424 }, { "epoch": 1.3308387461169162, "grad_norm": 3.9540902965003504, "learning_rate": 1.229203688411963e-05, "loss": 0.6171, "step": 9425 }, { "epoch": 1.330979949166902, "grad_norm": 3.4775552978711812, "learning_rate": 1.2290552987787332e-05, "loss": 0.4476, "step": 9426 }, { "epoch": 1.331121152216888, "grad_norm": 3.619380493059914, "learning_rate": 1.2289069038223574e-05, "loss": 0.5797, "step": 9427 }, { "epoch": 1.3312623552668739, "grad_norm": 3.8797930015628768, "learning_rate": 1.2287585035462838e-05, "loss": 0.5436, "step": 9428 }, { "epoch": 1.3314035583168597, "grad_norm": 4.134773340550053, "learning_rate": 1.2286100979539616e-05, "loss": 0.7309, "step": 9429 }, { "epoch": 1.3315447613668456, "grad_norm": 3.5952435249030015, "learning_rate": 1.228461687048839e-05, "loss": 0.5614, "step": 9430 }, { "epoch": 1.3316859644168315, "grad_norm": 3.6342837997832835, "learning_rate": 1.2283132708343659e-05, "loss": 0.5929, "step": 9431 }, { "epoch": 1.3318271674668174, "grad_norm": 3.08637650514622, "learning_rate": 1.2281648493139911e-05, "loss": 0.4914, "step": 9432 }, { "epoch": 1.3319683705168033, "grad_norm": 4.616875226200589, "learning_rate": 1.2280164224911633e-05, "loss": 0.775, "step": 9433 }, { "epoch": 1.3321095735667892, "grad_norm": 2.577744620894552, "learning_rate": 1.2278679903693325e-05, "loss": 0.4532, "step": 9434 }, { "epoch": 1.332250776616775, "grad_norm": 3.3350150444291673, "learning_rate": 1.227719552951948e-05, "loss": 0.5754, "step": 9435 }, { "epoch": 1.332391979666761, "grad_norm": 3.636931871227736, "learning_rate": 1.2275711102424595e-05, "loss": 0.6325, "step": 9436 }, { "epoch": 1.3325331827167468, "grad_norm": 3.419171994897924, "learning_rate": 1.2274226622443165e-05, "loss": 0.5612, "step": 9437 }, { "epoch": 1.3326743857667327, "grad_norm": 3.6348746938901484, "learning_rate": 1.2272742089609694e-05, "loss": 0.5717, "step": 9438 }, { "epoch": 1.3328155888167186, "grad_norm": 3.8057851360772412, "learning_rate": 1.2271257503958674e-05, "loss": 0.6302, "step": 9439 }, { "epoch": 1.3329567918667042, "grad_norm": 4.31800193565284, "learning_rate": 1.2269772865524612e-05, "loss": 0.7276, "step": 9440 }, { "epoch": 1.3330979949166901, "grad_norm": 4.21400117312378, "learning_rate": 1.2268288174342013e-05, "loss": 0.7091, "step": 9441 }, { "epoch": 1.333239197966676, "grad_norm": 3.059093786819134, "learning_rate": 1.2266803430445372e-05, "loss": 0.4818, "step": 9442 }, { "epoch": 1.3333804010166619, "grad_norm": 3.312637304449722, "learning_rate": 1.2265318633869198e-05, "loss": 0.6169, "step": 9443 }, { "epoch": 1.3335216040666478, "grad_norm": 3.523818624103979, "learning_rate": 1.2263833784647998e-05, "loss": 0.578, "step": 9444 }, { "epoch": 1.3336628071166337, "grad_norm": 4.14118523181956, "learning_rate": 1.226234888281628e-05, "loss": 0.7882, "step": 9445 }, { "epoch": 1.3338040101666195, "grad_norm": 3.8338976859005416, "learning_rate": 1.2260863928408551e-05, "loss": 0.6222, "step": 9446 }, { "epoch": 1.3339452132166054, "grad_norm": 3.105659687071912, "learning_rate": 1.225937892145932e-05, "loss": 0.5277, "step": 9447 }, { "epoch": 1.3340864162665913, "grad_norm": 3.8826484778187114, "learning_rate": 1.2257893862003093e-05, "loss": 0.6512, "step": 9448 }, { "epoch": 1.3342276193165772, "grad_norm": 3.216689869156623, "learning_rate": 1.2256408750074397e-05, "loss": 0.5126, "step": 9449 }, { "epoch": 1.334368822366563, "grad_norm": 4.388395175411938, "learning_rate": 1.225492358570773e-05, "loss": 0.7349, "step": 9450 }, { "epoch": 1.334510025416549, "grad_norm": 5.098037335371972, "learning_rate": 1.2253438368937615e-05, "loss": 0.9015, "step": 9451 }, { "epoch": 1.3346512284665348, "grad_norm": 3.081887700340631, "learning_rate": 1.2251953099798566e-05, "loss": 0.5251, "step": 9452 }, { "epoch": 1.3347924315165207, "grad_norm": 3.6573336918144856, "learning_rate": 1.22504677783251e-05, "loss": 0.6051, "step": 9453 }, { "epoch": 1.3349336345665066, "grad_norm": 4.0183552445341055, "learning_rate": 1.2248982404551733e-05, "loss": 0.6718, "step": 9454 }, { "epoch": 1.3350748376164925, "grad_norm": 4.247716634027352, "learning_rate": 1.2247496978512988e-05, "loss": 0.6922, "step": 9455 }, { "epoch": 1.3352160406664784, "grad_norm": 3.0253917359778604, "learning_rate": 1.224601150024338e-05, "loss": 0.487, "step": 9456 }, { "epoch": 1.3353572437164642, "grad_norm": 4.202526400211722, "learning_rate": 1.2244525969777438e-05, "loss": 0.6371, "step": 9457 }, { "epoch": 1.3354984467664501, "grad_norm": 3.849070660148693, "learning_rate": 1.2243040387149682e-05, "loss": 0.6035, "step": 9458 }, { "epoch": 1.335639649816436, "grad_norm": 3.790411585502975, "learning_rate": 1.2241554752394633e-05, "loss": 0.6354, "step": 9459 }, { "epoch": 1.335780852866422, "grad_norm": 3.3816453410918865, "learning_rate": 1.2240069065546823e-05, "loss": 0.5956, "step": 9460 }, { "epoch": 1.3359220559164078, "grad_norm": 3.8186172088018333, "learning_rate": 1.2238583326640774e-05, "loss": 0.6188, "step": 9461 }, { "epoch": 1.3360632589663937, "grad_norm": 3.889644140583435, "learning_rate": 1.2237097535711016e-05, "loss": 0.6197, "step": 9462 }, { "epoch": 1.3362044620163795, "grad_norm": 4.091606484889772, "learning_rate": 1.2235611692792078e-05, "loss": 0.6102, "step": 9463 }, { "epoch": 1.3363456650663654, "grad_norm": 4.009843468951958, "learning_rate": 1.2234125797918488e-05, "loss": 0.6242, "step": 9464 }, { "epoch": 1.3364868681163513, "grad_norm": 3.981661103480363, "learning_rate": 1.2232639851124778e-05, "loss": 0.6287, "step": 9465 }, { "epoch": 1.3366280711663372, "grad_norm": 4.113754729262994, "learning_rate": 1.2231153852445485e-05, "loss": 0.7285, "step": 9466 }, { "epoch": 1.336769274216323, "grad_norm": 3.030985693472055, "learning_rate": 1.2229667801915136e-05, "loss": 0.535, "step": 9467 }, { "epoch": 1.336910477266309, "grad_norm": 3.433861568259088, "learning_rate": 1.2228181699568276e-05, "loss": 0.5797, "step": 9468 }, { "epoch": 1.3370516803162948, "grad_norm": 3.9918220370960906, "learning_rate": 1.222669554543943e-05, "loss": 0.6775, "step": 9469 }, { "epoch": 1.3371928833662807, "grad_norm": 2.88206278251222, "learning_rate": 1.2225209339563144e-05, "loss": 0.4869, "step": 9470 }, { "epoch": 1.3373340864162666, "grad_norm": 3.212716043331327, "learning_rate": 1.2223723081973955e-05, "loss": 0.527, "step": 9471 }, { "epoch": 1.3374752894662525, "grad_norm": 3.429279549762187, "learning_rate": 1.2222236772706402e-05, "loss": 0.5842, "step": 9472 }, { "epoch": 1.3376164925162384, "grad_norm": 4.166665004383837, "learning_rate": 1.2220750411795021e-05, "loss": 0.7869, "step": 9473 }, { "epoch": 1.3377576955662243, "grad_norm": 3.538006512037677, "learning_rate": 1.2219263999274367e-05, "loss": 0.5724, "step": 9474 }, { "epoch": 1.3378988986162101, "grad_norm": 3.643270041208831, "learning_rate": 1.2217777535178973e-05, "loss": 0.6163, "step": 9475 }, { "epoch": 1.338040101666196, "grad_norm": 3.793927551252516, "learning_rate": 1.2216291019543385e-05, "loss": 0.6049, "step": 9476 }, { "epoch": 1.338181304716182, "grad_norm": 3.2237758161888936, "learning_rate": 1.2214804452402152e-05, "loss": 0.5407, "step": 9477 }, { "epoch": 1.3383225077661678, "grad_norm": 3.735651225161646, "learning_rate": 1.221331783378982e-05, "loss": 0.6448, "step": 9478 }, { "epoch": 1.3384637108161537, "grad_norm": 3.7437083403399987, "learning_rate": 1.2211831163740937e-05, "loss": 0.565, "step": 9479 }, { "epoch": 1.3386049138661396, "grad_norm": 3.312341068009971, "learning_rate": 1.2210344442290054e-05, "loss": 0.5292, "step": 9480 }, { "epoch": 1.3387461169161254, "grad_norm": 3.382365957889865, "learning_rate": 1.2208857669471721e-05, "loss": 0.6308, "step": 9481 }, { "epoch": 1.3388873199661113, "grad_norm": 3.4506389702983573, "learning_rate": 1.2207370845320488e-05, "loss": 0.5973, "step": 9482 }, { "epoch": 1.3390285230160972, "grad_norm": 3.5821221560061205, "learning_rate": 1.220588396987091e-05, "loss": 0.5703, "step": 9483 }, { "epoch": 1.339169726066083, "grad_norm": 3.1485399693975804, "learning_rate": 1.2204397043157541e-05, "loss": 0.4886, "step": 9484 }, { "epoch": 1.339310929116069, "grad_norm": 3.4094009214543055, "learning_rate": 1.2202910065214939e-05, "loss": 0.6504, "step": 9485 }, { "epoch": 1.3394521321660549, "grad_norm": 3.6375790610975356, "learning_rate": 1.2201423036077657e-05, "loss": 0.6149, "step": 9486 }, { "epoch": 1.3395933352160407, "grad_norm": 3.8065820374636385, "learning_rate": 1.2199935955780255e-05, "loss": 0.6032, "step": 9487 }, { "epoch": 1.3397345382660266, "grad_norm": 3.7384230161145853, "learning_rate": 1.2198448824357292e-05, "loss": 0.5512, "step": 9488 }, { "epoch": 1.3398757413160125, "grad_norm": 3.556937550206897, "learning_rate": 1.2196961641843326e-05, "loss": 0.658, "step": 9489 }, { "epoch": 1.3400169443659984, "grad_norm": 3.5449440963912413, "learning_rate": 1.2195474408272919e-05, "loss": 0.6149, "step": 9490 }, { "epoch": 1.340158147415984, "grad_norm": 3.6591956402124493, "learning_rate": 1.2193987123680639e-05, "loss": 0.5405, "step": 9491 }, { "epoch": 1.34029935046597, "grad_norm": 3.856805363911538, "learning_rate": 1.2192499788101044e-05, "loss": 0.6688, "step": 9492 }, { "epoch": 1.3404405535159558, "grad_norm": 3.6363421723079288, "learning_rate": 1.2191012401568698e-05, "loss": 0.6355, "step": 9493 }, { "epoch": 1.3405817565659417, "grad_norm": 3.4936021069998713, "learning_rate": 1.2189524964118174e-05, "loss": 0.6023, "step": 9494 }, { "epoch": 1.3407229596159276, "grad_norm": 3.4240243114236635, "learning_rate": 1.2188037475784033e-05, "loss": 0.5855, "step": 9495 }, { "epoch": 1.3408641626659135, "grad_norm": 3.688040769666093, "learning_rate": 1.2186549936600847e-05, "loss": 0.663, "step": 9496 }, { "epoch": 1.3410053657158993, "grad_norm": 3.642948190663818, "learning_rate": 1.2185062346603184e-05, "loss": 0.6207, "step": 9497 }, { "epoch": 1.3411465687658852, "grad_norm": 2.824834396200575, "learning_rate": 1.2183574705825618e-05, "loss": 0.4484, "step": 9498 }, { "epoch": 1.341287771815871, "grad_norm": 3.7750083967331434, "learning_rate": 1.2182087014302715e-05, "loss": 0.7176, "step": 9499 }, { "epoch": 1.341428974865857, "grad_norm": 2.736824344403802, "learning_rate": 1.2180599272069058e-05, "loss": 0.5183, "step": 9500 }, { "epoch": 1.3415701779158429, "grad_norm": 3.847069793910702, "learning_rate": 1.2179111479159212e-05, "loss": 0.662, "step": 9501 }, { "epoch": 1.3417113809658288, "grad_norm": 3.1602452728343446, "learning_rate": 1.2177623635607753e-05, "loss": 0.587, "step": 9502 }, { "epoch": 1.3418525840158146, "grad_norm": 3.0734893957851837, "learning_rate": 1.2176135741449265e-05, "loss": 0.513, "step": 9503 }, { "epoch": 1.3419937870658005, "grad_norm": 4.043516469574682, "learning_rate": 1.2174647796718322e-05, "loss": 0.6133, "step": 9504 }, { "epoch": 1.3421349901157864, "grad_norm": 3.5405752870275475, "learning_rate": 1.2173159801449503e-05, "loss": 0.5529, "step": 9505 }, { "epoch": 1.3422761931657723, "grad_norm": 4.056864171822321, "learning_rate": 1.217167175567739e-05, "loss": 0.7302, "step": 9506 }, { "epoch": 1.3424173962157582, "grad_norm": 3.591377802408437, "learning_rate": 1.217018365943656e-05, "loss": 0.5727, "step": 9507 }, { "epoch": 1.342558599265744, "grad_norm": 3.2951522500026615, "learning_rate": 1.2168695512761604e-05, "loss": 0.6631, "step": 9508 }, { "epoch": 1.34269980231573, "grad_norm": 3.21489168530271, "learning_rate": 1.2167207315687098e-05, "loss": 0.581, "step": 9509 }, { "epoch": 1.3428410053657158, "grad_norm": 3.6168848446528057, "learning_rate": 1.2165719068247626e-05, "loss": 0.5349, "step": 9510 }, { "epoch": 1.3429822084157017, "grad_norm": 2.8616998666174656, "learning_rate": 1.2164230770477782e-05, "loss": 0.5222, "step": 9511 }, { "epoch": 1.3431234114656876, "grad_norm": 3.824965610396753, "learning_rate": 1.216274242241215e-05, "loss": 0.755, "step": 9512 }, { "epoch": 1.3432646145156735, "grad_norm": 4.215403999937383, "learning_rate": 1.2161254024085318e-05, "loss": 0.7723, "step": 9513 }, { "epoch": 1.3434058175656594, "grad_norm": 3.621464128164179, "learning_rate": 1.2159765575531877e-05, "loss": 0.6364, "step": 9514 }, { "epoch": 1.3435470206156452, "grad_norm": 3.5188310359724935, "learning_rate": 1.2158277076786415e-05, "loss": 0.5834, "step": 9515 }, { "epoch": 1.3436882236656311, "grad_norm": 3.793689224665667, "learning_rate": 1.2156788527883524e-05, "loss": 0.6081, "step": 9516 }, { "epoch": 1.343829426715617, "grad_norm": 3.1310544953142605, "learning_rate": 1.2155299928857803e-05, "loss": 0.4742, "step": 9517 }, { "epoch": 1.3439706297656029, "grad_norm": 3.2075901555208857, "learning_rate": 1.2153811279743841e-05, "loss": 0.4664, "step": 9518 }, { "epoch": 1.3441118328155888, "grad_norm": 3.70370555178042, "learning_rate": 1.2152322580576232e-05, "loss": 0.5163, "step": 9519 }, { "epoch": 1.3442530358655747, "grad_norm": 3.250533588515463, "learning_rate": 1.215083383138958e-05, "loss": 0.5344, "step": 9520 }, { "epoch": 1.3443942389155605, "grad_norm": 3.3894286347363045, "learning_rate": 1.2149345032218476e-05, "loss": 0.5855, "step": 9521 }, { "epoch": 1.3445354419655464, "grad_norm": 3.6396430476646104, "learning_rate": 1.2147856183097524e-05, "loss": 0.5681, "step": 9522 }, { "epoch": 1.3446766450155323, "grad_norm": 3.3830036673809336, "learning_rate": 1.214636728406132e-05, "loss": 0.4724, "step": 9523 }, { "epoch": 1.3448178480655182, "grad_norm": 3.5911779114940634, "learning_rate": 1.2144878335144469e-05, "loss": 0.6269, "step": 9524 }, { "epoch": 1.344959051115504, "grad_norm": 3.773120524563539, "learning_rate": 1.214338933638157e-05, "loss": 0.6615, "step": 9525 }, { "epoch": 1.34510025416549, "grad_norm": 3.9252476929937816, "learning_rate": 1.214190028780723e-05, "loss": 0.5989, "step": 9526 }, { "epoch": 1.3452414572154758, "grad_norm": 4.526512144068702, "learning_rate": 1.2140411189456049e-05, "loss": 0.7814, "step": 9527 }, { "epoch": 1.3453826602654617, "grad_norm": 3.5181255584299254, "learning_rate": 1.213892204136264e-05, "loss": 0.5874, "step": 9528 }, { "epoch": 1.3455238633154476, "grad_norm": 3.5823587304544557, "learning_rate": 1.2137432843561602e-05, "loss": 0.7258, "step": 9529 }, { "epoch": 1.3456650663654335, "grad_norm": 3.173298931162847, "learning_rate": 1.2135943596087554e-05, "loss": 0.4945, "step": 9530 }, { "epoch": 1.3458062694154194, "grad_norm": 3.0909190647424603, "learning_rate": 1.2134454298975096e-05, "loss": 0.4773, "step": 9531 }, { "epoch": 1.3459474724654052, "grad_norm": 4.557325221355176, "learning_rate": 1.2132964952258841e-05, "loss": 0.7261, "step": 9532 }, { "epoch": 1.3460886755153911, "grad_norm": 2.9188963538333317, "learning_rate": 1.2131475555973403e-05, "loss": 0.5147, "step": 9533 }, { "epoch": 1.346229878565377, "grad_norm": 3.4002038474717007, "learning_rate": 1.2129986110153395e-05, "loss": 0.6207, "step": 9534 }, { "epoch": 1.346371081615363, "grad_norm": 3.9747152147812397, "learning_rate": 1.2128496614833427e-05, "loss": 0.6869, "step": 9535 }, { "epoch": 1.3465122846653488, "grad_norm": 4.008302576254742, "learning_rate": 1.2127007070048117e-05, "loss": 0.6335, "step": 9536 }, { "epoch": 1.3466534877153347, "grad_norm": 3.8891035007349144, "learning_rate": 1.2125517475832082e-05, "loss": 0.6671, "step": 9537 }, { "epoch": 1.3467946907653205, "grad_norm": 3.6055300176029235, "learning_rate": 1.2124027832219942e-05, "loss": 0.648, "step": 9538 }, { "epoch": 1.3469358938153064, "grad_norm": 4.043433125870707, "learning_rate": 1.2122538139246308e-05, "loss": 0.6794, "step": 9539 }, { "epoch": 1.3470770968652923, "grad_norm": 3.9877474671700477, "learning_rate": 1.2121048396945807e-05, "loss": 0.738, "step": 9540 }, { "epoch": 1.3472182999152782, "grad_norm": 3.8026822932372704, "learning_rate": 1.2119558605353055e-05, "loss": 0.6718, "step": 9541 }, { "epoch": 1.347359502965264, "grad_norm": 3.217331994605831, "learning_rate": 1.2118068764502677e-05, "loss": 0.4566, "step": 9542 }, { "epoch": 1.34750070601525, "grad_norm": 3.5242429495015286, "learning_rate": 1.2116578874429296e-05, "loss": 0.5035, "step": 9543 }, { "epoch": 1.3476419090652358, "grad_norm": 3.9053960290713903, "learning_rate": 1.2115088935167538e-05, "loss": 0.6263, "step": 9544 }, { "epoch": 1.3477831121152217, "grad_norm": 4.42878320311865, "learning_rate": 1.2113598946752024e-05, "loss": 0.7508, "step": 9545 }, { "epoch": 1.3479243151652076, "grad_norm": 3.7035363290646934, "learning_rate": 1.2112108909217386e-05, "loss": 0.5638, "step": 9546 }, { "epoch": 1.3480655182151935, "grad_norm": 3.3943791876491387, "learning_rate": 1.2110618822598244e-05, "loss": 0.533, "step": 9547 }, { "epoch": 1.3482067212651794, "grad_norm": 3.1301372543178565, "learning_rate": 1.2109128686929235e-05, "loss": 0.5697, "step": 9548 }, { "epoch": 1.3483479243151653, "grad_norm": 3.8403506783955375, "learning_rate": 1.2107638502244987e-05, "loss": 0.5298, "step": 9549 }, { "epoch": 1.3484891273651511, "grad_norm": 3.119745327035719, "learning_rate": 1.210614826858013e-05, "loss": 0.5531, "step": 9550 }, { "epoch": 1.348630330415137, "grad_norm": 3.648846785896331, "learning_rate": 1.2104657985969297e-05, "loss": 0.5876, "step": 9551 }, { "epoch": 1.348771533465123, "grad_norm": 2.8929726323726164, "learning_rate": 1.2103167654447121e-05, "loss": 0.4608, "step": 9552 }, { "epoch": 1.3489127365151088, "grad_norm": 3.544931281620047, "learning_rate": 1.2101677274048235e-05, "loss": 0.5945, "step": 9553 }, { "epoch": 1.3490539395650947, "grad_norm": 3.456959518184993, "learning_rate": 1.210018684480728e-05, "loss": 0.5418, "step": 9554 }, { "epoch": 1.3491951426150806, "grad_norm": 3.886402380303061, "learning_rate": 1.209869636675889e-05, "loss": 0.6027, "step": 9555 }, { "epoch": 1.3493363456650664, "grad_norm": 3.3387055498435543, "learning_rate": 1.20972058399377e-05, "loss": 0.5495, "step": 9556 }, { "epoch": 1.3494775487150523, "grad_norm": 3.958090957148684, "learning_rate": 1.2095715264378354e-05, "loss": 0.5441, "step": 9557 }, { "epoch": 1.3496187517650382, "grad_norm": 3.612642290091949, "learning_rate": 1.2094224640115488e-05, "loss": 0.504, "step": 9558 }, { "epoch": 1.349759954815024, "grad_norm": 3.540389752631854, "learning_rate": 1.2092733967183748e-05, "loss": 0.6273, "step": 9559 }, { "epoch": 1.34990115786501, "grad_norm": 3.5153059835275795, "learning_rate": 1.2091243245617774e-05, "loss": 0.5221, "step": 9560 }, { "epoch": 1.3500423609149959, "grad_norm": 4.8231698925566935, "learning_rate": 1.208975247545221e-05, "loss": 0.5928, "step": 9561 }, { "epoch": 1.3501835639649817, "grad_norm": 3.62756304152625, "learning_rate": 1.20882616567217e-05, "loss": 0.6213, "step": 9562 }, { "epoch": 1.3503247670149676, "grad_norm": 3.3866522704679576, "learning_rate": 1.2086770789460893e-05, "loss": 0.6137, "step": 9563 }, { "epoch": 1.3504659700649535, "grad_norm": 3.2778297241943672, "learning_rate": 1.2085279873704433e-05, "loss": 0.6035, "step": 9564 }, { "epoch": 1.3506071731149394, "grad_norm": 4.092318074083094, "learning_rate": 1.2083788909486968e-05, "loss": 0.7384, "step": 9565 }, { "epoch": 1.3507483761649253, "grad_norm": 4.1116713090401875, "learning_rate": 1.208229789684315e-05, "loss": 0.7017, "step": 9566 }, { "epoch": 1.3508895792149112, "grad_norm": 3.7660344721301158, "learning_rate": 1.2080806835807627e-05, "loss": 0.557, "step": 9567 }, { "epoch": 1.351030782264897, "grad_norm": 3.8051255262347916, "learning_rate": 1.2079315726415053e-05, "loss": 0.6113, "step": 9568 }, { "epoch": 1.351171985314883, "grad_norm": 4.092283924453941, "learning_rate": 1.2077824568700081e-05, "loss": 0.579, "step": 9569 }, { "epoch": 1.3513131883648688, "grad_norm": 3.8555923042832156, "learning_rate": 1.2076333362697358e-05, "loss": 0.5777, "step": 9570 }, { "epoch": 1.3514543914148547, "grad_norm": 3.83965775290258, "learning_rate": 1.2074842108441549e-05, "loss": 0.7044, "step": 9571 }, { "epoch": 1.3515955944648406, "grad_norm": 3.2322568850720432, "learning_rate": 1.2073350805967308e-05, "loss": 0.5628, "step": 9572 }, { "epoch": 1.3517367975148264, "grad_norm": 3.6840572335469775, "learning_rate": 1.2071859455309283e-05, "loss": 0.6861, "step": 9573 }, { "epoch": 1.3518780005648123, "grad_norm": 2.7052780743796947, "learning_rate": 1.2070368056502142e-05, "loss": 0.4357, "step": 9574 }, { "epoch": 1.3520192036147982, "grad_norm": 3.3132526904086532, "learning_rate": 1.2068876609580542e-05, "loss": 0.519, "step": 9575 }, { "epoch": 1.352160406664784, "grad_norm": 2.649418849941158, "learning_rate": 1.2067385114579144e-05, "loss": 0.4213, "step": 9576 }, { "epoch": 1.3523016097147698, "grad_norm": 3.423730960299369, "learning_rate": 1.2065893571532608e-05, "loss": 0.6404, "step": 9577 }, { "epoch": 1.3524428127647556, "grad_norm": 3.0746884448413363, "learning_rate": 1.2064401980475595e-05, "loss": 0.5439, "step": 9578 }, { "epoch": 1.3525840158147415, "grad_norm": 3.965427070319809, "learning_rate": 1.2062910341442772e-05, "loss": 0.4749, "step": 9579 }, { "epoch": 1.3527252188647274, "grad_norm": 4.650046649905797, "learning_rate": 1.2061418654468808e-05, "loss": 0.8024, "step": 9580 }, { "epoch": 1.3528664219147133, "grad_norm": 3.691678769830578, "learning_rate": 1.2059926919588362e-05, "loss": 0.517, "step": 9581 }, { "epoch": 1.3530076249646992, "grad_norm": 4.12070737124535, "learning_rate": 1.2058435136836101e-05, "loss": 0.7345, "step": 9582 }, { "epoch": 1.353148828014685, "grad_norm": 2.6607546974361256, "learning_rate": 1.20569433062467e-05, "loss": 0.4608, "step": 9583 }, { "epoch": 1.353290031064671, "grad_norm": 3.206979913276047, "learning_rate": 1.2055451427854825e-05, "loss": 0.5592, "step": 9584 }, { "epoch": 1.3534312341146568, "grad_norm": 3.4708931668316048, "learning_rate": 1.2053959501695144e-05, "loss": 0.5813, "step": 9585 }, { "epoch": 1.3535724371646427, "grad_norm": 4.150168831451216, "learning_rate": 1.2052467527802337e-05, "loss": 0.6581, "step": 9586 }, { "epoch": 1.3537136402146286, "grad_norm": 4.071220790020379, "learning_rate": 1.2050975506211064e-05, "loss": 0.6702, "step": 9587 }, { "epoch": 1.3538548432646145, "grad_norm": 3.185381947965574, "learning_rate": 1.2049483436956009e-05, "loss": 0.4974, "step": 9588 }, { "epoch": 1.3539960463146004, "grad_norm": 3.3874929161237968, "learning_rate": 1.2047991320071846e-05, "loss": 0.6607, "step": 9589 }, { "epoch": 1.3541372493645862, "grad_norm": 3.8498809519898094, "learning_rate": 1.2046499155593245e-05, "loss": 0.5391, "step": 9590 }, { "epoch": 1.3542784524145721, "grad_norm": 3.6170898665804576, "learning_rate": 1.2045006943554888e-05, "loss": 0.5916, "step": 9591 }, { "epoch": 1.354419655464558, "grad_norm": 3.4111803866828514, "learning_rate": 1.2043514683991455e-05, "loss": 0.6185, "step": 9592 }, { "epoch": 1.3545608585145439, "grad_norm": 3.8404482081581612, "learning_rate": 1.2042022376937623e-05, "loss": 0.6198, "step": 9593 }, { "epoch": 1.3547020615645298, "grad_norm": 3.3711475659094567, "learning_rate": 1.2040530022428074e-05, "loss": 0.6063, "step": 9594 }, { "epoch": 1.3548432646145157, "grad_norm": 3.018159802003804, "learning_rate": 1.2039037620497486e-05, "loss": 0.5202, "step": 9595 }, { "epoch": 1.3549844676645015, "grad_norm": 3.875906756022677, "learning_rate": 1.2037545171180545e-05, "loss": 0.5542, "step": 9596 }, { "epoch": 1.3551256707144874, "grad_norm": 3.265397958477719, "learning_rate": 1.2036052674511935e-05, "loss": 0.5538, "step": 9597 }, { "epoch": 1.3552668737644733, "grad_norm": 3.0044130476694453, "learning_rate": 1.2034560130526341e-05, "loss": 0.6045, "step": 9598 }, { "epoch": 1.3554080768144592, "grad_norm": 3.854163388616594, "learning_rate": 1.2033067539258445e-05, "loss": 0.5719, "step": 9599 }, { "epoch": 1.355549279864445, "grad_norm": 3.180140753909663, "learning_rate": 1.203157490074294e-05, "loss": 0.5658, "step": 9600 }, { "epoch": 1.355690482914431, "grad_norm": 3.952477447384529, "learning_rate": 1.2030082215014512e-05, "loss": 0.6382, "step": 9601 }, { "epoch": 1.3558316859644168, "grad_norm": 4.983977996624493, "learning_rate": 1.2028589482107847e-05, "loss": 0.9133, "step": 9602 }, { "epoch": 1.3559728890144027, "grad_norm": 4.5326877660431215, "learning_rate": 1.2027096702057642e-05, "loss": 0.6255, "step": 9603 }, { "epoch": 1.3561140920643886, "grad_norm": 3.271977635469051, "learning_rate": 1.2025603874898582e-05, "loss": 0.5985, "step": 9604 }, { "epoch": 1.3562552951143745, "grad_norm": 3.3828582566653664, "learning_rate": 1.2024111000665364e-05, "loss": 0.6167, "step": 9605 }, { "epoch": 1.3563964981643604, "grad_norm": 3.320390225738393, "learning_rate": 1.2022618079392683e-05, "loss": 0.5289, "step": 9606 }, { "epoch": 1.3565377012143462, "grad_norm": 3.602871716011303, "learning_rate": 1.2021125111115226e-05, "loss": 0.6812, "step": 9607 }, { "epoch": 1.3566789042643321, "grad_norm": 3.671038619851102, "learning_rate": 1.2019632095867697e-05, "loss": 0.6355, "step": 9608 }, { "epoch": 1.356820107314318, "grad_norm": 3.5070214707993985, "learning_rate": 1.2018139033684792e-05, "loss": 0.6224, "step": 9609 }, { "epoch": 1.356961310364304, "grad_norm": 3.159328657828265, "learning_rate": 1.2016645924601207e-05, "loss": 0.5248, "step": 9610 }, { "epoch": 1.3571025134142898, "grad_norm": 3.57104889441666, "learning_rate": 1.2015152768651639e-05, "loss": 0.6037, "step": 9611 }, { "epoch": 1.3572437164642757, "grad_norm": 3.862294965345124, "learning_rate": 1.2013659565870795e-05, "loss": 0.5974, "step": 9612 }, { "epoch": 1.3573849195142615, "grad_norm": 2.951118723195497, "learning_rate": 1.2012166316293368e-05, "loss": 0.4768, "step": 9613 }, { "epoch": 1.3575261225642474, "grad_norm": 3.2488337533779403, "learning_rate": 1.201067301995407e-05, "loss": 0.5017, "step": 9614 }, { "epoch": 1.3576673256142333, "grad_norm": 3.5839883674653055, "learning_rate": 1.2009179676887595e-05, "loss": 0.6074, "step": 9615 }, { "epoch": 1.3578085286642192, "grad_norm": 3.872031407575974, "learning_rate": 1.200768628712865e-05, "loss": 0.6949, "step": 9616 }, { "epoch": 1.357949731714205, "grad_norm": 3.7792197243867713, "learning_rate": 1.2006192850711947e-05, "loss": 0.5557, "step": 9617 }, { "epoch": 1.358090934764191, "grad_norm": 3.140092643543506, "learning_rate": 1.2004699367672187e-05, "loss": 0.5139, "step": 9618 }, { "epoch": 1.3582321378141768, "grad_norm": 3.4205135141860383, "learning_rate": 1.200320583804408e-05, "loss": 0.6581, "step": 9619 }, { "epoch": 1.3583733408641627, "grad_norm": 3.325379728959165, "learning_rate": 1.2001712261862335e-05, "loss": 0.5607, "step": 9620 }, { "epoch": 1.3585145439141486, "grad_norm": 3.8533916500499568, "learning_rate": 1.200021863916166e-05, "loss": 0.6201, "step": 9621 }, { "epoch": 1.3586557469641345, "grad_norm": 2.8772616894553695, "learning_rate": 1.1998724969976767e-05, "loss": 0.4814, "step": 9622 }, { "epoch": 1.3587969500141204, "grad_norm": 3.3077896887816465, "learning_rate": 1.1997231254342373e-05, "loss": 0.588, "step": 9623 }, { "epoch": 1.3589381530641063, "grad_norm": 3.480748247466281, "learning_rate": 1.1995737492293183e-05, "loss": 0.5331, "step": 9624 }, { "epoch": 1.3590793561140921, "grad_norm": 3.2350781462675577, "learning_rate": 1.1994243683863917e-05, "loss": 0.5811, "step": 9625 }, { "epoch": 1.359220559164078, "grad_norm": 3.5556758783072007, "learning_rate": 1.199274982908929e-05, "loss": 0.5822, "step": 9626 }, { "epoch": 1.359361762214064, "grad_norm": 3.6454018586939148, "learning_rate": 1.1991255928004017e-05, "loss": 0.6955, "step": 9627 }, { "epoch": 1.3595029652640496, "grad_norm": 3.138915238883516, "learning_rate": 1.1989761980642816e-05, "loss": 0.5355, "step": 9628 }, { "epoch": 1.3596441683140355, "grad_norm": 3.807185563231134, "learning_rate": 1.1988267987040407e-05, "loss": 0.6903, "step": 9629 }, { "epoch": 1.3597853713640213, "grad_norm": 3.711439996349804, "learning_rate": 1.1986773947231505e-05, "loss": 0.5559, "step": 9630 }, { "epoch": 1.3599265744140072, "grad_norm": 4.121347095392864, "learning_rate": 1.1985279861250839e-05, "loss": 0.8647, "step": 9631 }, { "epoch": 1.360067777463993, "grad_norm": 3.9800070935294927, "learning_rate": 1.1983785729133125e-05, "loss": 0.6464, "step": 9632 }, { "epoch": 1.360208980513979, "grad_norm": 3.3340724805056734, "learning_rate": 1.1982291550913086e-05, "loss": 0.4897, "step": 9633 }, { "epoch": 1.3603501835639649, "grad_norm": 3.773649616607825, "learning_rate": 1.1980797326625446e-05, "loss": 0.6684, "step": 9634 }, { "epoch": 1.3604913866139507, "grad_norm": 4.168909257878763, "learning_rate": 1.1979303056304939e-05, "loss": 0.6817, "step": 9635 }, { "epoch": 1.3606325896639366, "grad_norm": 4.360133535000221, "learning_rate": 1.1977808739986275e-05, "loss": 0.7174, "step": 9636 }, { "epoch": 1.3607737927139225, "grad_norm": 3.434483859719177, "learning_rate": 1.1976314377704194e-05, "loss": 0.5409, "step": 9637 }, { "epoch": 1.3609149957639084, "grad_norm": 3.29630776412953, "learning_rate": 1.1974819969493421e-05, "loss": 0.4974, "step": 9638 }, { "epoch": 1.3610561988138943, "grad_norm": 3.2994146302921874, "learning_rate": 1.197332551538868e-05, "loss": 0.5569, "step": 9639 }, { "epoch": 1.3611974018638802, "grad_norm": 3.4391787595369165, "learning_rate": 1.1971831015424713e-05, "loss": 0.5012, "step": 9640 }, { "epoch": 1.361338604913866, "grad_norm": 3.60454613573677, "learning_rate": 1.1970336469636242e-05, "loss": 0.5419, "step": 9641 }, { "epoch": 1.361479807963852, "grad_norm": 3.4139943660590655, "learning_rate": 1.1968841878057999e-05, "loss": 0.5419, "step": 9642 }, { "epoch": 1.3616210110138378, "grad_norm": 3.4780406608918573, "learning_rate": 1.1967347240724726e-05, "loss": 0.5563, "step": 9643 }, { "epoch": 1.3617622140638237, "grad_norm": 3.2040876231654667, "learning_rate": 1.196585255767115e-05, "loss": 0.5687, "step": 9644 }, { "epoch": 1.3619034171138096, "grad_norm": 3.639307647944429, "learning_rate": 1.1964357828932012e-05, "loss": 0.5126, "step": 9645 }, { "epoch": 1.3620446201637955, "grad_norm": 3.280448594947904, "learning_rate": 1.1962863054542045e-05, "loss": 0.5603, "step": 9646 }, { "epoch": 1.3621858232137813, "grad_norm": 3.401640038162629, "learning_rate": 1.1961368234535989e-05, "loss": 0.4777, "step": 9647 }, { "epoch": 1.3623270262637672, "grad_norm": 3.360545749756557, "learning_rate": 1.195987336894858e-05, "loss": 0.5937, "step": 9648 }, { "epoch": 1.362468229313753, "grad_norm": 3.887874244975685, "learning_rate": 1.1958378457814561e-05, "loss": 0.6562, "step": 9649 }, { "epoch": 1.362609432363739, "grad_norm": 3.3896912526995138, "learning_rate": 1.1956883501168672e-05, "loss": 0.5771, "step": 9650 }, { "epoch": 1.3627506354137249, "grad_norm": 3.9893455374066225, "learning_rate": 1.1955388499045657e-05, "loss": 0.6632, "step": 9651 }, { "epoch": 1.3628918384637108, "grad_norm": 3.7872396418076475, "learning_rate": 1.195389345148026e-05, "loss": 0.6515, "step": 9652 }, { "epoch": 1.3630330415136966, "grad_norm": 3.4555450865747335, "learning_rate": 1.1952398358507217e-05, "loss": 0.602, "step": 9653 }, { "epoch": 1.3631742445636825, "grad_norm": 3.7943293962979587, "learning_rate": 1.1950903220161286e-05, "loss": 0.5877, "step": 9654 }, { "epoch": 1.3633154476136684, "grad_norm": 4.207783619361302, "learning_rate": 1.19494080364772e-05, "loss": 0.6858, "step": 9655 }, { "epoch": 1.3634566506636543, "grad_norm": 3.559069176284669, "learning_rate": 1.1947912807489716e-05, "loss": 0.5028, "step": 9656 }, { "epoch": 1.3635978537136402, "grad_norm": 3.5770529567636893, "learning_rate": 1.1946417533233583e-05, "loss": 0.538, "step": 9657 }, { "epoch": 1.363739056763626, "grad_norm": 4.048065132112137, "learning_rate": 1.1944922213743543e-05, "loss": 0.6946, "step": 9658 }, { "epoch": 1.363880259813612, "grad_norm": 3.656929180681471, "learning_rate": 1.1943426849054352e-05, "loss": 0.5812, "step": 9659 }, { "epoch": 1.3640214628635978, "grad_norm": 3.4456719507511218, "learning_rate": 1.194193143920076e-05, "loss": 0.5694, "step": 9660 }, { "epoch": 1.3641626659135837, "grad_norm": 3.5589391590368598, "learning_rate": 1.194043598421752e-05, "loss": 0.5158, "step": 9661 }, { "epoch": 1.3643038689635696, "grad_norm": 3.6221146462769855, "learning_rate": 1.1938940484139387e-05, "loss": 0.5612, "step": 9662 }, { "epoch": 1.3644450720135555, "grad_norm": 4.006134342080583, "learning_rate": 1.1937444939001112e-05, "loss": 0.6565, "step": 9663 }, { "epoch": 1.3645862750635414, "grad_norm": 3.549335710020018, "learning_rate": 1.1935949348837457e-05, "loss": 0.538, "step": 9664 }, { "epoch": 1.3647274781135272, "grad_norm": 3.3004266406333724, "learning_rate": 1.1934453713683172e-05, "loss": 0.5646, "step": 9665 }, { "epoch": 1.3648686811635131, "grad_norm": 3.560759942181429, "learning_rate": 1.193295803357302e-05, "loss": 0.5983, "step": 9666 }, { "epoch": 1.365009884213499, "grad_norm": 3.182642121220377, "learning_rate": 1.1931462308541755e-05, "loss": 0.4601, "step": 9667 }, { "epoch": 1.3651510872634849, "grad_norm": 3.767783514403507, "learning_rate": 1.1929966538624143e-05, "loss": 0.6496, "step": 9668 }, { "epoch": 1.3652922903134708, "grad_norm": 3.4343599900659596, "learning_rate": 1.1928470723854943e-05, "loss": 0.5697, "step": 9669 }, { "epoch": 1.3654334933634567, "grad_norm": 3.1547662380213133, "learning_rate": 1.1926974864268914e-05, "loss": 0.5111, "step": 9670 }, { "epoch": 1.3655746964134425, "grad_norm": 3.1486913888952786, "learning_rate": 1.1925478959900822e-05, "loss": 0.5117, "step": 9671 }, { "epoch": 1.3657158994634284, "grad_norm": 4.000747073947447, "learning_rate": 1.1923983010785431e-05, "loss": 0.712, "step": 9672 }, { "epoch": 1.3658571025134143, "grad_norm": 3.6659613725404356, "learning_rate": 1.1922487016957502e-05, "loss": 0.6617, "step": 9673 }, { "epoch": 1.3659983055634002, "grad_norm": 3.1451920485647924, "learning_rate": 1.1920990978451812e-05, "loss": 0.5392, "step": 9674 }, { "epoch": 1.366139508613386, "grad_norm": 3.5731119234595057, "learning_rate": 1.1919494895303119e-05, "loss": 0.6448, "step": 9675 }, { "epoch": 1.366280711663372, "grad_norm": 2.9264539038494077, "learning_rate": 1.1917998767546188e-05, "loss": 0.4946, "step": 9676 }, { "epoch": 1.3664219147133578, "grad_norm": 2.989890993693794, "learning_rate": 1.1916502595215799e-05, "loss": 0.4369, "step": 9677 }, { "epoch": 1.3665631177633437, "grad_norm": 3.382161647620616, "learning_rate": 1.1915006378346719e-05, "loss": 0.6747, "step": 9678 }, { "epoch": 1.3667043208133296, "grad_norm": 4.741227238624383, "learning_rate": 1.1913510116973715e-05, "loss": 0.7579, "step": 9679 }, { "epoch": 1.3668455238633155, "grad_norm": 4.353092237430788, "learning_rate": 1.1912013811131562e-05, "loss": 0.7957, "step": 9680 }, { "epoch": 1.3669867269133014, "grad_norm": 3.453339276092592, "learning_rate": 1.1910517460855033e-05, "loss": 0.5697, "step": 9681 }, { "epoch": 1.3671279299632872, "grad_norm": 3.8990299737361074, "learning_rate": 1.1909021066178906e-05, "loss": 0.6719, "step": 9682 }, { "epoch": 1.3672691330132731, "grad_norm": 4.980671183569356, "learning_rate": 1.1907524627137952e-05, "loss": 0.8008, "step": 9683 }, { "epoch": 1.367410336063259, "grad_norm": 4.3038522735200155, "learning_rate": 1.1906028143766952e-05, "loss": 0.8133, "step": 9684 }, { "epoch": 1.367551539113245, "grad_norm": 3.898066080258327, "learning_rate": 1.1904531616100677e-05, "loss": 0.6813, "step": 9685 }, { "epoch": 1.3676927421632308, "grad_norm": 3.2000271490358125, "learning_rate": 1.1903035044173914e-05, "loss": 0.6034, "step": 9686 }, { "epoch": 1.3678339452132167, "grad_norm": 3.247603800982416, "learning_rate": 1.1901538428021434e-05, "loss": 0.5338, "step": 9687 }, { "epoch": 1.3679751482632025, "grad_norm": 3.638379363697139, "learning_rate": 1.1900041767678024e-05, "loss": 0.611, "step": 9688 }, { "epoch": 1.3681163513131884, "grad_norm": 3.2273460435656203, "learning_rate": 1.1898545063178464e-05, "loss": 0.5944, "step": 9689 }, { "epoch": 1.3682575543631743, "grad_norm": 3.748882091294822, "learning_rate": 1.1897048314557538e-05, "loss": 0.6217, "step": 9690 }, { "epoch": 1.3683987574131602, "grad_norm": 3.629050422243412, "learning_rate": 1.1895551521850029e-05, "loss": 0.59, "step": 9691 }, { "epoch": 1.368539960463146, "grad_norm": 3.828777141849607, "learning_rate": 1.189405468509072e-05, "loss": 0.6178, "step": 9692 }, { "epoch": 1.368681163513132, "grad_norm": 3.415580766378138, "learning_rate": 1.1892557804314393e-05, "loss": 0.628, "step": 9693 }, { "epoch": 1.3688223665631178, "grad_norm": 3.1561384863108444, "learning_rate": 1.1891060879555847e-05, "loss": 0.5067, "step": 9694 }, { "epoch": 1.3689635696131037, "grad_norm": 3.7993824449502975, "learning_rate": 1.1889563910849862e-05, "loss": 0.6941, "step": 9695 }, { "epoch": 1.3691047726630896, "grad_norm": 4.015409225989134, "learning_rate": 1.1888066898231223e-05, "loss": 0.6221, "step": 9696 }, { "epoch": 1.3692459757130755, "grad_norm": 3.3418894142521545, "learning_rate": 1.188656984173473e-05, "loss": 0.5816, "step": 9697 }, { "epoch": 1.3693871787630614, "grad_norm": 4.359613640592059, "learning_rate": 1.1885072741395165e-05, "loss": 0.7643, "step": 9698 }, { "epoch": 1.3695283818130473, "grad_norm": 3.156193514316977, "learning_rate": 1.1883575597247328e-05, "loss": 0.6123, "step": 9699 }, { "epoch": 1.3696695848630331, "grad_norm": 3.347179755112301, "learning_rate": 1.1882078409326003e-05, "loss": 0.5814, "step": 9700 }, { "epoch": 1.369810787913019, "grad_norm": 3.449162422752734, "learning_rate": 1.1880581177665992e-05, "loss": 0.5976, "step": 9701 }, { "epoch": 1.369951990963005, "grad_norm": 3.952167109872566, "learning_rate": 1.1879083902302082e-05, "loss": 0.8094, "step": 9702 }, { "epoch": 1.3700931940129908, "grad_norm": 4.094360212721599, "learning_rate": 1.187758658326908e-05, "loss": 0.7686, "step": 9703 }, { "epoch": 1.3702343970629767, "grad_norm": 2.8081190051965197, "learning_rate": 1.187608922060177e-05, "loss": 0.4638, "step": 9704 }, { "epoch": 1.3703756001129626, "grad_norm": 3.6687702897539953, "learning_rate": 1.187459181433496e-05, "loss": 0.6114, "step": 9705 }, { "epoch": 1.3705168031629484, "grad_norm": 2.862833450539544, "learning_rate": 1.1873094364503446e-05, "loss": 0.4513, "step": 9706 }, { "epoch": 1.3706580062129343, "grad_norm": 3.983960515132614, "learning_rate": 1.1871596871142028e-05, "loss": 0.5604, "step": 9707 }, { "epoch": 1.3707992092629202, "grad_norm": 4.138931200767664, "learning_rate": 1.1870099334285507e-05, "loss": 0.6688, "step": 9708 }, { "epoch": 1.370940412312906, "grad_norm": 4.129426963557256, "learning_rate": 1.1868601753968685e-05, "loss": 0.6732, "step": 9709 }, { "epoch": 1.371081615362892, "grad_norm": 4.183808897738178, "learning_rate": 1.1867104130226363e-05, "loss": 0.7716, "step": 9710 }, { "epoch": 1.3712228184128779, "grad_norm": 2.9515273160131184, "learning_rate": 1.1865606463093348e-05, "loss": 0.5989, "step": 9711 }, { "epoch": 1.3713640214628637, "grad_norm": 3.301489264859595, "learning_rate": 1.1864108752604451e-05, "loss": 0.581, "step": 9712 }, { "epoch": 1.3715052245128494, "grad_norm": 3.5452167062730036, "learning_rate": 1.1862610998794464e-05, "loss": 0.5289, "step": 9713 }, { "epoch": 1.3716464275628353, "grad_norm": 4.150861803632419, "learning_rate": 1.1861113201698204e-05, "loss": 0.5866, "step": 9714 }, { "epoch": 1.3717876306128212, "grad_norm": 3.3607604028115095, "learning_rate": 1.1859615361350476e-05, "loss": 0.5766, "step": 9715 }, { "epoch": 1.371928833662807, "grad_norm": 3.7059513660729557, "learning_rate": 1.1858117477786094e-05, "loss": 0.5998, "step": 9716 }, { "epoch": 1.372070036712793, "grad_norm": 3.5978810782207775, "learning_rate": 1.185661955103986e-05, "loss": 0.5843, "step": 9717 }, { "epoch": 1.3722112397627788, "grad_norm": 2.8662963344178283, "learning_rate": 1.1855121581146591e-05, "loss": 0.4701, "step": 9718 }, { "epoch": 1.3723524428127647, "grad_norm": 4.38119502110531, "learning_rate": 1.1853623568141096e-05, "loss": 0.8014, "step": 9719 }, { "epoch": 1.3724936458627506, "grad_norm": 3.3884837231900407, "learning_rate": 1.1852125512058194e-05, "loss": 0.5865, "step": 9720 }, { "epoch": 1.3726348489127365, "grad_norm": 3.3285667973469315, "learning_rate": 1.1850627412932689e-05, "loss": 0.5534, "step": 9721 }, { "epoch": 1.3727760519627223, "grad_norm": 3.3871523942701978, "learning_rate": 1.1849129270799407e-05, "loss": 0.6523, "step": 9722 }, { "epoch": 1.3729172550127082, "grad_norm": 3.3233362460581253, "learning_rate": 1.1847631085693159e-05, "loss": 0.5428, "step": 9723 }, { "epoch": 1.373058458062694, "grad_norm": 4.003976371980904, "learning_rate": 1.184613285764876e-05, "loss": 0.5814, "step": 9724 }, { "epoch": 1.37319966111268, "grad_norm": 3.1192788606947466, "learning_rate": 1.1844634586701033e-05, "loss": 0.4958, "step": 9725 }, { "epoch": 1.3733408641626659, "grad_norm": 4.9136562495903, "learning_rate": 1.1843136272884795e-05, "loss": 0.785, "step": 9726 }, { "epoch": 1.3734820672126518, "grad_norm": 3.263384357558984, "learning_rate": 1.1841637916234863e-05, "loss": 0.5401, "step": 9727 }, { "epoch": 1.3736232702626376, "grad_norm": 3.447933221610095, "learning_rate": 1.1840139516786062e-05, "loss": 0.5425, "step": 9728 }, { "epoch": 1.3737644733126235, "grad_norm": 4.661399010102938, "learning_rate": 1.1838641074573215e-05, "loss": 0.69, "step": 9729 }, { "epoch": 1.3739056763626094, "grad_norm": 3.929607806567896, "learning_rate": 1.1837142589631143e-05, "loss": 0.6756, "step": 9730 }, { "epoch": 1.3740468794125953, "grad_norm": 3.6594125342675428, "learning_rate": 1.1835644061994671e-05, "loss": 0.6849, "step": 9731 }, { "epoch": 1.3741880824625812, "grad_norm": 4.4694313735728235, "learning_rate": 1.1834145491698623e-05, "loss": 0.8197, "step": 9732 }, { "epoch": 1.374329285512567, "grad_norm": 3.229564401317567, "learning_rate": 1.1832646878777828e-05, "loss": 0.5154, "step": 9733 }, { "epoch": 1.374470488562553, "grad_norm": 3.515410559625843, "learning_rate": 1.1831148223267108e-05, "loss": 0.5867, "step": 9734 }, { "epoch": 1.3746116916125388, "grad_norm": 4.31672025687122, "learning_rate": 1.1829649525201295e-05, "loss": 0.6331, "step": 9735 }, { "epoch": 1.3747528946625247, "grad_norm": 3.552669283661908, "learning_rate": 1.1828150784615216e-05, "loss": 0.5944, "step": 9736 }, { "epoch": 1.3748940977125106, "grad_norm": 3.4551438493463884, "learning_rate": 1.1826652001543706e-05, "loss": 0.5886, "step": 9737 }, { "epoch": 1.3750353007624965, "grad_norm": 3.0695532393122935, "learning_rate": 1.1825153176021591e-05, "loss": 0.4931, "step": 9738 }, { "epoch": 1.3751765038124824, "grad_norm": 3.159324824804856, "learning_rate": 1.18236543080837e-05, "loss": 0.546, "step": 9739 }, { "epoch": 1.3753177068624682, "grad_norm": 3.6465346062754023, "learning_rate": 1.1822155397764873e-05, "loss": 0.5368, "step": 9740 }, { "epoch": 1.3754589099124541, "grad_norm": 3.7869249013729274, "learning_rate": 1.1820656445099945e-05, "loss": 0.6378, "step": 9741 }, { "epoch": 1.37560011296244, "grad_norm": 4.463094303519522, "learning_rate": 1.1819157450123745e-05, "loss": 0.7917, "step": 9742 }, { "epoch": 1.3757413160124259, "grad_norm": 3.340895739921409, "learning_rate": 1.1817658412871111e-05, "loss": 0.621, "step": 9743 }, { "epoch": 1.3758825190624118, "grad_norm": 3.709870672943968, "learning_rate": 1.1816159333376882e-05, "loss": 0.5998, "step": 9744 }, { "epoch": 1.3760237221123977, "grad_norm": 3.3313619283033153, "learning_rate": 1.181466021167589e-05, "loss": 0.5437, "step": 9745 }, { "epoch": 1.3761649251623835, "grad_norm": 3.275514741295965, "learning_rate": 1.1813161047802986e-05, "loss": 0.5392, "step": 9746 }, { "epoch": 1.3763061282123694, "grad_norm": 3.53508871286227, "learning_rate": 1.1811661841792994e-05, "loss": 0.5829, "step": 9747 }, { "epoch": 1.3764473312623553, "grad_norm": 3.1587306534107396, "learning_rate": 1.1810162593680768e-05, "loss": 0.5146, "step": 9748 }, { "epoch": 1.3765885343123412, "grad_norm": 3.454807859008688, "learning_rate": 1.1808663303501144e-05, "loss": 0.5529, "step": 9749 }, { "epoch": 1.376729737362327, "grad_norm": 3.7564512226826734, "learning_rate": 1.1807163971288967e-05, "loss": 0.5536, "step": 9750 }, { "epoch": 1.376870940412313, "grad_norm": 3.533939776177978, "learning_rate": 1.1805664597079079e-05, "loss": 0.6011, "step": 9751 }, { "epoch": 1.3770121434622988, "grad_norm": 3.6917663269597414, "learning_rate": 1.1804165180906326e-05, "loss": 0.5653, "step": 9752 }, { "epoch": 1.3771533465122847, "grad_norm": 2.769049607367366, "learning_rate": 1.180266572280555e-05, "loss": 0.5117, "step": 9753 }, { "epoch": 1.3772945495622706, "grad_norm": 2.8121869843323335, "learning_rate": 1.1801166222811607e-05, "loss": 0.4824, "step": 9754 }, { "epoch": 1.3774357526122565, "grad_norm": 3.146871945506094, "learning_rate": 1.1799666680959336e-05, "loss": 0.5277, "step": 9755 }, { "epoch": 1.3775769556622424, "grad_norm": 2.7525305789580683, "learning_rate": 1.1798167097283585e-05, "loss": 0.428, "step": 9756 }, { "epoch": 1.3777181587122282, "grad_norm": 3.892573235945514, "learning_rate": 1.1796667471819212e-05, "loss": 0.6413, "step": 9757 }, { "epoch": 1.3778593617622141, "grad_norm": 3.016804903544927, "learning_rate": 1.1795167804601062e-05, "loss": 0.5101, "step": 9758 }, { "epoch": 1.3780005648122, "grad_norm": 3.4675363112830433, "learning_rate": 1.1793668095663985e-05, "loss": 0.6008, "step": 9759 }, { "epoch": 1.378141767862186, "grad_norm": 4.417380556410781, "learning_rate": 1.179216834504284e-05, "loss": 0.6685, "step": 9760 }, { "epoch": 1.3782829709121718, "grad_norm": 3.3837343840831213, "learning_rate": 1.1790668552772474e-05, "loss": 0.5289, "step": 9761 }, { "epoch": 1.3784241739621577, "grad_norm": 3.5698947040078193, "learning_rate": 1.1789168718887745e-05, "loss": 0.5771, "step": 9762 }, { "epoch": 1.3785653770121435, "grad_norm": 3.330740856368138, "learning_rate": 1.178766884342351e-05, "loss": 0.5973, "step": 9763 }, { "epoch": 1.3787065800621292, "grad_norm": 3.490031525427344, "learning_rate": 1.1786168926414617e-05, "loss": 0.6195, "step": 9764 }, { "epoch": 1.378847783112115, "grad_norm": 3.348713811897324, "learning_rate": 1.1784668967895933e-05, "loss": 0.5544, "step": 9765 }, { "epoch": 1.378988986162101, "grad_norm": 3.690194137201315, "learning_rate": 1.1783168967902314e-05, "loss": 0.5572, "step": 9766 }, { "epoch": 1.3791301892120869, "grad_norm": 3.7642724196409034, "learning_rate": 1.1781668926468617e-05, "loss": 0.7328, "step": 9767 }, { "epoch": 1.3792713922620727, "grad_norm": 4.017831017060751, "learning_rate": 1.1780168843629705e-05, "loss": 0.5905, "step": 9768 }, { "epoch": 1.3794125953120586, "grad_norm": 3.4603542941422205, "learning_rate": 1.1778668719420436e-05, "loss": 0.56, "step": 9769 }, { "epoch": 1.3795537983620445, "grad_norm": 3.337968851817994, "learning_rate": 1.1777168553875673e-05, "loss": 0.5759, "step": 9770 }, { "epoch": 1.3796950014120304, "grad_norm": 2.9142122751242363, "learning_rate": 1.1775668347030285e-05, "loss": 0.4959, "step": 9771 }, { "epoch": 1.3798362044620163, "grad_norm": 3.180372606449813, "learning_rate": 1.177416809891913e-05, "loss": 0.55, "step": 9772 }, { "epoch": 1.3799774075120022, "grad_norm": 5.035317492115083, "learning_rate": 1.177266780957707e-05, "loss": 0.8248, "step": 9773 }, { "epoch": 1.380118610561988, "grad_norm": 4.663232741963081, "learning_rate": 1.1771167479038978e-05, "loss": 0.9226, "step": 9774 }, { "epoch": 1.380259813611974, "grad_norm": 3.132884227813378, "learning_rate": 1.1769667107339723e-05, "loss": 0.5052, "step": 9775 }, { "epoch": 1.3804010166619598, "grad_norm": 4.581676630594322, "learning_rate": 1.1768166694514164e-05, "loss": 0.8078, "step": 9776 }, { "epoch": 1.3805422197119457, "grad_norm": 4.193798055572653, "learning_rate": 1.1766666240597175e-05, "loss": 0.6975, "step": 9777 }, { "epoch": 1.3806834227619316, "grad_norm": 5.124414889814807, "learning_rate": 1.1765165745623628e-05, "loss": 0.619, "step": 9778 }, { "epoch": 1.3808246258119174, "grad_norm": 4.252056176484407, "learning_rate": 1.176366520962839e-05, "loss": 0.7259, "step": 9779 }, { "epoch": 1.3809658288619033, "grad_norm": 2.9195014983301752, "learning_rate": 1.1762164632646334e-05, "loss": 0.5025, "step": 9780 }, { "epoch": 1.3811070319118892, "grad_norm": 3.9734387021587545, "learning_rate": 1.1760664014712333e-05, "loss": 0.5762, "step": 9781 }, { "epoch": 1.381248234961875, "grad_norm": 3.7314506552838127, "learning_rate": 1.1759163355861258e-05, "loss": 0.6414, "step": 9782 }, { "epoch": 1.381389438011861, "grad_norm": 3.710591580155354, "learning_rate": 1.1757662656127991e-05, "loss": 0.6516, "step": 9783 }, { "epoch": 1.3815306410618469, "grad_norm": 3.6393822229586408, "learning_rate": 1.1756161915547399e-05, "loss": 0.5177, "step": 9784 }, { "epoch": 1.3816718441118327, "grad_norm": 5.264317155556696, "learning_rate": 1.1754661134154365e-05, "loss": 0.9137, "step": 9785 }, { "epoch": 1.3818130471618186, "grad_norm": 3.026734606674291, "learning_rate": 1.1753160311983764e-05, "loss": 0.539, "step": 9786 }, { "epoch": 1.3819542502118045, "grad_norm": 3.5862027531122544, "learning_rate": 1.1751659449070476e-05, "loss": 0.5774, "step": 9787 }, { "epoch": 1.3820954532617904, "grad_norm": 2.89619428501797, "learning_rate": 1.1750158545449377e-05, "loss": 0.5595, "step": 9788 }, { "epoch": 1.3822366563117763, "grad_norm": 3.971553670414237, "learning_rate": 1.1748657601155348e-05, "loss": 0.7073, "step": 9789 }, { "epoch": 1.3823778593617622, "grad_norm": 3.912382116884756, "learning_rate": 1.1747156616223272e-05, "loss": 0.7153, "step": 9790 }, { "epoch": 1.382519062411748, "grad_norm": 3.2735081099008143, "learning_rate": 1.1745655590688031e-05, "loss": 0.5579, "step": 9791 }, { "epoch": 1.382660265461734, "grad_norm": 3.739299942420825, "learning_rate": 1.174415452458451e-05, "loss": 0.6645, "step": 9792 }, { "epoch": 1.3828014685117198, "grad_norm": 3.916063995026166, "learning_rate": 1.174265341794759e-05, "loss": 0.6811, "step": 9793 }, { "epoch": 1.3829426715617057, "grad_norm": 2.831358415426989, "learning_rate": 1.1741152270812155e-05, "loss": 0.5025, "step": 9794 }, { "epoch": 1.3830838746116916, "grad_norm": 3.3238169771380335, "learning_rate": 1.1739651083213096e-05, "loss": 0.509, "step": 9795 }, { "epoch": 1.3832250776616775, "grad_norm": 3.1495024528451334, "learning_rate": 1.1738149855185295e-05, "loss": 0.5744, "step": 9796 }, { "epoch": 1.3833662807116633, "grad_norm": 4.342164407483664, "learning_rate": 1.1736648586763645e-05, "loss": 0.6318, "step": 9797 }, { "epoch": 1.3835074837616492, "grad_norm": 4.198229574454235, "learning_rate": 1.1735147277983027e-05, "loss": 0.7214, "step": 9798 }, { "epoch": 1.383648686811635, "grad_norm": 3.435757810516378, "learning_rate": 1.173364592887834e-05, "loss": 0.5782, "step": 9799 }, { "epoch": 1.383789889861621, "grad_norm": 3.5343009118416986, "learning_rate": 1.1732144539484467e-05, "loss": 0.5617, "step": 9800 }, { "epoch": 1.3839310929116069, "grad_norm": 4.512213061190638, "learning_rate": 1.1730643109836306e-05, "loss": 0.8468, "step": 9801 }, { "epoch": 1.3840722959615928, "grad_norm": 3.1442163701738886, "learning_rate": 1.1729141639968745e-05, "loss": 0.5359, "step": 9802 }, { "epoch": 1.3842134990115786, "grad_norm": 3.9419128741573264, "learning_rate": 1.172764012991668e-05, "loss": 0.6845, "step": 9803 }, { "epoch": 1.3843547020615645, "grad_norm": 3.6723802782369126, "learning_rate": 1.1726138579715002e-05, "loss": 0.6403, "step": 9804 }, { "epoch": 1.3844959051115504, "grad_norm": 3.1925545760678347, "learning_rate": 1.1724636989398613e-05, "loss": 0.5057, "step": 9805 }, { "epoch": 1.3846371081615363, "grad_norm": 2.9651585994013585, "learning_rate": 1.1723135359002403e-05, "loss": 0.5296, "step": 9806 }, { "epoch": 1.3847783112115222, "grad_norm": 3.6699050345342727, "learning_rate": 1.1721633688561269e-05, "loss": 0.6312, "step": 9807 }, { "epoch": 1.384919514261508, "grad_norm": 3.691039350007807, "learning_rate": 1.1720131978110115e-05, "loss": 0.7026, "step": 9808 }, { "epoch": 1.385060717311494, "grad_norm": 3.146417988611298, "learning_rate": 1.1718630227683836e-05, "loss": 0.501, "step": 9809 }, { "epoch": 1.3852019203614798, "grad_norm": 3.2611257374837237, "learning_rate": 1.171712843731733e-05, "loss": 0.5615, "step": 9810 }, { "epoch": 1.3853431234114657, "grad_norm": 3.575599148623598, "learning_rate": 1.1715626607045502e-05, "loss": 0.7724, "step": 9811 }, { "epoch": 1.3854843264614516, "grad_norm": 3.2158472768598383, "learning_rate": 1.1714124736903254e-05, "loss": 0.4755, "step": 9812 }, { "epoch": 1.3856255295114375, "grad_norm": 3.196403257017226, "learning_rate": 1.1712622826925488e-05, "loss": 0.5397, "step": 9813 }, { "epoch": 1.3857667325614234, "grad_norm": 3.1979300951708893, "learning_rate": 1.1711120877147107e-05, "loss": 0.5166, "step": 9814 }, { "epoch": 1.3859079356114092, "grad_norm": 2.927604481874085, "learning_rate": 1.1709618887603013e-05, "loss": 0.549, "step": 9815 }, { "epoch": 1.3860491386613951, "grad_norm": 3.8130612155834367, "learning_rate": 1.1708116858328116e-05, "loss": 0.5751, "step": 9816 }, { "epoch": 1.386190341711381, "grad_norm": 3.3210961544953657, "learning_rate": 1.1706614789357321e-05, "loss": 0.5132, "step": 9817 }, { "epoch": 1.3863315447613669, "grad_norm": 3.926222479621664, "learning_rate": 1.1705112680725538e-05, "loss": 0.6276, "step": 9818 }, { "epoch": 1.3864727478113528, "grad_norm": 4.021742170805987, "learning_rate": 1.1703610532467669e-05, "loss": 0.6613, "step": 9819 }, { "epoch": 1.3866139508613387, "grad_norm": 3.4556335468295765, "learning_rate": 1.1702108344618627e-05, "loss": 0.5525, "step": 9820 }, { "epoch": 1.3867551539113245, "grad_norm": 4.499309035479272, "learning_rate": 1.1700606117213325e-05, "loss": 0.6804, "step": 9821 }, { "epoch": 1.3868963569613104, "grad_norm": 3.366822180333817, "learning_rate": 1.1699103850286668e-05, "loss": 0.6411, "step": 9822 }, { "epoch": 1.3870375600112963, "grad_norm": 3.6335560180900734, "learning_rate": 1.1697601543873573e-05, "loss": 0.6906, "step": 9823 }, { "epoch": 1.3871787630612822, "grad_norm": 3.2170350418810316, "learning_rate": 1.1696099198008953e-05, "loss": 0.471, "step": 9824 }, { "epoch": 1.387319966111268, "grad_norm": 3.810120499373073, "learning_rate": 1.1694596812727714e-05, "loss": 0.5571, "step": 9825 }, { "epoch": 1.387461169161254, "grad_norm": 4.397999742688604, "learning_rate": 1.1693094388064786e-05, "loss": 0.7314, "step": 9826 }, { "epoch": 1.3876023722112398, "grad_norm": 3.648693252172972, "learning_rate": 1.1691591924055068e-05, "loss": 0.5917, "step": 9827 }, { "epoch": 1.3877435752612257, "grad_norm": 4.439013085587571, "learning_rate": 1.1690089420733486e-05, "loss": 0.6649, "step": 9828 }, { "epoch": 1.3878847783112116, "grad_norm": 3.305157732351099, "learning_rate": 1.1688586878134957e-05, "loss": 0.5703, "step": 9829 }, { "epoch": 1.3880259813611975, "grad_norm": 3.4168869854880115, "learning_rate": 1.1687084296294398e-05, "loss": 0.5209, "step": 9830 }, { "epoch": 1.3881671844111834, "grad_norm": 3.333916935510874, "learning_rate": 1.1685581675246729e-05, "loss": 0.6147, "step": 9831 }, { "epoch": 1.3883083874611692, "grad_norm": 3.7175505390952064, "learning_rate": 1.168407901502687e-05, "loss": 0.5546, "step": 9832 }, { "epoch": 1.3884495905111551, "grad_norm": 2.7021335870646737, "learning_rate": 1.1682576315669738e-05, "loss": 0.4484, "step": 9833 }, { "epoch": 1.388590793561141, "grad_norm": 4.139068438553842, "learning_rate": 1.1681073577210262e-05, "loss": 0.6094, "step": 9834 }, { "epoch": 1.388731996611127, "grad_norm": 4.208528815254102, "learning_rate": 1.1679570799683365e-05, "loss": 0.7139, "step": 9835 }, { "epoch": 1.3888731996611128, "grad_norm": 3.782106734504938, "learning_rate": 1.1678067983123965e-05, "loss": 0.6417, "step": 9836 }, { "epoch": 1.3890144027110987, "grad_norm": 3.3050716208228588, "learning_rate": 1.167656512756699e-05, "loss": 0.5154, "step": 9837 }, { "epoch": 1.3891556057610845, "grad_norm": 2.8798767534758616, "learning_rate": 1.1675062233047365e-05, "loss": 0.4874, "step": 9838 }, { "epoch": 1.3892968088110704, "grad_norm": 4.161224567767254, "learning_rate": 1.167355929960002e-05, "loss": 0.5871, "step": 9839 }, { "epoch": 1.3894380118610563, "grad_norm": 3.5447509114756715, "learning_rate": 1.1672056327259876e-05, "loss": 0.6609, "step": 9840 }, { "epoch": 1.3895792149110422, "grad_norm": 3.6684300654529878, "learning_rate": 1.1670553316061865e-05, "loss": 0.573, "step": 9841 }, { "epoch": 1.389720417961028, "grad_norm": 3.94433741990711, "learning_rate": 1.1669050266040917e-05, "loss": 0.6535, "step": 9842 }, { "epoch": 1.389861621011014, "grad_norm": 4.473791419229923, "learning_rate": 1.1667547177231966e-05, "loss": 0.624, "step": 9843 }, { "epoch": 1.3900028240609998, "grad_norm": 4.1886857825418184, "learning_rate": 1.1666044049669934e-05, "loss": 0.6623, "step": 9844 }, { "epoch": 1.3901440271109857, "grad_norm": 3.4432835279406597, "learning_rate": 1.1664540883389757e-05, "loss": 0.5244, "step": 9845 }, { "epoch": 1.3902852301609716, "grad_norm": 3.3807762809208954, "learning_rate": 1.1663037678426371e-05, "loss": 0.5699, "step": 9846 }, { "epoch": 1.3904264332109575, "grad_norm": 3.579029842404051, "learning_rate": 1.1661534434814707e-05, "loss": 0.5556, "step": 9847 }, { "epoch": 1.3905676362609434, "grad_norm": 3.4287956254850154, "learning_rate": 1.16600311525897e-05, "loss": 0.5294, "step": 9848 }, { "epoch": 1.390708839310929, "grad_norm": 4.0355526996282665, "learning_rate": 1.1658527831786289e-05, "loss": 0.5515, "step": 9849 }, { "epoch": 1.390850042360915, "grad_norm": 4.239185880095875, "learning_rate": 1.1657024472439402e-05, "loss": 0.618, "step": 9850 }, { "epoch": 1.3909912454109008, "grad_norm": 3.983146704913183, "learning_rate": 1.1655521074583986e-05, "loss": 0.5716, "step": 9851 }, { "epoch": 1.3911324484608867, "grad_norm": 3.3410843897360936, "learning_rate": 1.1654017638254976e-05, "loss": 0.6279, "step": 9852 }, { "epoch": 1.3912736515108726, "grad_norm": 3.6403595886753464, "learning_rate": 1.1652514163487307e-05, "loss": 0.5591, "step": 9853 }, { "epoch": 1.3914148545608584, "grad_norm": 3.384141194899694, "learning_rate": 1.1651010650315923e-05, "loss": 0.576, "step": 9854 }, { "epoch": 1.3915560576108443, "grad_norm": 3.7380540587158393, "learning_rate": 1.1649507098775765e-05, "loss": 0.6175, "step": 9855 }, { "epoch": 1.3916972606608302, "grad_norm": 4.076833621569356, "learning_rate": 1.1648003508901775e-05, "loss": 0.648, "step": 9856 }, { "epoch": 1.391838463710816, "grad_norm": 3.3176339572170135, "learning_rate": 1.1646499880728897e-05, "loss": 0.5357, "step": 9857 }, { "epoch": 1.391979666760802, "grad_norm": 4.420644302896781, "learning_rate": 1.164499621429207e-05, "loss": 0.7863, "step": 9858 }, { "epoch": 1.3921208698107879, "grad_norm": 4.1825793253469845, "learning_rate": 1.1643492509626242e-05, "loss": 0.681, "step": 9859 }, { "epoch": 1.3922620728607737, "grad_norm": 3.7939241788630405, "learning_rate": 1.1641988766766359e-05, "loss": 0.5817, "step": 9860 }, { "epoch": 1.3924032759107596, "grad_norm": 3.514503969440174, "learning_rate": 1.1640484985747365e-05, "loss": 0.6232, "step": 9861 }, { "epoch": 1.3925444789607455, "grad_norm": 4.367900527283707, "learning_rate": 1.1638981166604206e-05, "loss": 0.653, "step": 9862 }, { "epoch": 1.3926856820107314, "grad_norm": 4.135579436941323, "learning_rate": 1.1637477309371837e-05, "loss": 0.775, "step": 9863 }, { "epoch": 1.3928268850607173, "grad_norm": 3.8635195881828857, "learning_rate": 1.16359734140852e-05, "loss": 0.7002, "step": 9864 }, { "epoch": 1.3929680881107032, "grad_norm": 4.04607179465437, "learning_rate": 1.1634469480779249e-05, "loss": 0.6851, "step": 9865 }, { "epoch": 1.393109291160689, "grad_norm": 3.447249623318818, "learning_rate": 1.1632965509488932e-05, "loss": 0.5526, "step": 9866 }, { "epoch": 1.393250494210675, "grad_norm": 3.525782843010651, "learning_rate": 1.1631461500249199e-05, "loss": 0.6295, "step": 9867 }, { "epoch": 1.3933916972606608, "grad_norm": 3.345715714935897, "learning_rate": 1.1629957453095013e-05, "loss": 0.5684, "step": 9868 }, { "epoch": 1.3935329003106467, "grad_norm": 3.5043972621294537, "learning_rate": 1.1628453368061315e-05, "loss": 0.5934, "step": 9869 }, { "epoch": 1.3936741033606326, "grad_norm": 3.5506528786630382, "learning_rate": 1.1626949245183061e-05, "loss": 0.6043, "step": 9870 }, { "epoch": 1.3938153064106185, "grad_norm": 2.8280871187474528, "learning_rate": 1.1625445084495213e-05, "loss": 0.5489, "step": 9871 }, { "epoch": 1.3939565094606043, "grad_norm": 3.251375902078435, "learning_rate": 1.1623940886032723e-05, "loss": 0.5236, "step": 9872 }, { "epoch": 1.3940977125105902, "grad_norm": 3.015023220784678, "learning_rate": 1.1622436649830546e-05, "loss": 0.5119, "step": 9873 }, { "epoch": 1.394238915560576, "grad_norm": 3.36756802486958, "learning_rate": 1.1620932375923644e-05, "loss": 0.505, "step": 9874 }, { "epoch": 1.394380118610562, "grad_norm": 3.5814695906214515, "learning_rate": 1.1619428064346973e-05, "loss": 0.6922, "step": 9875 }, { "epoch": 1.3945213216605479, "grad_norm": 3.743606131725301, "learning_rate": 1.1617923715135493e-05, "loss": 0.6426, "step": 9876 }, { "epoch": 1.3946625247105338, "grad_norm": 3.433415906677097, "learning_rate": 1.1616419328324166e-05, "loss": 0.5339, "step": 9877 }, { "epoch": 1.3948037277605196, "grad_norm": 3.2092886784737016, "learning_rate": 1.1614914903947952e-05, "loss": 0.5567, "step": 9878 }, { "epoch": 1.3949449308105055, "grad_norm": 4.074211293431233, "learning_rate": 1.1613410442041808e-05, "loss": 0.6666, "step": 9879 }, { "epoch": 1.3950861338604914, "grad_norm": 4.291510472790171, "learning_rate": 1.1611905942640707e-05, "loss": 0.7636, "step": 9880 }, { "epoch": 1.3952273369104773, "grad_norm": 4.035418183227724, "learning_rate": 1.1610401405779608e-05, "loss": 0.6795, "step": 9881 }, { "epoch": 1.3953685399604632, "grad_norm": 4.96995317458963, "learning_rate": 1.1608896831493475e-05, "loss": 0.8271, "step": 9882 }, { "epoch": 1.395509743010449, "grad_norm": 2.627076894427599, "learning_rate": 1.1607392219817272e-05, "loss": 0.4173, "step": 9883 }, { "epoch": 1.395650946060435, "grad_norm": 3.7438005211037546, "learning_rate": 1.1605887570785972e-05, "loss": 0.5769, "step": 9884 }, { "epoch": 1.3957921491104208, "grad_norm": 3.5761898545115387, "learning_rate": 1.1604382884434537e-05, "loss": 0.5281, "step": 9885 }, { "epoch": 1.3959333521604067, "grad_norm": 3.6553671431407366, "learning_rate": 1.1602878160797936e-05, "loss": 0.5084, "step": 9886 }, { "epoch": 1.3960745552103926, "grad_norm": 4.344755588560155, "learning_rate": 1.1601373399911137e-05, "loss": 0.7339, "step": 9887 }, { "epoch": 1.3962157582603785, "grad_norm": 3.5602518300063393, "learning_rate": 1.1599868601809114e-05, "loss": 0.5599, "step": 9888 }, { "epoch": 1.3963569613103644, "grad_norm": 3.430855151090691, "learning_rate": 1.1598363766526834e-05, "loss": 0.6009, "step": 9889 }, { "epoch": 1.3964981643603502, "grad_norm": 3.635641645994789, "learning_rate": 1.1596858894099272e-05, "loss": 0.5932, "step": 9890 }, { "epoch": 1.3966393674103361, "grad_norm": 3.5617751593296947, "learning_rate": 1.15953539845614e-05, "loss": 0.6612, "step": 9891 }, { "epoch": 1.396780570460322, "grad_norm": 3.1035933100114614, "learning_rate": 1.1593849037948189e-05, "loss": 0.5431, "step": 9892 }, { "epoch": 1.3969217735103079, "grad_norm": 3.245455322386706, "learning_rate": 1.1592344054294613e-05, "loss": 0.5808, "step": 9893 }, { "epoch": 1.3970629765602938, "grad_norm": 3.319015716889081, "learning_rate": 1.1590839033635652e-05, "loss": 0.5563, "step": 9894 }, { "epoch": 1.3972041796102797, "grad_norm": 2.985883387348687, "learning_rate": 1.1589333976006278e-05, "loss": 0.4987, "step": 9895 }, { "epoch": 1.3973453826602655, "grad_norm": 4.44666187870931, "learning_rate": 1.1587828881441468e-05, "loss": 0.7496, "step": 9896 }, { "epoch": 1.3974865857102514, "grad_norm": 4.6670398033800735, "learning_rate": 1.1586323749976201e-05, "loss": 0.655, "step": 9897 }, { "epoch": 1.3976277887602373, "grad_norm": 2.7928571337114008, "learning_rate": 1.1584818581645453e-05, "loss": 0.4452, "step": 9898 }, { "epoch": 1.3977689918102232, "grad_norm": 5.164255213268622, "learning_rate": 1.1583313376484209e-05, "loss": 0.8635, "step": 9899 }, { "epoch": 1.3979101948602088, "grad_norm": 4.089959308270347, "learning_rate": 1.1581808134527443e-05, "loss": 0.531, "step": 9900 }, { "epoch": 1.3980513979101947, "grad_norm": 3.5520014738183843, "learning_rate": 1.1580302855810142e-05, "loss": 0.5803, "step": 9901 }, { "epoch": 1.3981926009601806, "grad_norm": 2.6428292260988737, "learning_rate": 1.1578797540367284e-05, "loss": 0.4608, "step": 9902 }, { "epoch": 1.3983338040101665, "grad_norm": 3.681028921829734, "learning_rate": 1.1577292188233853e-05, "loss": 0.6104, "step": 9903 }, { "epoch": 1.3984750070601524, "grad_norm": 2.7962695063828074, "learning_rate": 1.157578679944483e-05, "loss": 0.4772, "step": 9904 }, { "epoch": 1.3986162101101383, "grad_norm": 4.163563947196601, "learning_rate": 1.1574281374035206e-05, "loss": 0.6053, "step": 9905 }, { "epoch": 1.3987574131601241, "grad_norm": 2.986014200504435, "learning_rate": 1.157277591203996e-05, "loss": 0.5422, "step": 9906 }, { "epoch": 1.39889861621011, "grad_norm": 2.9481756687840637, "learning_rate": 1.1571270413494082e-05, "loss": 0.4456, "step": 9907 }, { "epoch": 1.399039819260096, "grad_norm": 4.063261320869723, "learning_rate": 1.1569764878432559e-05, "loss": 0.6013, "step": 9908 }, { "epoch": 1.3991810223100818, "grad_norm": 3.3496478110684778, "learning_rate": 1.156825930689038e-05, "loss": 0.514, "step": 9909 }, { "epoch": 1.3993222253600677, "grad_norm": 3.091898970426328, "learning_rate": 1.1566753698902527e-05, "loss": 0.4635, "step": 9910 }, { "epoch": 1.3994634284100536, "grad_norm": 3.695572337695737, "learning_rate": 1.1565248054503999e-05, "loss": 0.6152, "step": 9911 }, { "epoch": 1.3996046314600394, "grad_norm": 3.678411544056201, "learning_rate": 1.156374237372978e-05, "loss": 0.6309, "step": 9912 }, { "epoch": 1.3997458345100253, "grad_norm": 4.155175543974391, "learning_rate": 1.1562236656614863e-05, "loss": 0.5537, "step": 9913 }, { "epoch": 1.3998870375600112, "grad_norm": 4.523804379150606, "learning_rate": 1.1560730903194242e-05, "loss": 0.7067, "step": 9914 }, { "epoch": 1.400028240609997, "grad_norm": 3.4751366444823035, "learning_rate": 1.155922511350291e-05, "loss": 0.5602, "step": 9915 }, { "epoch": 1.400169443659983, "grad_norm": 3.1838686907039135, "learning_rate": 1.1557719287575858e-05, "loss": 0.4078, "step": 9916 }, { "epoch": 1.4003106467099689, "grad_norm": 3.9072861243559607, "learning_rate": 1.1556213425448082e-05, "loss": 0.4787, "step": 9917 }, { "epoch": 1.4004518497599547, "grad_norm": 3.6372154595961694, "learning_rate": 1.155470752715458e-05, "loss": 0.5769, "step": 9918 }, { "epoch": 1.4005930528099406, "grad_norm": 4.080503965457572, "learning_rate": 1.1553201592730345e-05, "loss": 0.7389, "step": 9919 }, { "epoch": 1.4007342558599265, "grad_norm": 3.3875410333751046, "learning_rate": 1.1551695622210377e-05, "loss": 0.4923, "step": 9920 }, { "epoch": 1.4008754589099124, "grad_norm": 4.5441608103377185, "learning_rate": 1.1550189615629672e-05, "loss": 0.5013, "step": 9921 }, { "epoch": 1.4010166619598983, "grad_norm": 3.7740130223879893, "learning_rate": 1.1548683573023229e-05, "loss": 0.5647, "step": 9922 }, { "epoch": 1.4011578650098842, "grad_norm": 3.2370838240847157, "learning_rate": 1.154717749442605e-05, "loss": 0.5198, "step": 9923 }, { "epoch": 1.40129906805987, "grad_norm": 3.5835146748517595, "learning_rate": 1.1545671379873134e-05, "loss": 0.5716, "step": 9924 }, { "epoch": 1.401440271109856, "grad_norm": 3.39535029704569, "learning_rate": 1.1544165229399481e-05, "loss": 0.5243, "step": 9925 }, { "epoch": 1.4015814741598418, "grad_norm": 4.358751874097533, "learning_rate": 1.1542659043040097e-05, "loss": 0.6843, "step": 9926 }, { "epoch": 1.4017226772098277, "grad_norm": 3.2639322635725927, "learning_rate": 1.1541152820829984e-05, "loss": 0.5403, "step": 9927 }, { "epoch": 1.4018638802598136, "grad_norm": 3.4067849244233837, "learning_rate": 1.1539646562804143e-05, "loss": 0.509, "step": 9928 }, { "epoch": 1.4020050833097994, "grad_norm": 3.0480837846178517, "learning_rate": 1.1538140268997583e-05, "loss": 0.4499, "step": 9929 }, { "epoch": 1.4021462863597853, "grad_norm": 3.5205926500311415, "learning_rate": 1.1536633939445302e-05, "loss": 0.5647, "step": 9930 }, { "epoch": 1.4022874894097712, "grad_norm": 3.885079150439811, "learning_rate": 1.1535127574182315e-05, "loss": 0.6031, "step": 9931 }, { "epoch": 1.402428692459757, "grad_norm": 4.968488923984317, "learning_rate": 1.153362117324363e-05, "loss": 0.8495, "step": 9932 }, { "epoch": 1.402569895509743, "grad_norm": 3.4678362147507134, "learning_rate": 1.1532114736664247e-05, "loss": 0.5386, "step": 9933 }, { "epoch": 1.4027110985597289, "grad_norm": 3.920330613032662, "learning_rate": 1.153060826447918e-05, "loss": 0.6705, "step": 9934 }, { "epoch": 1.4028523016097147, "grad_norm": 4.407297759808663, "learning_rate": 1.1529101756723437e-05, "loss": 0.7682, "step": 9935 }, { "epoch": 1.4029935046597006, "grad_norm": 2.9995003349214144, "learning_rate": 1.152759521343203e-05, "loss": 0.4892, "step": 9936 }, { "epoch": 1.4031347077096865, "grad_norm": 3.5563325660103, "learning_rate": 1.1526088634639971e-05, "loss": 0.6243, "step": 9937 }, { "epoch": 1.4032759107596724, "grad_norm": 3.3098449471199336, "learning_rate": 1.1524582020382271e-05, "loss": 0.563, "step": 9938 }, { "epoch": 1.4034171138096583, "grad_norm": 3.6315818488780485, "learning_rate": 1.1523075370693942e-05, "loss": 0.6929, "step": 9939 }, { "epoch": 1.4035583168596442, "grad_norm": 3.4625189347739247, "learning_rate": 1.1521568685610003e-05, "loss": 0.5728, "step": 9940 }, { "epoch": 1.40369951990963, "grad_norm": 2.9052230762252265, "learning_rate": 1.152006196516546e-05, "loss": 0.5621, "step": 9941 }, { "epoch": 1.403840722959616, "grad_norm": 3.672861170918665, "learning_rate": 1.1518555209395334e-05, "loss": 0.8082, "step": 9942 }, { "epoch": 1.4039819260096018, "grad_norm": 3.422702997674202, "learning_rate": 1.1517048418334644e-05, "loss": 0.5315, "step": 9943 }, { "epoch": 1.4041231290595877, "grad_norm": 3.424173657753212, "learning_rate": 1.1515541592018402e-05, "loss": 0.6116, "step": 9944 }, { "epoch": 1.4042643321095736, "grad_norm": 3.348063860069627, "learning_rate": 1.1514034730481627e-05, "loss": 0.5376, "step": 9945 }, { "epoch": 1.4044055351595595, "grad_norm": 3.242136339919406, "learning_rate": 1.1512527833759339e-05, "loss": 0.6332, "step": 9946 }, { "epoch": 1.4045467382095453, "grad_norm": 2.8300872173050644, "learning_rate": 1.1511020901886559e-05, "loss": 0.4877, "step": 9947 }, { "epoch": 1.4046879412595312, "grad_norm": 2.841581205226284, "learning_rate": 1.1509513934898303e-05, "loss": 0.5086, "step": 9948 }, { "epoch": 1.404829144309517, "grad_norm": 3.8673580597270614, "learning_rate": 1.1508006932829601e-05, "loss": 0.5855, "step": 9949 }, { "epoch": 1.404970347359503, "grad_norm": 3.814007735173787, "learning_rate": 1.1506499895715462e-05, "loss": 0.5789, "step": 9950 }, { "epoch": 1.4051115504094889, "grad_norm": 3.223935578785833, "learning_rate": 1.150499282359092e-05, "loss": 0.4186, "step": 9951 }, { "epoch": 1.4052527534594748, "grad_norm": 4.250623921984275, "learning_rate": 1.1503485716490994e-05, "loss": 0.7245, "step": 9952 }, { "epoch": 1.4053939565094606, "grad_norm": 3.461544245584893, "learning_rate": 1.150197857445071e-05, "loss": 0.5743, "step": 9953 }, { "epoch": 1.4055351595594465, "grad_norm": 3.549416857556625, "learning_rate": 1.1500471397505091e-05, "loss": 0.5783, "step": 9954 }, { "epoch": 1.4056763626094324, "grad_norm": 3.172461769115385, "learning_rate": 1.1498964185689166e-05, "loss": 0.4725, "step": 9955 }, { "epoch": 1.4058175656594183, "grad_norm": 3.5471306267087424, "learning_rate": 1.1497456939037957e-05, "loss": 0.5747, "step": 9956 }, { "epoch": 1.4059587687094042, "grad_norm": 4.396020191509421, "learning_rate": 1.14959496575865e-05, "loss": 0.7524, "step": 9957 }, { "epoch": 1.40609997175939, "grad_norm": 4.016762394884701, "learning_rate": 1.1494442341369819e-05, "loss": 0.4982, "step": 9958 }, { "epoch": 1.406241174809376, "grad_norm": 3.196397403382184, "learning_rate": 1.149293499042294e-05, "loss": 0.4967, "step": 9959 }, { "epoch": 1.4063823778593618, "grad_norm": 3.3524463210519264, "learning_rate": 1.1491427604780898e-05, "loss": 0.471, "step": 9960 }, { "epoch": 1.4065235809093477, "grad_norm": 3.351595289088621, "learning_rate": 1.1489920184478724e-05, "loss": 0.6062, "step": 9961 }, { "epoch": 1.4066647839593336, "grad_norm": 3.006542705937755, "learning_rate": 1.1488412729551449e-05, "loss": 0.5358, "step": 9962 }, { "epoch": 1.4068059870093195, "grad_norm": 3.6891630825740758, "learning_rate": 1.1486905240034103e-05, "loss": 0.5835, "step": 9963 }, { "epoch": 1.4069471900593054, "grad_norm": 3.6646470565168814, "learning_rate": 1.1485397715961719e-05, "loss": 0.5274, "step": 9964 }, { "epoch": 1.4070883931092912, "grad_norm": 3.2873355406246887, "learning_rate": 1.1483890157369338e-05, "loss": 0.5053, "step": 9965 }, { "epoch": 1.4072295961592771, "grad_norm": 3.347787165528886, "learning_rate": 1.148238256429199e-05, "loss": 0.5185, "step": 9966 }, { "epoch": 1.407370799209263, "grad_norm": 4.086498348153865, "learning_rate": 1.1480874936764708e-05, "loss": 0.6709, "step": 9967 }, { "epoch": 1.4075120022592489, "grad_norm": 3.286952228251929, "learning_rate": 1.1479367274822535e-05, "loss": 0.454, "step": 9968 }, { "epoch": 1.4076532053092348, "grad_norm": 4.231713885757386, "learning_rate": 1.1477859578500505e-05, "loss": 0.6515, "step": 9969 }, { "epoch": 1.4077944083592207, "grad_norm": 3.515755331629892, "learning_rate": 1.1476351847833656e-05, "loss": 0.5199, "step": 9970 }, { "epoch": 1.4079356114092065, "grad_norm": 3.3727204050160067, "learning_rate": 1.1474844082857028e-05, "loss": 0.5076, "step": 9971 }, { "epoch": 1.4080768144591924, "grad_norm": 3.83093851023455, "learning_rate": 1.1473336283605661e-05, "loss": 0.5691, "step": 9972 }, { "epoch": 1.4082180175091783, "grad_norm": 3.5622267102619434, "learning_rate": 1.1471828450114593e-05, "loss": 0.5471, "step": 9973 }, { "epoch": 1.4083592205591642, "grad_norm": 4.346334946167332, "learning_rate": 1.1470320582418873e-05, "loss": 0.741, "step": 9974 }, { "epoch": 1.40850042360915, "grad_norm": 5.3133415210162305, "learning_rate": 1.1468812680553531e-05, "loss": 0.8298, "step": 9975 }, { "epoch": 1.408641626659136, "grad_norm": 4.1181824749170115, "learning_rate": 1.1467304744553618e-05, "loss": 0.7347, "step": 9976 }, { "epoch": 1.4087828297091218, "grad_norm": 3.5970778980601463, "learning_rate": 1.1465796774454179e-05, "loss": 0.5772, "step": 9977 }, { "epoch": 1.4089240327591077, "grad_norm": 4.183882615901438, "learning_rate": 1.1464288770290255e-05, "loss": 0.8161, "step": 9978 }, { "epoch": 1.4090652358090936, "grad_norm": 3.615891647298768, "learning_rate": 1.1462780732096892e-05, "loss": 0.6358, "step": 9979 }, { "epoch": 1.4092064388590795, "grad_norm": 3.8267839590585075, "learning_rate": 1.1461272659909137e-05, "loss": 0.6225, "step": 9980 }, { "epoch": 1.4093476419090654, "grad_norm": 4.167799870539445, "learning_rate": 1.1459764553762036e-05, "loss": 0.6346, "step": 9981 }, { "epoch": 1.4094888449590512, "grad_norm": 4.490430220674449, "learning_rate": 1.1458256413690634e-05, "loss": 0.8326, "step": 9982 }, { "epoch": 1.4096300480090371, "grad_norm": 3.511666082860179, "learning_rate": 1.1456748239729988e-05, "loss": 0.6215, "step": 9983 }, { "epoch": 1.409771251059023, "grad_norm": 3.717853277539238, "learning_rate": 1.1455240031915139e-05, "loss": 0.6476, "step": 9984 }, { "epoch": 1.4099124541090087, "grad_norm": 3.385886170262072, "learning_rate": 1.1453731790281142e-05, "loss": 0.5055, "step": 9985 }, { "epoch": 1.4100536571589946, "grad_norm": 3.134590817649844, "learning_rate": 1.1452223514863046e-05, "loss": 0.514, "step": 9986 }, { "epoch": 1.4101948602089804, "grad_norm": 3.004481077025368, "learning_rate": 1.14507152056959e-05, "loss": 0.5499, "step": 9987 }, { "epoch": 1.4103360632589663, "grad_norm": 3.259040859642275, "learning_rate": 1.1449206862814762e-05, "loss": 0.5499, "step": 9988 }, { "epoch": 1.4104772663089522, "grad_norm": 3.2017786449153753, "learning_rate": 1.1447698486254681e-05, "loss": 0.6083, "step": 9989 }, { "epoch": 1.410618469358938, "grad_norm": 3.396536233214283, "learning_rate": 1.144619007605071e-05, "loss": 0.6517, "step": 9990 }, { "epoch": 1.410759672408924, "grad_norm": 3.2862296928033086, "learning_rate": 1.1444681632237913e-05, "loss": 0.5821, "step": 9991 }, { "epoch": 1.4109008754589099, "grad_norm": 3.1123118393496783, "learning_rate": 1.1443173154851335e-05, "loss": 0.5721, "step": 9992 }, { "epoch": 1.4110420785088957, "grad_norm": 3.4013941680864623, "learning_rate": 1.1441664643926033e-05, "loss": 0.6318, "step": 9993 }, { "epoch": 1.4111832815588816, "grad_norm": 3.395551338655201, "learning_rate": 1.1440156099497071e-05, "loss": 0.6149, "step": 9994 }, { "epoch": 1.4113244846088675, "grad_norm": 3.4671509784773, "learning_rate": 1.1438647521599502e-05, "loss": 0.5391, "step": 9995 }, { "epoch": 1.4114656876588534, "grad_norm": 3.5569540484970616, "learning_rate": 1.1437138910268387e-05, "loss": 0.5668, "step": 9996 }, { "epoch": 1.4116068907088393, "grad_norm": 2.682772077995674, "learning_rate": 1.1435630265538783e-05, "loss": 0.4719, "step": 9997 }, { "epoch": 1.4117480937588252, "grad_norm": 3.3146192991063566, "learning_rate": 1.1434121587445752e-05, "loss": 0.7314, "step": 9998 }, { "epoch": 1.411889296808811, "grad_norm": 3.7208281612050618, "learning_rate": 1.1432612876024351e-05, "loss": 0.6564, "step": 9999 }, { "epoch": 1.412030499858797, "grad_norm": 3.1413816015659046, "learning_rate": 1.1431104131309654e-05, "loss": 0.4828, "step": 10000 }, { "epoch": 1.4121717029087828, "grad_norm": 3.6349313832083703, "learning_rate": 1.1429595353336707e-05, "loss": 0.5336, "step": 10001 }, { "epoch": 1.4123129059587687, "grad_norm": 3.5651510400409356, "learning_rate": 1.1428086542140587e-05, "loss": 0.6487, "step": 10002 }, { "epoch": 1.4124541090087546, "grad_norm": 3.9647051327805367, "learning_rate": 1.1426577697756349e-05, "loss": 0.6683, "step": 10003 }, { "epoch": 1.4125953120587404, "grad_norm": 3.6574314298222608, "learning_rate": 1.1425068820219063e-05, "loss": 0.5625, "step": 10004 }, { "epoch": 1.4127365151087263, "grad_norm": 3.776228587647339, "learning_rate": 1.1423559909563792e-05, "loss": 0.6115, "step": 10005 }, { "epoch": 1.4128777181587122, "grad_norm": 3.3667811015005817, "learning_rate": 1.1422050965825603e-05, "loss": 0.5398, "step": 10006 }, { "epoch": 1.413018921208698, "grad_norm": 3.3994103835766443, "learning_rate": 1.1420541989039565e-05, "loss": 0.5416, "step": 10007 }, { "epoch": 1.413160124258684, "grad_norm": 3.8719629145732593, "learning_rate": 1.1419032979240748e-05, "loss": 0.6466, "step": 10008 }, { "epoch": 1.4133013273086699, "grad_norm": 3.539220745006238, "learning_rate": 1.1417523936464212e-05, "loss": 0.5222, "step": 10009 }, { "epoch": 1.4134425303586557, "grad_norm": 4.365083169059222, "learning_rate": 1.1416014860745032e-05, "loss": 0.7139, "step": 10010 }, { "epoch": 1.4135837334086416, "grad_norm": 3.3852775372234736, "learning_rate": 1.1414505752118282e-05, "loss": 0.5866, "step": 10011 }, { "epoch": 1.4137249364586275, "grad_norm": 3.525910146370708, "learning_rate": 1.1412996610619028e-05, "loss": 0.5339, "step": 10012 }, { "epoch": 1.4138661395086134, "grad_norm": 3.4110828728229583, "learning_rate": 1.141148743628234e-05, "loss": 0.6024, "step": 10013 }, { "epoch": 1.4140073425585993, "grad_norm": 4.044285463390223, "learning_rate": 1.1409978229143297e-05, "loss": 0.6896, "step": 10014 }, { "epoch": 1.4141485456085852, "grad_norm": 3.795701719462897, "learning_rate": 1.1408468989236967e-05, "loss": 0.6615, "step": 10015 }, { "epoch": 1.414289748658571, "grad_norm": 3.5849166246125104, "learning_rate": 1.1406959716598424e-05, "loss": 0.5201, "step": 10016 }, { "epoch": 1.414430951708557, "grad_norm": 3.659608138703216, "learning_rate": 1.140545041126275e-05, "loss": 0.6697, "step": 10017 }, { "epoch": 1.4145721547585428, "grad_norm": 3.7727706551832108, "learning_rate": 1.1403941073265014e-05, "loss": 0.6302, "step": 10018 }, { "epoch": 1.4147133578085287, "grad_norm": 3.829956282098092, "learning_rate": 1.140243170264029e-05, "loss": 0.5471, "step": 10019 }, { "epoch": 1.4148545608585146, "grad_norm": 3.4829474118955654, "learning_rate": 1.1400922299423663e-05, "loss": 0.6554, "step": 10020 }, { "epoch": 1.4149957639085005, "grad_norm": 3.471874382662149, "learning_rate": 1.1399412863650205e-05, "loss": 0.4447, "step": 10021 }, { "epoch": 1.4151369669584863, "grad_norm": 4.081544652349606, "learning_rate": 1.1397903395354996e-05, "loss": 0.6764, "step": 10022 }, { "epoch": 1.4152781700084722, "grad_norm": 3.833129327185525, "learning_rate": 1.1396393894573116e-05, "loss": 0.6219, "step": 10023 }, { "epoch": 1.415419373058458, "grad_norm": 2.9448576279083647, "learning_rate": 1.1394884361339647e-05, "loss": 0.5228, "step": 10024 }, { "epoch": 1.415560576108444, "grad_norm": 4.503364054908912, "learning_rate": 1.1393374795689666e-05, "loss": 0.6854, "step": 10025 }, { "epoch": 1.4157017791584299, "grad_norm": 3.3516196025328155, "learning_rate": 1.1391865197658256e-05, "loss": 0.5391, "step": 10026 }, { "epoch": 1.4158429822084158, "grad_norm": 3.455169752340693, "learning_rate": 1.13903555672805e-05, "loss": 0.5352, "step": 10027 }, { "epoch": 1.4159841852584016, "grad_norm": 3.526703076122184, "learning_rate": 1.1388845904591482e-05, "loss": 0.6047, "step": 10028 }, { "epoch": 1.4161253883083875, "grad_norm": 4.1404653219438154, "learning_rate": 1.1387336209626287e-05, "loss": 0.738, "step": 10029 }, { "epoch": 1.4162665913583734, "grad_norm": 3.447765148310987, "learning_rate": 1.1385826482419993e-05, "loss": 0.6092, "step": 10030 }, { "epoch": 1.4164077944083593, "grad_norm": 3.3899178421102487, "learning_rate": 1.138431672300769e-05, "loss": 0.5192, "step": 10031 }, { "epoch": 1.4165489974583452, "grad_norm": 2.9975048522453926, "learning_rate": 1.1382806931424468e-05, "loss": 0.488, "step": 10032 }, { "epoch": 1.416690200508331, "grad_norm": 3.3878732852401536, "learning_rate": 1.1381297107705407e-05, "loss": 0.5937, "step": 10033 }, { "epoch": 1.416831403558317, "grad_norm": 3.4790842219086735, "learning_rate": 1.1379787251885603e-05, "loss": 0.5662, "step": 10034 }, { "epoch": 1.4169726066083028, "grad_norm": 3.703196528754753, "learning_rate": 1.1378277364000133e-05, "loss": 0.632, "step": 10035 }, { "epoch": 1.4171138096582885, "grad_norm": 3.3248357693739647, "learning_rate": 1.1376767444084096e-05, "loss": 0.4813, "step": 10036 }, { "epoch": 1.4172550127082744, "grad_norm": 3.755396631752553, "learning_rate": 1.1375257492172575e-05, "loss": 0.6783, "step": 10037 }, { "epoch": 1.4173962157582602, "grad_norm": 4.302061202132109, "learning_rate": 1.1373747508300668e-05, "loss": 0.7003, "step": 10038 }, { "epoch": 1.4175374188082461, "grad_norm": 3.343482062875269, "learning_rate": 1.1372237492503459e-05, "loss": 0.5764, "step": 10039 }, { "epoch": 1.417678621858232, "grad_norm": 3.6256899506041567, "learning_rate": 1.1370727444816045e-05, "loss": 0.6389, "step": 10040 }, { "epoch": 1.417819824908218, "grad_norm": 3.460323848004478, "learning_rate": 1.1369217365273517e-05, "loss": 0.4948, "step": 10041 }, { "epoch": 1.4179610279582038, "grad_norm": 3.7989541442836186, "learning_rate": 1.1367707253910969e-05, "loss": 0.6031, "step": 10042 }, { "epoch": 1.4181022310081897, "grad_norm": 3.4381903073420172, "learning_rate": 1.1366197110763493e-05, "loss": 0.5904, "step": 10043 }, { "epoch": 1.4182434340581755, "grad_norm": 3.4553998578044767, "learning_rate": 1.1364686935866186e-05, "loss": 0.5903, "step": 10044 }, { "epoch": 1.4183846371081614, "grad_norm": 3.598849443496455, "learning_rate": 1.1363176729254147e-05, "loss": 0.5836, "step": 10045 }, { "epoch": 1.4185258401581473, "grad_norm": 4.125552027602078, "learning_rate": 1.1361666490962468e-05, "loss": 0.6509, "step": 10046 }, { "epoch": 1.4186670432081332, "grad_norm": 4.127785072037561, "learning_rate": 1.1360156221026246e-05, "loss": 0.6187, "step": 10047 }, { "epoch": 1.418808246258119, "grad_norm": 4.025195119537113, "learning_rate": 1.1358645919480585e-05, "loss": 0.6796, "step": 10048 }, { "epoch": 1.418949449308105, "grad_norm": 4.417630974708647, "learning_rate": 1.1357135586360575e-05, "loss": 0.7052, "step": 10049 }, { "epoch": 1.4190906523580908, "grad_norm": 3.3595057860822877, "learning_rate": 1.1355625221701321e-05, "loss": 0.5599, "step": 10050 }, { "epoch": 1.4192318554080767, "grad_norm": 3.3233873671940732, "learning_rate": 1.1354114825537925e-05, "loss": 0.5443, "step": 10051 }, { "epoch": 1.4193730584580626, "grad_norm": 3.5449895236963265, "learning_rate": 1.1352604397905485e-05, "loss": 0.5999, "step": 10052 }, { "epoch": 1.4195142615080485, "grad_norm": 3.5816043286985435, "learning_rate": 1.1351093938839099e-05, "loss": 0.6738, "step": 10053 }, { "epoch": 1.4196554645580344, "grad_norm": 4.801135317371865, "learning_rate": 1.1349583448373878e-05, "loss": 0.6476, "step": 10054 }, { "epoch": 1.4197966676080203, "grad_norm": 3.5187197591562525, "learning_rate": 1.134807292654492e-05, "loss": 0.604, "step": 10055 }, { "epoch": 1.4199378706580061, "grad_norm": 4.033554471543881, "learning_rate": 1.1346562373387326e-05, "loss": 0.6779, "step": 10056 }, { "epoch": 1.420079073707992, "grad_norm": 3.170373876497829, "learning_rate": 1.1345051788936206e-05, "loss": 0.5381, "step": 10057 }, { "epoch": 1.420220276757978, "grad_norm": 3.2004358732909672, "learning_rate": 1.1343541173226664e-05, "loss": 0.4171, "step": 10058 }, { "epoch": 1.4203614798079638, "grad_norm": 4.034171207031646, "learning_rate": 1.1342030526293804e-05, "loss": 0.6205, "step": 10059 }, { "epoch": 1.4205026828579497, "grad_norm": 3.602893136333748, "learning_rate": 1.1340519848172735e-05, "loss": 0.5757, "step": 10060 }, { "epoch": 1.4206438859079356, "grad_norm": 3.548121028516786, "learning_rate": 1.1339009138898564e-05, "loss": 0.611, "step": 10061 }, { "epoch": 1.4207850889579214, "grad_norm": 3.5531648574565677, "learning_rate": 1.1337498398506397e-05, "loss": 0.527, "step": 10062 }, { "epoch": 1.4209262920079073, "grad_norm": 3.280239510184984, "learning_rate": 1.133598762703135e-05, "loss": 0.4548, "step": 10063 }, { "epoch": 1.4210674950578932, "grad_norm": 3.1523811278898277, "learning_rate": 1.133447682450852e-05, "loss": 0.5064, "step": 10064 }, { "epoch": 1.421208698107879, "grad_norm": 3.7126644776368534, "learning_rate": 1.1332965990973028e-05, "loss": 0.5538, "step": 10065 }, { "epoch": 1.421349901157865, "grad_norm": 3.483237396566373, "learning_rate": 1.1331455126459983e-05, "loss": 0.5783, "step": 10066 }, { "epoch": 1.4214911042078509, "grad_norm": 4.209117729853742, "learning_rate": 1.1329944231004494e-05, "loss": 0.8424, "step": 10067 }, { "epoch": 1.4216323072578367, "grad_norm": 3.8785652487225786, "learning_rate": 1.132843330464168e-05, "loss": 0.7277, "step": 10068 }, { "epoch": 1.4217735103078226, "grad_norm": 3.8448678844708275, "learning_rate": 1.1326922347406645e-05, "loss": 0.68, "step": 10069 }, { "epoch": 1.4219147133578085, "grad_norm": 4.171553367440104, "learning_rate": 1.1325411359334509e-05, "loss": 0.6629, "step": 10070 }, { "epoch": 1.4220559164077944, "grad_norm": 3.65608230804267, "learning_rate": 1.1323900340460385e-05, "loss": 0.5878, "step": 10071 }, { "epoch": 1.4221971194577803, "grad_norm": 3.7803653923456317, "learning_rate": 1.1322389290819391e-05, "loss": 0.7111, "step": 10072 }, { "epoch": 1.4223383225077662, "grad_norm": 3.1434950651420817, "learning_rate": 1.132087821044664e-05, "loss": 0.5101, "step": 10073 }, { "epoch": 1.422479525557752, "grad_norm": 3.363581981107003, "learning_rate": 1.1319367099377248e-05, "loss": 0.5507, "step": 10074 }, { "epoch": 1.422620728607738, "grad_norm": 4.813689993234532, "learning_rate": 1.1317855957646335e-05, "loss": 0.8702, "step": 10075 }, { "epoch": 1.4227619316577238, "grad_norm": 3.390249432997044, "learning_rate": 1.1316344785289022e-05, "loss": 0.5283, "step": 10076 }, { "epoch": 1.4229031347077097, "grad_norm": 3.134231867583827, "learning_rate": 1.131483358234042e-05, "loss": 0.5832, "step": 10077 }, { "epoch": 1.4230443377576956, "grad_norm": 3.512508328350173, "learning_rate": 1.1313322348835658e-05, "loss": 0.6358, "step": 10078 }, { "epoch": 1.4231855408076814, "grad_norm": 3.1585828766481856, "learning_rate": 1.1311811084809847e-05, "loss": 0.5076, "step": 10079 }, { "epoch": 1.4233267438576673, "grad_norm": 3.6376497920883653, "learning_rate": 1.1310299790298118e-05, "loss": 0.5494, "step": 10080 }, { "epoch": 1.4234679469076532, "grad_norm": 3.1739506470476964, "learning_rate": 1.1308788465335583e-05, "loss": 0.588, "step": 10081 }, { "epoch": 1.423609149957639, "grad_norm": 3.7061683579348417, "learning_rate": 1.130727710995737e-05, "loss": 0.6534, "step": 10082 }, { "epoch": 1.423750353007625, "grad_norm": 3.5890849433233787, "learning_rate": 1.1305765724198603e-05, "loss": 0.6273, "step": 10083 }, { "epoch": 1.4238915560576109, "grad_norm": 3.698203003315032, "learning_rate": 1.1304254308094405e-05, "loss": 0.6656, "step": 10084 }, { "epoch": 1.4240327591075967, "grad_norm": 3.1788128681781886, "learning_rate": 1.1302742861679898e-05, "loss": 0.5996, "step": 10085 }, { "epoch": 1.4241739621575826, "grad_norm": 4.001386634996595, "learning_rate": 1.1301231384990213e-05, "loss": 0.6156, "step": 10086 }, { "epoch": 1.4243151652075685, "grad_norm": 3.4250401478291472, "learning_rate": 1.1299719878060469e-05, "loss": 0.5627, "step": 10087 }, { "epoch": 1.4244563682575544, "grad_norm": 3.6127853973090347, "learning_rate": 1.1298208340925798e-05, "loss": 0.6111, "step": 10088 }, { "epoch": 1.4245975713075403, "grad_norm": 3.184735396888158, "learning_rate": 1.1296696773621327e-05, "loss": 0.594, "step": 10089 }, { "epoch": 1.4247387743575262, "grad_norm": 3.8256713583121384, "learning_rate": 1.129518517618218e-05, "loss": 0.6449, "step": 10090 }, { "epoch": 1.424879977407512, "grad_norm": 4.012636465211117, "learning_rate": 1.1293673548643492e-05, "loss": 0.6778, "step": 10091 }, { "epoch": 1.425021180457498, "grad_norm": 3.0650595520790453, "learning_rate": 1.1292161891040388e-05, "loss": 0.5679, "step": 10092 }, { "epoch": 1.4251623835074838, "grad_norm": 3.510698717776447, "learning_rate": 1.1290650203407998e-05, "loss": 0.6625, "step": 10093 }, { "epoch": 1.4253035865574697, "grad_norm": 3.4067238145420546, "learning_rate": 1.1289138485781456e-05, "loss": 0.4823, "step": 10094 }, { "epoch": 1.4254447896074556, "grad_norm": 4.328913928382474, "learning_rate": 1.1287626738195895e-05, "loss": 0.4883, "step": 10095 }, { "epoch": 1.4255859926574415, "grad_norm": 3.9056028516413677, "learning_rate": 1.128611496068644e-05, "loss": 0.7912, "step": 10096 }, { "epoch": 1.4257271957074273, "grad_norm": 4.669399952348939, "learning_rate": 1.1284603153288232e-05, "loss": 0.6673, "step": 10097 }, { "epoch": 1.4258683987574132, "grad_norm": 3.9558370532856517, "learning_rate": 1.12830913160364e-05, "loss": 0.581, "step": 10098 }, { "epoch": 1.426009601807399, "grad_norm": 3.568023375904364, "learning_rate": 1.128157944896608e-05, "loss": 0.6443, "step": 10099 }, { "epoch": 1.426150804857385, "grad_norm": 3.8817606320590374, "learning_rate": 1.1280067552112408e-05, "loss": 0.6459, "step": 10100 }, { "epoch": 1.4262920079073709, "grad_norm": 3.162502847560901, "learning_rate": 1.1278555625510519e-05, "loss": 0.5225, "step": 10101 }, { "epoch": 1.4264332109573568, "grad_norm": 3.9475545005016386, "learning_rate": 1.1277043669195549e-05, "loss": 0.5131, "step": 10102 }, { "epoch": 1.4265744140073426, "grad_norm": 3.5782800979875784, "learning_rate": 1.1275531683202634e-05, "loss": 0.5818, "step": 10103 }, { "epoch": 1.4267156170573285, "grad_norm": 4.568194632289159, "learning_rate": 1.1274019667566913e-05, "loss": 0.7814, "step": 10104 }, { "epoch": 1.4268568201073144, "grad_norm": 3.222619447638465, "learning_rate": 1.127250762232353e-05, "loss": 0.5888, "step": 10105 }, { "epoch": 1.4269980231573003, "grad_norm": 3.447640974094161, "learning_rate": 1.1270995547507617e-05, "loss": 0.5915, "step": 10106 }, { "epoch": 1.4271392262072862, "grad_norm": 4.246528452627301, "learning_rate": 1.1269483443154314e-05, "loss": 0.7003, "step": 10107 }, { "epoch": 1.427280429257272, "grad_norm": 4.087662469252202, "learning_rate": 1.1267971309298767e-05, "loss": 0.6286, "step": 10108 }, { "epoch": 1.427421632307258, "grad_norm": 3.534723640275289, "learning_rate": 1.1266459145976114e-05, "loss": 0.4541, "step": 10109 }, { "epoch": 1.4275628353572438, "grad_norm": 3.021244897788777, "learning_rate": 1.1264946953221496e-05, "loss": 0.5568, "step": 10110 }, { "epoch": 1.4277040384072297, "grad_norm": 3.0020911783715216, "learning_rate": 1.1263434731070058e-05, "loss": 0.5255, "step": 10111 }, { "epoch": 1.4278452414572156, "grad_norm": 3.362516578569044, "learning_rate": 1.1261922479556944e-05, "loss": 0.6224, "step": 10112 }, { "epoch": 1.4279864445072015, "grad_norm": 3.3896484959221698, "learning_rate": 1.1260410198717291e-05, "loss": 0.6175, "step": 10113 }, { "epoch": 1.4281276475571874, "grad_norm": 5.803728744142781, "learning_rate": 1.1258897888586256e-05, "loss": 0.9258, "step": 10114 }, { "epoch": 1.4282688506071732, "grad_norm": 4.068852270699901, "learning_rate": 1.1257385549198976e-05, "loss": 0.6771, "step": 10115 }, { "epoch": 1.4284100536571591, "grad_norm": 3.557138896162329, "learning_rate": 1.1255873180590595e-05, "loss": 0.5702, "step": 10116 }, { "epoch": 1.428551256707145, "grad_norm": 3.185678366725922, "learning_rate": 1.1254360782796268e-05, "loss": 0.5307, "step": 10117 }, { "epoch": 1.4286924597571309, "grad_norm": 3.6473374255597992, "learning_rate": 1.1252848355851136e-05, "loss": 0.6067, "step": 10118 }, { "epoch": 1.4288336628071168, "grad_norm": 4.239508292707983, "learning_rate": 1.125133589979035e-05, "loss": 0.7621, "step": 10119 }, { "epoch": 1.4289748658571026, "grad_norm": 3.7802713305141937, "learning_rate": 1.124982341464906e-05, "loss": 0.6425, "step": 10120 }, { "epoch": 1.4291160689070883, "grad_norm": 4.268602868263709, "learning_rate": 1.124831090046241e-05, "loss": 0.6077, "step": 10121 }, { "epoch": 1.4292572719570742, "grad_norm": 3.772950591973506, "learning_rate": 1.1246798357265554e-05, "loss": 0.6318, "step": 10122 }, { "epoch": 1.42939847500706, "grad_norm": 3.759970235997846, "learning_rate": 1.1245285785093646e-05, "loss": 0.4728, "step": 10123 }, { "epoch": 1.429539678057046, "grad_norm": 2.9890596734883546, "learning_rate": 1.124377318398183e-05, "loss": 0.4803, "step": 10124 }, { "epoch": 1.4296808811070318, "grad_norm": 3.51049368271252, "learning_rate": 1.1242260553965265e-05, "loss": 0.6963, "step": 10125 }, { "epoch": 1.4298220841570177, "grad_norm": 3.0291971980100865, "learning_rate": 1.12407478950791e-05, "loss": 0.6131, "step": 10126 }, { "epoch": 1.4299632872070036, "grad_norm": 3.852998759122205, "learning_rate": 1.1239235207358492e-05, "loss": 0.7303, "step": 10127 }, { "epoch": 1.4301044902569895, "grad_norm": 3.6568276061424685, "learning_rate": 1.1237722490838592e-05, "loss": 0.5816, "step": 10128 }, { "epoch": 1.4302456933069754, "grad_norm": 4.3713562866131115, "learning_rate": 1.1236209745554554e-05, "loss": 0.8152, "step": 10129 }, { "epoch": 1.4303868963569613, "grad_norm": 3.2415836764171906, "learning_rate": 1.1234696971541534e-05, "loss": 0.4835, "step": 10130 }, { "epoch": 1.4305280994069471, "grad_norm": 3.019477648177745, "learning_rate": 1.1233184168834694e-05, "loss": 0.5517, "step": 10131 }, { "epoch": 1.430669302456933, "grad_norm": 3.1334642147951657, "learning_rate": 1.1231671337469185e-05, "loss": 0.592, "step": 10132 }, { "epoch": 1.430810505506919, "grad_norm": 4.087925308437173, "learning_rate": 1.1230158477480165e-05, "loss": 0.7543, "step": 10133 }, { "epoch": 1.4309517085569048, "grad_norm": 3.521174549983525, "learning_rate": 1.1228645588902793e-05, "loss": 0.688, "step": 10134 }, { "epoch": 1.4310929116068907, "grad_norm": 4.721521847604285, "learning_rate": 1.1227132671772232e-05, "loss": 0.7504, "step": 10135 }, { "epoch": 1.4312341146568766, "grad_norm": 4.140792224424472, "learning_rate": 1.1225619726123632e-05, "loss": 0.7295, "step": 10136 }, { "epoch": 1.4313753177068624, "grad_norm": 3.1125455060753127, "learning_rate": 1.1224106751992164e-05, "loss": 0.4761, "step": 10137 }, { "epoch": 1.4315165207568483, "grad_norm": 4.0107455294605225, "learning_rate": 1.1222593749412982e-05, "loss": 0.7552, "step": 10138 }, { "epoch": 1.4316577238068342, "grad_norm": 4.197765774506653, "learning_rate": 1.1221080718421247e-05, "loss": 0.806, "step": 10139 }, { "epoch": 1.43179892685682, "grad_norm": 3.722371905268063, "learning_rate": 1.1219567659052126e-05, "loss": 0.5966, "step": 10140 }, { "epoch": 1.431940129906806, "grad_norm": 3.30589107116539, "learning_rate": 1.1218054571340778e-05, "loss": 0.5849, "step": 10141 }, { "epoch": 1.4320813329567919, "grad_norm": 3.6538551289050565, "learning_rate": 1.1216541455322367e-05, "loss": 0.5685, "step": 10142 }, { "epoch": 1.4322225360067777, "grad_norm": 3.687562027237578, "learning_rate": 1.1215028311032059e-05, "loss": 0.5663, "step": 10143 }, { "epoch": 1.4323637390567636, "grad_norm": 3.5327948574719334, "learning_rate": 1.121351513850502e-05, "loss": 0.455, "step": 10144 }, { "epoch": 1.4325049421067495, "grad_norm": 2.8241258553594357, "learning_rate": 1.1212001937776412e-05, "loss": 0.4577, "step": 10145 }, { "epoch": 1.4326461451567354, "grad_norm": 3.111218165323433, "learning_rate": 1.12104887088814e-05, "loss": 0.5555, "step": 10146 }, { "epoch": 1.4327873482067213, "grad_norm": 3.674938050227375, "learning_rate": 1.1208975451855152e-05, "loss": 0.6037, "step": 10147 }, { "epoch": 1.4329285512567072, "grad_norm": 2.950362479360618, "learning_rate": 1.1207462166732844e-05, "loss": 0.5096, "step": 10148 }, { "epoch": 1.433069754306693, "grad_norm": 3.00214144613208, "learning_rate": 1.1205948853549631e-05, "loss": 0.4827, "step": 10149 }, { "epoch": 1.433210957356679, "grad_norm": 3.312204463432061, "learning_rate": 1.1204435512340688e-05, "loss": 0.527, "step": 10150 }, { "epoch": 1.4333521604066648, "grad_norm": 3.6016499685967798, "learning_rate": 1.1202922143141185e-05, "loss": 0.5302, "step": 10151 }, { "epoch": 1.4334933634566507, "grad_norm": 3.641478649800596, "learning_rate": 1.120140874598629e-05, "loss": 0.5605, "step": 10152 }, { "epoch": 1.4336345665066366, "grad_norm": 3.461178219723336, "learning_rate": 1.1199895320911174e-05, "loss": 0.5483, "step": 10153 }, { "epoch": 1.4337757695566224, "grad_norm": 4.3829749981566195, "learning_rate": 1.119838186795101e-05, "loss": 0.7078, "step": 10154 }, { "epoch": 1.4339169726066083, "grad_norm": 4.288655171451108, "learning_rate": 1.119686838714097e-05, "loss": 0.7457, "step": 10155 }, { "epoch": 1.4340581756565942, "grad_norm": 3.126774155741125, "learning_rate": 1.1195354878516222e-05, "loss": 0.4628, "step": 10156 }, { "epoch": 1.43419937870658, "grad_norm": 3.5600594018434517, "learning_rate": 1.1193841342111947e-05, "loss": 0.571, "step": 10157 }, { "epoch": 1.434340581756566, "grad_norm": 3.464086245115906, "learning_rate": 1.1192327777963313e-05, "loss": 0.6437, "step": 10158 }, { "epoch": 1.4344817848065519, "grad_norm": 3.672665205264537, "learning_rate": 1.1190814186105495e-05, "loss": 0.5995, "step": 10159 }, { "epoch": 1.4346229878565377, "grad_norm": 3.204834062967515, "learning_rate": 1.118930056657367e-05, "loss": 0.5026, "step": 10160 }, { "epoch": 1.4347641909065236, "grad_norm": 4.001264861690151, "learning_rate": 1.1187786919403017e-05, "loss": 0.6692, "step": 10161 }, { "epoch": 1.4349053939565095, "grad_norm": 3.8138128473344217, "learning_rate": 1.1186273244628705e-05, "loss": 0.5745, "step": 10162 }, { "epoch": 1.4350465970064954, "grad_norm": 4.08058845709455, "learning_rate": 1.1184759542285917e-05, "loss": 0.5837, "step": 10163 }, { "epoch": 1.4351878000564813, "grad_norm": 2.5467754537744303, "learning_rate": 1.1183245812409828e-05, "loss": 0.418, "step": 10164 }, { "epoch": 1.4353290031064672, "grad_norm": 3.20132441965069, "learning_rate": 1.118173205503562e-05, "loss": 0.5481, "step": 10165 }, { "epoch": 1.435470206156453, "grad_norm": 3.080664964595875, "learning_rate": 1.1180218270198466e-05, "loss": 0.4625, "step": 10166 }, { "epoch": 1.435611409206439, "grad_norm": 3.51871012702738, "learning_rate": 1.1178704457933546e-05, "loss": 0.5679, "step": 10167 }, { "epoch": 1.4357526122564248, "grad_norm": 3.1157088700753324, "learning_rate": 1.117719061827605e-05, "loss": 0.5397, "step": 10168 }, { "epoch": 1.4358938153064107, "grad_norm": 3.495509725769545, "learning_rate": 1.1175676751261151e-05, "loss": 0.5509, "step": 10169 }, { "epoch": 1.4360350183563966, "grad_norm": 3.7507170194994073, "learning_rate": 1.1174162856924029e-05, "loss": 0.5533, "step": 10170 }, { "epoch": 1.4361762214063825, "grad_norm": 4.5732246375444765, "learning_rate": 1.1172648935299872e-05, "loss": 0.7592, "step": 10171 }, { "epoch": 1.4363174244563681, "grad_norm": 3.278282511128118, "learning_rate": 1.1171134986423859e-05, "loss": 0.5738, "step": 10172 }, { "epoch": 1.436458627506354, "grad_norm": 3.4152127451451264, "learning_rate": 1.1169621010331176e-05, "loss": 0.589, "step": 10173 }, { "epoch": 1.4365998305563399, "grad_norm": 4.348242870296071, "learning_rate": 1.1168107007057006e-05, "loss": 0.7292, "step": 10174 }, { "epoch": 1.4367410336063258, "grad_norm": 4.014098025095221, "learning_rate": 1.1166592976636532e-05, "loss": 0.5757, "step": 10175 }, { "epoch": 1.4368822366563117, "grad_norm": 3.333658710563332, "learning_rate": 1.1165078919104942e-05, "loss": 0.5061, "step": 10176 }, { "epoch": 1.4370234397062975, "grad_norm": 3.9786729519539175, "learning_rate": 1.116356483449742e-05, "loss": 0.6587, "step": 10177 }, { "epoch": 1.4371646427562834, "grad_norm": 3.778595706799023, "learning_rate": 1.1162050722849153e-05, "loss": 0.5312, "step": 10178 }, { "epoch": 1.4373058458062693, "grad_norm": 3.209460634321353, "learning_rate": 1.1160536584195332e-05, "loss": 0.5323, "step": 10179 }, { "epoch": 1.4374470488562552, "grad_norm": 4.055262128367164, "learning_rate": 1.115902241857114e-05, "loss": 0.5667, "step": 10180 }, { "epoch": 1.437588251906241, "grad_norm": 3.3614654244154565, "learning_rate": 1.1157508226011768e-05, "loss": 0.5814, "step": 10181 }, { "epoch": 1.437729454956227, "grad_norm": 3.240685209552864, "learning_rate": 1.1155994006552406e-05, "loss": 0.5573, "step": 10182 }, { "epoch": 1.4378706580062128, "grad_norm": 3.4900926883220764, "learning_rate": 1.1154479760228242e-05, "loss": 0.5752, "step": 10183 }, { "epoch": 1.4380118610561987, "grad_norm": 3.880972088849451, "learning_rate": 1.1152965487074466e-05, "loss": 0.5338, "step": 10184 }, { "epoch": 1.4381530641061846, "grad_norm": 3.0956490516356077, "learning_rate": 1.115145118712627e-05, "loss": 0.5092, "step": 10185 }, { "epoch": 1.4382942671561705, "grad_norm": 3.8312963897792622, "learning_rate": 1.1149936860418846e-05, "loss": 0.6713, "step": 10186 }, { "epoch": 1.4384354702061564, "grad_norm": 3.6708463790489816, "learning_rate": 1.1148422506987385e-05, "loss": 0.6105, "step": 10187 }, { "epoch": 1.4385766732561422, "grad_norm": 3.589515968594199, "learning_rate": 1.1146908126867082e-05, "loss": 0.5822, "step": 10188 }, { "epoch": 1.4387178763061281, "grad_norm": 3.8727669037905716, "learning_rate": 1.114539372009313e-05, "loss": 0.5173, "step": 10189 }, { "epoch": 1.438859079356114, "grad_norm": 3.362661750179107, "learning_rate": 1.1143879286700723e-05, "loss": 0.5349, "step": 10190 }, { "epoch": 1.4390002824061, "grad_norm": 3.109731969504989, "learning_rate": 1.1142364826725055e-05, "loss": 0.5878, "step": 10191 }, { "epoch": 1.4391414854560858, "grad_norm": 3.0927715764901578, "learning_rate": 1.1140850340201319e-05, "loss": 0.4969, "step": 10192 }, { "epoch": 1.4392826885060717, "grad_norm": 3.9690771930073305, "learning_rate": 1.1139335827164715e-05, "loss": 0.6086, "step": 10193 }, { "epoch": 1.4394238915560575, "grad_norm": 4.150455120590869, "learning_rate": 1.1137821287650438e-05, "loss": 0.6051, "step": 10194 }, { "epoch": 1.4395650946060434, "grad_norm": 5.291669510513383, "learning_rate": 1.1136306721693688e-05, "loss": 0.7183, "step": 10195 }, { "epoch": 1.4397062976560293, "grad_norm": 3.3108907927954006, "learning_rate": 1.113479212932966e-05, "loss": 0.6306, "step": 10196 }, { "epoch": 1.4398475007060152, "grad_norm": 4.428329472629895, "learning_rate": 1.1133277510593552e-05, "loss": 0.7193, "step": 10197 }, { "epoch": 1.439988703756001, "grad_norm": 2.9972137173000797, "learning_rate": 1.1131762865520566e-05, "loss": 0.5454, "step": 10198 }, { "epoch": 1.440129906805987, "grad_norm": 3.9618604168423452, "learning_rate": 1.1130248194145898e-05, "loss": 0.6817, "step": 10199 }, { "epoch": 1.4402711098559728, "grad_norm": 3.3085697962349294, "learning_rate": 1.1128733496504751e-05, "loss": 0.5125, "step": 10200 }, { "epoch": 1.4404123129059587, "grad_norm": 3.4743132199286686, "learning_rate": 1.1127218772632323e-05, "loss": 0.5296, "step": 10201 }, { "epoch": 1.4405535159559446, "grad_norm": 3.3512081699143645, "learning_rate": 1.112570402256382e-05, "loss": 0.6092, "step": 10202 }, { "epoch": 1.4406947190059305, "grad_norm": 3.068384931849448, "learning_rate": 1.1124189246334441e-05, "loss": 0.5967, "step": 10203 }, { "epoch": 1.4408359220559164, "grad_norm": 3.4967763505332807, "learning_rate": 1.1122674443979387e-05, "loss": 0.5594, "step": 10204 }, { "epoch": 1.4409771251059023, "grad_norm": 3.2500715496935157, "learning_rate": 1.1121159615533865e-05, "loss": 0.4761, "step": 10205 }, { "epoch": 1.4411183281558881, "grad_norm": 3.034916105626154, "learning_rate": 1.1119644761033079e-05, "loss": 0.4467, "step": 10206 }, { "epoch": 1.441259531205874, "grad_norm": 3.0998088830214057, "learning_rate": 1.1118129880512233e-05, "loss": 0.5207, "step": 10207 }, { "epoch": 1.44140073425586, "grad_norm": 3.7484412171881596, "learning_rate": 1.111661497400653e-05, "loss": 0.569, "step": 10208 }, { "epoch": 1.4415419373058458, "grad_norm": 3.994928821852951, "learning_rate": 1.1115100041551179e-05, "loss": 0.724, "step": 10209 }, { "epoch": 1.4416831403558317, "grad_norm": 3.4519019507425375, "learning_rate": 1.111358508318138e-05, "loss": 0.6123, "step": 10210 }, { "epoch": 1.4418243434058176, "grad_norm": 3.646120758335199, "learning_rate": 1.1112070098932348e-05, "loss": 0.5522, "step": 10211 }, { "epoch": 1.4419655464558034, "grad_norm": 4.281977412438733, "learning_rate": 1.1110555088839289e-05, "loss": 0.7513, "step": 10212 }, { "epoch": 1.4421067495057893, "grad_norm": 4.129462842693757, "learning_rate": 1.1109040052937405e-05, "loss": 0.6076, "step": 10213 }, { "epoch": 1.4422479525557752, "grad_norm": 3.751064814271135, "learning_rate": 1.1107524991261913e-05, "loss": 0.668, "step": 10214 }, { "epoch": 1.442389155605761, "grad_norm": 3.1525467752235694, "learning_rate": 1.1106009903848016e-05, "loss": 0.5598, "step": 10215 }, { "epoch": 1.442530358655747, "grad_norm": 3.699244231313742, "learning_rate": 1.1104494790730929e-05, "loss": 0.5881, "step": 10216 }, { "epoch": 1.4426715617057329, "grad_norm": 2.7778617164859587, "learning_rate": 1.1102979651945858e-05, "loss": 0.5036, "step": 10217 }, { "epoch": 1.4428127647557187, "grad_norm": 3.5449157844731625, "learning_rate": 1.1101464487528017e-05, "loss": 0.5324, "step": 10218 }, { "epoch": 1.4429539678057046, "grad_norm": 3.609013425091384, "learning_rate": 1.1099949297512614e-05, "loss": 0.4805, "step": 10219 }, { "epoch": 1.4430951708556905, "grad_norm": 3.859529008220305, "learning_rate": 1.1098434081934871e-05, "loss": 0.5877, "step": 10220 }, { "epoch": 1.4432363739056764, "grad_norm": 4.185219422689172, "learning_rate": 1.1096918840829987e-05, "loss": 0.6904, "step": 10221 }, { "epoch": 1.4433775769556623, "grad_norm": 3.248507218942643, "learning_rate": 1.1095403574233185e-05, "loss": 0.5747, "step": 10222 }, { "epoch": 1.4435187800056481, "grad_norm": 3.890015793755085, "learning_rate": 1.109388828217968e-05, "loss": 0.6095, "step": 10223 }, { "epoch": 1.443659983055634, "grad_norm": 3.7417523024313013, "learning_rate": 1.1092372964704681e-05, "loss": 0.6152, "step": 10224 }, { "epoch": 1.44380118610562, "grad_norm": 3.1791941541936537, "learning_rate": 1.1090857621843407e-05, "loss": 0.5531, "step": 10225 }, { "epoch": 1.4439423891556058, "grad_norm": 3.5683323594358862, "learning_rate": 1.1089342253631074e-05, "loss": 0.5281, "step": 10226 }, { "epoch": 1.4440835922055917, "grad_norm": 3.345598021586362, "learning_rate": 1.1087826860102895e-05, "loss": 0.504, "step": 10227 }, { "epoch": 1.4442247952555776, "grad_norm": 2.976035220109421, "learning_rate": 1.108631144129409e-05, "loss": 0.478, "step": 10228 }, { "epoch": 1.4443659983055634, "grad_norm": 3.342457219337774, "learning_rate": 1.108479599723988e-05, "loss": 0.5642, "step": 10229 }, { "epoch": 1.4445072013555493, "grad_norm": 3.3091616659156737, "learning_rate": 1.1083280527975475e-05, "loss": 0.5695, "step": 10230 }, { "epoch": 1.4446484044055352, "grad_norm": 3.6218192690139, "learning_rate": 1.10817650335361e-05, "loss": 0.6577, "step": 10231 }, { "epoch": 1.444789607455521, "grad_norm": 3.1510252401785306, "learning_rate": 1.1080249513956973e-05, "loss": 0.4891, "step": 10232 }, { "epoch": 1.444930810505507, "grad_norm": 2.599431962433065, "learning_rate": 1.1078733969273315e-05, "loss": 0.5103, "step": 10233 }, { "epoch": 1.4450720135554929, "grad_norm": 3.582409456931109, "learning_rate": 1.1077218399520344e-05, "loss": 0.5087, "step": 10234 }, { "epoch": 1.4452132166054787, "grad_norm": 3.1594310607993124, "learning_rate": 1.107570280473328e-05, "loss": 0.5941, "step": 10235 }, { "epoch": 1.4453544196554646, "grad_norm": 3.160235914729992, "learning_rate": 1.1074187184947351e-05, "loss": 0.5561, "step": 10236 }, { "epoch": 1.4454956227054505, "grad_norm": 3.8734880272602736, "learning_rate": 1.1072671540197777e-05, "loss": 0.5561, "step": 10237 }, { "epoch": 1.4456368257554364, "grad_norm": 3.6081771343340634, "learning_rate": 1.1071155870519777e-05, "loss": 0.5966, "step": 10238 }, { "epoch": 1.4457780288054223, "grad_norm": 3.6605425834216976, "learning_rate": 1.1069640175948577e-05, "loss": 0.7157, "step": 10239 }, { "epoch": 1.4459192318554082, "grad_norm": 3.3878699621693844, "learning_rate": 1.1068124456519402e-05, "loss": 0.6234, "step": 10240 }, { "epoch": 1.446060434905394, "grad_norm": 3.844983757105961, "learning_rate": 1.1066608712267475e-05, "loss": 0.6527, "step": 10241 }, { "epoch": 1.44620163795538, "grad_norm": 4.155255481025409, "learning_rate": 1.1065092943228024e-05, "loss": 0.6436, "step": 10242 }, { "epoch": 1.4463428410053658, "grad_norm": 3.5204935428486217, "learning_rate": 1.1063577149436274e-05, "loss": 0.6684, "step": 10243 }, { "epoch": 1.4464840440553517, "grad_norm": 3.7783460641116573, "learning_rate": 1.1062061330927445e-05, "loss": 0.685, "step": 10244 }, { "epoch": 1.4466252471053376, "grad_norm": 3.6386803765460276, "learning_rate": 1.1060545487736772e-05, "loss": 0.5434, "step": 10245 }, { "epoch": 1.4467664501553235, "grad_norm": 3.3790685106411202, "learning_rate": 1.1059029619899483e-05, "loss": 0.5858, "step": 10246 }, { "epoch": 1.4469076532053093, "grad_norm": 3.218739877938669, "learning_rate": 1.1057513727450798e-05, "loss": 0.5692, "step": 10247 }, { "epoch": 1.4470488562552952, "grad_norm": 4.138984876204369, "learning_rate": 1.1055997810425954e-05, "loss": 0.7225, "step": 10248 }, { "epoch": 1.447190059305281, "grad_norm": 3.4874890445087234, "learning_rate": 1.1054481868860177e-05, "loss": 0.5904, "step": 10249 }, { "epoch": 1.447331262355267, "grad_norm": 4.085315244710668, "learning_rate": 1.1052965902788694e-05, "loss": 0.6542, "step": 10250 }, { "epoch": 1.4474724654052529, "grad_norm": 3.4982887998917516, "learning_rate": 1.1051449912246742e-05, "loss": 0.4717, "step": 10251 }, { "epoch": 1.4476136684552388, "grad_norm": 3.5071305921382145, "learning_rate": 1.1049933897269547e-05, "loss": 0.6296, "step": 10252 }, { "epoch": 1.4477548715052246, "grad_norm": 3.641597912661041, "learning_rate": 1.1048417857892339e-05, "loss": 0.6142, "step": 10253 }, { "epoch": 1.4478960745552105, "grad_norm": 3.3152501380191715, "learning_rate": 1.1046901794150358e-05, "loss": 0.5459, "step": 10254 }, { "epoch": 1.4480372776051964, "grad_norm": 3.3479763042900834, "learning_rate": 1.1045385706078826e-05, "loss": 0.5681, "step": 10255 }, { "epoch": 1.4481784806551823, "grad_norm": 3.8582218447196195, "learning_rate": 1.1043869593712984e-05, "loss": 0.6682, "step": 10256 }, { "epoch": 1.448319683705168, "grad_norm": 3.802552777334054, "learning_rate": 1.1042353457088061e-05, "loss": 0.6307, "step": 10257 }, { "epoch": 1.4484608867551538, "grad_norm": 3.3372462847401443, "learning_rate": 1.1040837296239298e-05, "loss": 0.6404, "step": 10258 }, { "epoch": 1.4486020898051397, "grad_norm": 3.801508859395669, "learning_rate": 1.1039321111201925e-05, "loss": 0.6327, "step": 10259 }, { "epoch": 1.4487432928551256, "grad_norm": 3.7170545622961635, "learning_rate": 1.1037804902011175e-05, "loss": 0.6207, "step": 10260 }, { "epoch": 1.4488844959051115, "grad_norm": 3.7115005010479125, "learning_rate": 1.103628866870229e-05, "loss": 0.5959, "step": 10261 }, { "epoch": 1.4490256989550974, "grad_norm": 3.7759117772848207, "learning_rate": 1.1034772411310503e-05, "loss": 0.5902, "step": 10262 }, { "epoch": 1.4491669020050832, "grad_norm": 3.4630420107690827, "learning_rate": 1.1033256129871053e-05, "loss": 0.5829, "step": 10263 }, { "epoch": 1.4493081050550691, "grad_norm": 3.586183584217599, "learning_rate": 1.1031739824419175e-05, "loss": 0.5029, "step": 10264 }, { "epoch": 1.449449308105055, "grad_norm": 3.876543347961651, "learning_rate": 1.103022349499011e-05, "loss": 0.6651, "step": 10265 }, { "epoch": 1.449590511155041, "grad_norm": 2.796508523258564, "learning_rate": 1.1028707141619095e-05, "loss": 0.4626, "step": 10266 }, { "epoch": 1.4497317142050268, "grad_norm": 4.148759062646071, "learning_rate": 1.102719076434137e-05, "loss": 0.6524, "step": 10267 }, { "epoch": 1.4498729172550127, "grad_norm": 3.1981412789287296, "learning_rate": 1.1025674363192179e-05, "loss": 0.5829, "step": 10268 }, { "epoch": 1.4500141203049985, "grad_norm": 3.336529985561878, "learning_rate": 1.1024157938206755e-05, "loss": 0.623, "step": 10269 }, { "epoch": 1.4501553233549844, "grad_norm": 3.37614490203797, "learning_rate": 1.1022641489420342e-05, "loss": 0.6336, "step": 10270 }, { "epoch": 1.4502965264049703, "grad_norm": 3.4216815206577635, "learning_rate": 1.1021125016868189e-05, "loss": 0.5397, "step": 10271 }, { "epoch": 1.4504377294549562, "grad_norm": 4.081607858482453, "learning_rate": 1.1019608520585525e-05, "loss": 0.7116, "step": 10272 }, { "epoch": 1.450578932504942, "grad_norm": 3.3634227207403797, "learning_rate": 1.1018092000607599e-05, "loss": 0.5857, "step": 10273 }, { "epoch": 1.450720135554928, "grad_norm": 3.2964944355924493, "learning_rate": 1.1016575456969658e-05, "loss": 0.5394, "step": 10274 }, { "epoch": 1.4508613386049138, "grad_norm": 3.4346688129993446, "learning_rate": 1.1015058889706942e-05, "loss": 0.6222, "step": 10275 }, { "epoch": 1.4510025416548997, "grad_norm": 3.456330243041741, "learning_rate": 1.1013542298854696e-05, "loss": 0.6402, "step": 10276 }, { "epoch": 1.4511437447048856, "grad_norm": 3.1236848332178835, "learning_rate": 1.1012025684448162e-05, "loss": 0.4698, "step": 10277 }, { "epoch": 1.4512849477548715, "grad_norm": 2.8406817123197703, "learning_rate": 1.101050904652259e-05, "loss": 0.5285, "step": 10278 }, { "epoch": 1.4514261508048574, "grad_norm": 3.5626459349515405, "learning_rate": 1.1008992385113224e-05, "loss": 0.5626, "step": 10279 }, { "epoch": 1.4515673538548433, "grad_norm": 4.292306082624468, "learning_rate": 1.1007475700255313e-05, "loss": 0.7603, "step": 10280 }, { "epoch": 1.4517085569048291, "grad_norm": 3.236743585812309, "learning_rate": 1.1005958991984096e-05, "loss": 0.4301, "step": 10281 }, { "epoch": 1.451849759954815, "grad_norm": 4.105005606777591, "learning_rate": 1.1004442260334832e-05, "loss": 0.6253, "step": 10282 }, { "epoch": 1.451990963004801, "grad_norm": 3.1024464325030516, "learning_rate": 1.1002925505342761e-05, "loss": 0.4616, "step": 10283 }, { "epoch": 1.4521321660547868, "grad_norm": 2.8474071568495902, "learning_rate": 1.1001408727043135e-05, "loss": 0.5353, "step": 10284 }, { "epoch": 1.4522733691047727, "grad_norm": 3.704881012967332, "learning_rate": 1.0999891925471205e-05, "loss": 0.6117, "step": 10285 }, { "epoch": 1.4524145721547586, "grad_norm": 3.711059401069895, "learning_rate": 1.0998375100662215e-05, "loss": 0.5509, "step": 10286 }, { "epoch": 1.4525557752047444, "grad_norm": 4.105082232695406, "learning_rate": 1.0996858252651419e-05, "loss": 0.6737, "step": 10287 }, { "epoch": 1.4526969782547303, "grad_norm": 4.124078518220901, "learning_rate": 1.099534138147407e-05, "loss": 0.6554, "step": 10288 }, { "epoch": 1.4528381813047162, "grad_norm": 4.378122190321998, "learning_rate": 1.0993824487165416e-05, "loss": 0.7573, "step": 10289 }, { "epoch": 1.452979384354702, "grad_norm": 3.3598573413751107, "learning_rate": 1.0992307569760709e-05, "loss": 0.5607, "step": 10290 }, { "epoch": 1.453120587404688, "grad_norm": 3.4125949641752857, "learning_rate": 1.0990790629295204e-05, "loss": 0.5449, "step": 10291 }, { "epoch": 1.4532617904546739, "grad_norm": 4.044745567078359, "learning_rate": 1.0989273665804154e-05, "loss": 0.6825, "step": 10292 }, { "epoch": 1.4534029935046597, "grad_norm": 3.29475211094661, "learning_rate": 1.0987756679322807e-05, "loss": 0.527, "step": 10293 }, { "epoch": 1.4535441965546456, "grad_norm": 3.216781855239586, "learning_rate": 1.0986239669886425e-05, "loss": 0.4855, "step": 10294 }, { "epoch": 1.4536853996046315, "grad_norm": 3.961603309861623, "learning_rate": 1.0984722637530258e-05, "loss": 0.6702, "step": 10295 }, { "epoch": 1.4538266026546174, "grad_norm": 4.012340623267666, "learning_rate": 1.0983205582289563e-05, "loss": 0.7655, "step": 10296 }, { "epoch": 1.4539678057046033, "grad_norm": 3.522880308302813, "learning_rate": 1.0981688504199595e-05, "loss": 0.5083, "step": 10297 }, { "epoch": 1.4541090087545891, "grad_norm": 2.904759395936941, "learning_rate": 1.098017140329561e-05, "loss": 0.4356, "step": 10298 }, { "epoch": 1.454250211804575, "grad_norm": 4.222084561270195, "learning_rate": 1.0978654279612862e-05, "loss": 0.7376, "step": 10299 }, { "epoch": 1.454391414854561, "grad_norm": 3.9992250996167775, "learning_rate": 1.0977137133186613e-05, "loss": 0.5776, "step": 10300 }, { "epoch": 1.4545326179045468, "grad_norm": 3.9470818868904813, "learning_rate": 1.0975619964052118e-05, "loss": 0.6164, "step": 10301 }, { "epoch": 1.4546738209545327, "grad_norm": 3.2716421620549934, "learning_rate": 1.0974102772244638e-05, "loss": 0.6067, "step": 10302 }, { "epoch": 1.4548150240045186, "grad_norm": 3.925522801048943, "learning_rate": 1.097258555779943e-05, "loss": 0.6954, "step": 10303 }, { "epoch": 1.4549562270545044, "grad_norm": 4.230996507642602, "learning_rate": 1.0971068320751753e-05, "loss": 0.5994, "step": 10304 }, { "epoch": 1.4550974301044903, "grad_norm": 2.769565190211379, "learning_rate": 1.096955106113687e-05, "loss": 0.3795, "step": 10305 }, { "epoch": 1.4552386331544762, "grad_norm": 3.2421017341293767, "learning_rate": 1.0968033778990038e-05, "loss": 0.5702, "step": 10306 }, { "epoch": 1.455379836204462, "grad_norm": 3.234909043921726, "learning_rate": 1.0966516474346515e-05, "loss": 0.5278, "step": 10307 }, { "epoch": 1.455521039254448, "grad_norm": 3.3577658470041847, "learning_rate": 1.096499914724157e-05, "loss": 0.5446, "step": 10308 }, { "epoch": 1.4556622423044336, "grad_norm": 3.407952393923366, "learning_rate": 1.0963481797710465e-05, "loss": 0.518, "step": 10309 }, { "epoch": 1.4558034453544195, "grad_norm": 3.9776042585345466, "learning_rate": 1.0961964425788452e-05, "loss": 0.6584, "step": 10310 }, { "epoch": 1.4559446484044054, "grad_norm": 3.9229993359686564, "learning_rate": 1.0960447031510806e-05, "loss": 0.6244, "step": 10311 }, { "epoch": 1.4560858514543913, "grad_norm": 3.7691702152409166, "learning_rate": 1.0958929614912782e-05, "loss": 0.5441, "step": 10312 }, { "epoch": 1.4562270545043772, "grad_norm": 4.336623781477096, "learning_rate": 1.0957412176029654e-05, "loss": 0.8348, "step": 10313 }, { "epoch": 1.456368257554363, "grad_norm": 3.1462450253665852, "learning_rate": 1.0955894714896675e-05, "loss": 0.5004, "step": 10314 }, { "epoch": 1.456509460604349, "grad_norm": 3.2509581267816925, "learning_rate": 1.0954377231549118e-05, "loss": 0.4721, "step": 10315 }, { "epoch": 1.4566506636543348, "grad_norm": 4.061972220006804, "learning_rate": 1.0952859726022245e-05, "loss": 0.6344, "step": 10316 }, { "epoch": 1.4567918667043207, "grad_norm": 3.2250983500476575, "learning_rate": 1.0951342198351323e-05, "loss": 0.4898, "step": 10317 }, { "epoch": 1.4569330697543066, "grad_norm": 4.172205771095662, "learning_rate": 1.094982464857162e-05, "loss": 0.7877, "step": 10318 }, { "epoch": 1.4570742728042925, "grad_norm": 3.6020383806437155, "learning_rate": 1.0948307076718402e-05, "loss": 0.5291, "step": 10319 }, { "epoch": 1.4572154758542784, "grad_norm": 3.3860146948272223, "learning_rate": 1.094678948282694e-05, "loss": 0.5806, "step": 10320 }, { "epoch": 1.4573566789042642, "grad_norm": 4.156249286545769, "learning_rate": 1.0945271866932496e-05, "loss": 0.6285, "step": 10321 }, { "epoch": 1.4574978819542501, "grad_norm": 2.797652314536296, "learning_rate": 1.0943754229070344e-05, "loss": 0.3955, "step": 10322 }, { "epoch": 1.457639085004236, "grad_norm": 4.019988930518867, "learning_rate": 1.094223656927575e-05, "loss": 0.6738, "step": 10323 }, { "epoch": 1.4577802880542219, "grad_norm": 2.845459398148412, "learning_rate": 1.0940718887583985e-05, "loss": 0.4818, "step": 10324 }, { "epoch": 1.4579214911042078, "grad_norm": 4.095464728730396, "learning_rate": 1.093920118403032e-05, "loss": 0.5949, "step": 10325 }, { "epoch": 1.4580626941541937, "grad_norm": 3.596800392040946, "learning_rate": 1.0937683458650029e-05, "loss": 0.6219, "step": 10326 }, { "epoch": 1.4582038972041795, "grad_norm": 3.62212557203208, "learning_rate": 1.0936165711478373e-05, "loss": 0.6237, "step": 10327 }, { "epoch": 1.4583451002541654, "grad_norm": 3.8831305825837306, "learning_rate": 1.093464794255063e-05, "loss": 0.6654, "step": 10328 }, { "epoch": 1.4584863033041513, "grad_norm": 4.221288618480987, "learning_rate": 1.0933130151902077e-05, "loss": 0.6433, "step": 10329 }, { "epoch": 1.4586275063541372, "grad_norm": 4.903291535950068, "learning_rate": 1.093161233956798e-05, "loss": 0.8723, "step": 10330 }, { "epoch": 1.458768709404123, "grad_norm": 3.5842497495379657, "learning_rate": 1.0930094505583615e-05, "loss": 0.6363, "step": 10331 }, { "epoch": 1.458909912454109, "grad_norm": 4.227973659363782, "learning_rate": 1.0928576649984254e-05, "loss": 0.6225, "step": 10332 }, { "epoch": 1.4590511155040948, "grad_norm": 3.843970772672388, "learning_rate": 1.0927058772805172e-05, "loss": 0.5726, "step": 10333 }, { "epoch": 1.4591923185540807, "grad_norm": 2.8340116964624062, "learning_rate": 1.0925540874081649e-05, "loss": 0.4943, "step": 10334 }, { "epoch": 1.4593335216040666, "grad_norm": 3.5688549562963168, "learning_rate": 1.0924022953848951e-05, "loss": 0.6283, "step": 10335 }, { "epoch": 1.4594747246540525, "grad_norm": 3.456899256247618, "learning_rate": 1.092250501214236e-05, "loss": 0.5626, "step": 10336 }, { "epoch": 1.4596159277040384, "grad_norm": 3.409645630217186, "learning_rate": 1.0920987048997153e-05, "loss": 0.5695, "step": 10337 }, { "epoch": 1.4597571307540242, "grad_norm": 3.21927827517015, "learning_rate": 1.0919469064448604e-05, "loss": 0.6086, "step": 10338 }, { "epoch": 1.4598983338040101, "grad_norm": 3.1512295950067557, "learning_rate": 1.091795105853199e-05, "loss": 0.5323, "step": 10339 }, { "epoch": 1.460039536853996, "grad_norm": 4.263606246348635, "learning_rate": 1.0916433031282592e-05, "loss": 0.7008, "step": 10340 }, { "epoch": 1.460180739903982, "grad_norm": 3.6660234701842658, "learning_rate": 1.0914914982735682e-05, "loss": 0.6468, "step": 10341 }, { "epoch": 1.4603219429539678, "grad_norm": 7.799598329527165, "learning_rate": 1.0913396912926546e-05, "loss": 0.6673, "step": 10342 }, { "epoch": 1.4604631460039537, "grad_norm": 3.50623794955251, "learning_rate": 1.0911878821890461e-05, "loss": 0.5584, "step": 10343 }, { "epoch": 1.4606043490539395, "grad_norm": 2.973710745789418, "learning_rate": 1.0910360709662701e-05, "loss": 0.5324, "step": 10344 }, { "epoch": 1.4607455521039254, "grad_norm": 3.1027784580377142, "learning_rate": 1.0908842576278555e-05, "loss": 0.5893, "step": 10345 }, { "epoch": 1.4608867551539113, "grad_norm": 3.3180762187604924, "learning_rate": 1.0907324421773302e-05, "loss": 0.5423, "step": 10346 }, { "epoch": 1.4610279582038972, "grad_norm": 4.162682565085654, "learning_rate": 1.0905806246182218e-05, "loss": 0.649, "step": 10347 }, { "epoch": 1.461169161253883, "grad_norm": 3.458562571212591, "learning_rate": 1.090428804954059e-05, "loss": 0.5893, "step": 10348 }, { "epoch": 1.461310364303869, "grad_norm": 3.164810450700185, "learning_rate": 1.0902769831883697e-05, "loss": 0.4923, "step": 10349 }, { "epoch": 1.4614515673538548, "grad_norm": 4.0093631410551955, "learning_rate": 1.0901251593246822e-05, "loss": 0.6879, "step": 10350 }, { "epoch": 1.4615927704038407, "grad_norm": 3.3033322653133683, "learning_rate": 1.0899733333665252e-05, "loss": 0.5475, "step": 10351 }, { "epoch": 1.4617339734538266, "grad_norm": 3.8529186288983595, "learning_rate": 1.0898215053174268e-05, "loss": 0.5359, "step": 10352 }, { "epoch": 1.4618751765038125, "grad_norm": 4.098375724514003, "learning_rate": 1.089669675180915e-05, "loss": 0.8594, "step": 10353 }, { "epoch": 1.4620163795537984, "grad_norm": 3.6184167574785615, "learning_rate": 1.0895178429605189e-05, "loss": 0.5819, "step": 10354 }, { "epoch": 1.4621575826037843, "grad_norm": 3.8381203701209112, "learning_rate": 1.0893660086597668e-05, "loss": 0.7206, "step": 10355 }, { "epoch": 1.4622987856537701, "grad_norm": 2.938402918949583, "learning_rate": 1.0892141722821873e-05, "loss": 0.4686, "step": 10356 }, { "epoch": 1.462439988703756, "grad_norm": 3.698528741292727, "learning_rate": 1.0890623338313089e-05, "loss": 0.6042, "step": 10357 }, { "epoch": 1.462581191753742, "grad_norm": 2.8452363322434056, "learning_rate": 1.0889104933106604e-05, "loss": 0.4726, "step": 10358 }, { "epoch": 1.4627223948037278, "grad_norm": 3.556340544864714, "learning_rate": 1.0887586507237702e-05, "loss": 0.6424, "step": 10359 }, { "epoch": 1.4628635978537137, "grad_norm": 4.071606828981776, "learning_rate": 1.0886068060741676e-05, "loss": 0.7168, "step": 10360 }, { "epoch": 1.4630048009036996, "grad_norm": 4.441757937489678, "learning_rate": 1.0884549593653808e-05, "loss": 0.7299, "step": 10361 }, { "epoch": 1.4631460039536854, "grad_norm": 3.614522493534942, "learning_rate": 1.0883031106009393e-05, "loss": 0.6229, "step": 10362 }, { "epoch": 1.4632872070036713, "grad_norm": 3.234600905190223, "learning_rate": 1.0881512597843713e-05, "loss": 0.4813, "step": 10363 }, { "epoch": 1.4634284100536572, "grad_norm": 4.331661675565904, "learning_rate": 1.0879994069192064e-05, "loss": 0.6641, "step": 10364 }, { "epoch": 1.463569613103643, "grad_norm": 3.4802010588176504, "learning_rate": 1.0878475520089732e-05, "loss": 0.5323, "step": 10365 }, { "epoch": 1.463710816153629, "grad_norm": 4.479205546729229, "learning_rate": 1.0876956950572006e-05, "loss": 0.8329, "step": 10366 }, { "epoch": 1.4638520192036149, "grad_norm": 3.134131170952976, "learning_rate": 1.087543836067418e-05, "loss": 0.5336, "step": 10367 }, { "epoch": 1.4639932222536007, "grad_norm": 3.643354150070815, "learning_rate": 1.0873919750431548e-05, "loss": 0.6946, "step": 10368 }, { "epoch": 1.4641344253035866, "grad_norm": 3.548535316032411, "learning_rate": 1.0872401119879396e-05, "loss": 0.5605, "step": 10369 }, { "epoch": 1.4642756283535725, "grad_norm": 4.302161238258756, "learning_rate": 1.0870882469053016e-05, "loss": 0.6791, "step": 10370 }, { "epoch": 1.4644168314035584, "grad_norm": 3.249436752740073, "learning_rate": 1.0869363797987707e-05, "loss": 0.5461, "step": 10371 }, { "epoch": 1.4645580344535443, "grad_norm": 3.4682971959672995, "learning_rate": 1.0867845106718758e-05, "loss": 0.5786, "step": 10372 }, { "epoch": 1.4646992375035301, "grad_norm": 3.2388828889311854, "learning_rate": 1.0866326395281463e-05, "loss": 0.5161, "step": 10373 }, { "epoch": 1.464840440553516, "grad_norm": 3.751561565352993, "learning_rate": 1.0864807663711118e-05, "loss": 0.6248, "step": 10374 }, { "epoch": 1.464981643603502, "grad_norm": 3.7928936501005586, "learning_rate": 1.0863288912043016e-05, "loss": 0.6161, "step": 10375 }, { "epoch": 1.4651228466534878, "grad_norm": 4.038829385433803, "learning_rate": 1.0861770140312449e-05, "loss": 0.7093, "step": 10376 }, { "epoch": 1.4652640497034737, "grad_norm": 4.0490856934253605, "learning_rate": 1.0860251348554723e-05, "loss": 0.7059, "step": 10377 }, { "epoch": 1.4654052527534596, "grad_norm": 3.0923054480654466, "learning_rate": 1.085873253680512e-05, "loss": 0.5296, "step": 10378 }, { "epoch": 1.4655464558034454, "grad_norm": 3.3423014111151343, "learning_rate": 1.0857213705098947e-05, "loss": 0.5797, "step": 10379 }, { "epoch": 1.4656876588534313, "grad_norm": 4.0309226084984005, "learning_rate": 1.0855694853471499e-05, "loss": 0.5816, "step": 10380 }, { "epoch": 1.4658288619034172, "grad_norm": 3.38876769052743, "learning_rate": 1.085417598195807e-05, "loss": 0.6294, "step": 10381 }, { "epoch": 1.465970064953403, "grad_norm": 3.46832708229239, "learning_rate": 1.0852657090593961e-05, "loss": 0.6476, "step": 10382 }, { "epoch": 1.466111268003389, "grad_norm": 4.39712011402426, "learning_rate": 1.0851138179414471e-05, "loss": 0.7632, "step": 10383 }, { "epoch": 1.4662524710533749, "grad_norm": 3.6646474929537862, "learning_rate": 1.0849619248454893e-05, "loss": 0.5928, "step": 10384 }, { "epoch": 1.4663936741033607, "grad_norm": 3.6290054584983404, "learning_rate": 1.0848100297750535e-05, "loss": 0.6257, "step": 10385 }, { "epoch": 1.4665348771533466, "grad_norm": 3.7683273180857673, "learning_rate": 1.0846581327336692e-05, "loss": 0.6369, "step": 10386 }, { "epoch": 1.4666760802033325, "grad_norm": 3.7830890593924082, "learning_rate": 1.084506233724866e-05, "loss": 0.5587, "step": 10387 }, { "epoch": 1.4668172832533184, "grad_norm": 3.133506776080068, "learning_rate": 1.0843543327521748e-05, "loss": 0.4483, "step": 10388 }, { "epoch": 1.4669584863033043, "grad_norm": 3.4443633395274724, "learning_rate": 1.0842024298191254e-05, "loss": 0.5697, "step": 10389 }, { "epoch": 1.4670996893532902, "grad_norm": 3.323986688336242, "learning_rate": 1.0840505249292477e-05, "loss": 0.5318, "step": 10390 }, { "epoch": 1.467240892403276, "grad_norm": 3.511022936738072, "learning_rate": 1.0838986180860722e-05, "loss": 0.5561, "step": 10391 }, { "epoch": 1.467382095453262, "grad_norm": 3.2610959828646617, "learning_rate": 1.083746709293129e-05, "loss": 0.5336, "step": 10392 }, { "epoch": 1.4675232985032478, "grad_norm": 3.501655613180448, "learning_rate": 1.0835947985539483e-05, "loss": 0.6355, "step": 10393 }, { "epoch": 1.4676645015532335, "grad_norm": 2.7594802884987515, "learning_rate": 1.0834428858720608e-05, "loss": 0.42, "step": 10394 }, { "epoch": 1.4678057046032194, "grad_norm": 4.308681262876709, "learning_rate": 1.0832909712509969e-05, "loss": 0.6767, "step": 10395 }, { "epoch": 1.4679469076532052, "grad_norm": 4.1245905150373865, "learning_rate": 1.083139054694286e-05, "loss": 0.7199, "step": 10396 }, { "epoch": 1.4680881107031911, "grad_norm": 3.166668807628487, "learning_rate": 1.0829871362054601e-05, "loss": 0.6161, "step": 10397 }, { "epoch": 1.468229313753177, "grad_norm": 4.394860796622902, "learning_rate": 1.0828352157880489e-05, "loss": 0.7449, "step": 10398 }, { "epoch": 1.4683705168031629, "grad_norm": 3.5970071894644957, "learning_rate": 1.0826832934455828e-05, "loss": 0.527, "step": 10399 }, { "epoch": 1.4685117198531488, "grad_norm": 3.5337563681169595, "learning_rate": 1.0825313691815928e-05, "loss": 0.5587, "step": 10400 }, { "epoch": 1.4686529229031346, "grad_norm": 3.6965098036515394, "learning_rate": 1.0823794429996094e-05, "loss": 0.5124, "step": 10401 }, { "epoch": 1.4687941259531205, "grad_norm": 3.8904744486095564, "learning_rate": 1.0822275149031635e-05, "loss": 0.5851, "step": 10402 }, { "epoch": 1.4689353290031064, "grad_norm": 3.6872827611950143, "learning_rate": 1.0820755848957855e-05, "loss": 0.5208, "step": 10403 }, { "epoch": 1.4690765320530923, "grad_norm": 4.33146482257058, "learning_rate": 1.0819236529810062e-05, "loss": 0.7343, "step": 10404 }, { "epoch": 1.4692177351030782, "grad_norm": 3.5183057383562026, "learning_rate": 1.0817717191623569e-05, "loss": 0.6118, "step": 10405 }, { "epoch": 1.469358938153064, "grad_norm": 3.445767871033962, "learning_rate": 1.081619783443368e-05, "loss": 0.595, "step": 10406 }, { "epoch": 1.46950014120305, "grad_norm": 3.835817735510652, "learning_rate": 1.0814678458275705e-05, "loss": 0.6636, "step": 10407 }, { "epoch": 1.4696413442530358, "grad_norm": 3.093060476196578, "learning_rate": 1.0813159063184958e-05, "loss": 0.495, "step": 10408 }, { "epoch": 1.4697825473030217, "grad_norm": 3.852423528651246, "learning_rate": 1.081163964919674e-05, "loss": 0.5267, "step": 10409 }, { "epoch": 1.4699237503530076, "grad_norm": 3.302644399578199, "learning_rate": 1.0810120216346368e-05, "loss": 0.4996, "step": 10410 }, { "epoch": 1.4700649534029935, "grad_norm": 3.3701716090886054, "learning_rate": 1.0808600764669158e-05, "loss": 0.5387, "step": 10411 }, { "epoch": 1.4702061564529794, "grad_norm": 3.036843921020384, "learning_rate": 1.0807081294200413e-05, "loss": 0.5244, "step": 10412 }, { "epoch": 1.4703473595029652, "grad_norm": 3.4710049001811765, "learning_rate": 1.0805561804975443e-05, "loss": 0.5501, "step": 10413 }, { "epoch": 1.4704885625529511, "grad_norm": 3.5341725858566346, "learning_rate": 1.0804042297029567e-05, "loss": 0.6908, "step": 10414 }, { "epoch": 1.470629765602937, "grad_norm": 3.4580664701748196, "learning_rate": 1.0802522770398096e-05, "loss": 0.4999, "step": 10415 }, { "epoch": 1.470770968652923, "grad_norm": 3.6156021207366114, "learning_rate": 1.0801003225116341e-05, "loss": 0.7137, "step": 10416 }, { "epoch": 1.4709121717029088, "grad_norm": 3.620213872791416, "learning_rate": 1.0799483661219618e-05, "loss": 0.7032, "step": 10417 }, { "epoch": 1.4710533747528947, "grad_norm": 4.898069680793037, "learning_rate": 1.0797964078743241e-05, "loss": 0.6199, "step": 10418 }, { "epoch": 1.4711945778028805, "grad_norm": 4.200809871991145, "learning_rate": 1.0796444477722522e-05, "loss": 0.6432, "step": 10419 }, { "epoch": 1.4713357808528664, "grad_norm": 4.293409899271141, "learning_rate": 1.0794924858192779e-05, "loss": 0.6598, "step": 10420 }, { "epoch": 1.4714769839028523, "grad_norm": 3.5658779674426793, "learning_rate": 1.0793405220189321e-05, "loss": 0.5144, "step": 10421 }, { "epoch": 1.4716181869528382, "grad_norm": 3.884556252460006, "learning_rate": 1.0791885563747472e-05, "loss": 0.6591, "step": 10422 }, { "epoch": 1.471759390002824, "grad_norm": 3.6834960834569905, "learning_rate": 1.0790365888902548e-05, "loss": 0.6348, "step": 10423 }, { "epoch": 1.47190059305281, "grad_norm": 4.256157278418224, "learning_rate": 1.0788846195689856e-05, "loss": 0.7412, "step": 10424 }, { "epoch": 1.4720417961027958, "grad_norm": 3.162951852510564, "learning_rate": 1.078732648414472e-05, "loss": 0.4989, "step": 10425 }, { "epoch": 1.4721829991527817, "grad_norm": 3.5988432162041213, "learning_rate": 1.078580675430246e-05, "loss": 0.5783, "step": 10426 }, { "epoch": 1.4723242022027676, "grad_norm": 3.688778344833739, "learning_rate": 1.0784287006198386e-05, "loss": 0.5468, "step": 10427 }, { "epoch": 1.4724654052527535, "grad_norm": 3.418627873227131, "learning_rate": 1.0782767239867824e-05, "loss": 0.5452, "step": 10428 }, { "epoch": 1.4726066083027394, "grad_norm": 3.9318323875691275, "learning_rate": 1.078124745534609e-05, "loss": 0.5106, "step": 10429 }, { "epoch": 1.4727478113527253, "grad_norm": 2.9535645132165262, "learning_rate": 1.0779727652668496e-05, "loss": 0.4616, "step": 10430 }, { "epoch": 1.4728890144027111, "grad_norm": 3.4650808317254467, "learning_rate": 1.0778207831870375e-05, "loss": 0.6312, "step": 10431 }, { "epoch": 1.473030217452697, "grad_norm": 3.391749201837755, "learning_rate": 1.0776687992987038e-05, "loss": 0.5626, "step": 10432 }, { "epoch": 1.473171420502683, "grad_norm": 3.2243127522452775, "learning_rate": 1.0775168136053809e-05, "loss": 0.4784, "step": 10433 }, { "epoch": 1.4733126235526688, "grad_norm": 4.005976290688714, "learning_rate": 1.0773648261106005e-05, "loss": 0.6841, "step": 10434 }, { "epoch": 1.4734538266026547, "grad_norm": 3.960551467831488, "learning_rate": 1.0772128368178949e-05, "loss": 0.6163, "step": 10435 }, { "epoch": 1.4735950296526406, "grad_norm": 3.5547132592901702, "learning_rate": 1.0770608457307965e-05, "loss": 0.6315, "step": 10436 }, { "epoch": 1.4737362327026264, "grad_norm": 4.727722578441118, "learning_rate": 1.0769088528528373e-05, "loss": 0.7355, "step": 10437 }, { "epoch": 1.4738774357526123, "grad_norm": 3.9931113841292625, "learning_rate": 1.0767568581875494e-05, "loss": 0.5316, "step": 10438 }, { "epoch": 1.4740186388025982, "grad_norm": 3.3682292153760587, "learning_rate": 1.0766048617384654e-05, "loss": 0.5396, "step": 10439 }, { "epoch": 1.474159841852584, "grad_norm": 3.8446960816553046, "learning_rate": 1.0764528635091179e-05, "loss": 0.6224, "step": 10440 }, { "epoch": 1.47430104490257, "grad_norm": 3.913708299313398, "learning_rate": 1.076300863503038e-05, "loss": 0.6832, "step": 10441 }, { "epoch": 1.4744422479525559, "grad_norm": 3.1458983115580392, "learning_rate": 1.0761488617237597e-05, "loss": 0.4528, "step": 10442 }, { "epoch": 1.4745834510025417, "grad_norm": 4.977736567548205, "learning_rate": 1.0759968581748143e-05, "loss": 0.919, "step": 10443 }, { "epoch": 1.4747246540525276, "grad_norm": 3.882070019188596, "learning_rate": 1.075844852859735e-05, "loss": 0.5102, "step": 10444 }, { "epoch": 1.4748658571025133, "grad_norm": 3.212998954048273, "learning_rate": 1.075692845782054e-05, "loss": 0.6291, "step": 10445 }, { "epoch": 1.4750070601524992, "grad_norm": 4.20949430170499, "learning_rate": 1.075540836945304e-05, "loss": 0.6801, "step": 10446 }, { "epoch": 1.475148263202485, "grad_norm": 3.0995461694264277, "learning_rate": 1.0753888263530174e-05, "loss": 0.5119, "step": 10447 }, { "epoch": 1.475289466252471, "grad_norm": 3.837965917437043, "learning_rate": 1.0752368140087272e-05, "loss": 0.6213, "step": 10448 }, { "epoch": 1.4754306693024568, "grad_norm": 3.8250854350871415, "learning_rate": 1.0750847999159662e-05, "loss": 0.6223, "step": 10449 }, { "epoch": 1.4755718723524427, "grad_norm": 4.290464494944606, "learning_rate": 1.0749327840782663e-05, "loss": 0.6823, "step": 10450 }, { "epoch": 1.4757130754024286, "grad_norm": 3.973253194572791, "learning_rate": 1.0747807664991613e-05, "loss": 0.6312, "step": 10451 }, { "epoch": 1.4758542784524145, "grad_norm": 3.1632984312404133, "learning_rate": 1.0746287471821833e-05, "loss": 0.5967, "step": 10452 }, { "epoch": 1.4759954815024003, "grad_norm": 3.1598379459742887, "learning_rate": 1.0744767261308655e-05, "loss": 0.481, "step": 10453 }, { "epoch": 1.4761366845523862, "grad_norm": 3.6438554689759153, "learning_rate": 1.074324703348741e-05, "loss": 0.5884, "step": 10454 }, { "epoch": 1.476277887602372, "grad_norm": 3.5519157104017025, "learning_rate": 1.0741726788393422e-05, "loss": 0.5907, "step": 10455 }, { "epoch": 1.476419090652358, "grad_norm": 4.303627433455919, "learning_rate": 1.0740206526062022e-05, "loss": 0.8525, "step": 10456 }, { "epoch": 1.4765602937023439, "grad_norm": 3.2900217342551197, "learning_rate": 1.0738686246528549e-05, "loss": 0.5198, "step": 10457 }, { "epoch": 1.4767014967523298, "grad_norm": 3.7588471465778124, "learning_rate": 1.073716594982832e-05, "loss": 0.6257, "step": 10458 }, { "epoch": 1.4768426998023156, "grad_norm": 4.907918311461345, "learning_rate": 1.0735645635996676e-05, "loss": 0.7455, "step": 10459 }, { "epoch": 1.4769839028523015, "grad_norm": 3.619490274864713, "learning_rate": 1.0734125305068943e-05, "loss": 0.5059, "step": 10460 }, { "epoch": 1.4771251059022874, "grad_norm": 3.96297611982448, "learning_rate": 1.0732604957080458e-05, "loss": 0.5879, "step": 10461 }, { "epoch": 1.4772663089522733, "grad_norm": 4.242860233005695, "learning_rate": 1.0731084592066548e-05, "loss": 0.7557, "step": 10462 }, { "epoch": 1.4774075120022592, "grad_norm": 4.081686665614728, "learning_rate": 1.072956421006255e-05, "loss": 0.627, "step": 10463 }, { "epoch": 1.477548715052245, "grad_norm": 4.083272162312083, "learning_rate": 1.072804381110379e-05, "loss": 0.6534, "step": 10464 }, { "epoch": 1.477689918102231, "grad_norm": 3.709892848076743, "learning_rate": 1.072652339522561e-05, "loss": 0.5189, "step": 10465 }, { "epoch": 1.4778311211522168, "grad_norm": 3.428386960961124, "learning_rate": 1.072500296246334e-05, "loss": 0.6283, "step": 10466 }, { "epoch": 1.4779723242022027, "grad_norm": 3.720846284707587, "learning_rate": 1.0723482512852312e-05, "loss": 0.5575, "step": 10467 }, { "epoch": 1.4781135272521886, "grad_norm": 3.307112108843093, "learning_rate": 1.0721962046427866e-05, "loss": 0.5369, "step": 10468 }, { "epoch": 1.4782547303021745, "grad_norm": 3.6539954849903045, "learning_rate": 1.0720441563225333e-05, "loss": 0.575, "step": 10469 }, { "epoch": 1.4783959333521604, "grad_norm": 3.0968390958822902, "learning_rate": 1.0718921063280048e-05, "loss": 0.4234, "step": 10470 }, { "epoch": 1.4785371364021462, "grad_norm": 3.8720564908569424, "learning_rate": 1.0717400546627347e-05, "loss": 0.741, "step": 10471 }, { "epoch": 1.4786783394521321, "grad_norm": 3.8014641982390227, "learning_rate": 1.0715880013302568e-05, "loss": 0.668, "step": 10472 }, { "epoch": 1.478819542502118, "grad_norm": 3.2517846199681104, "learning_rate": 1.0714359463341047e-05, "loss": 0.5572, "step": 10473 }, { "epoch": 1.4789607455521039, "grad_norm": 3.3244556731199797, "learning_rate": 1.0712838896778124e-05, "loss": 0.5669, "step": 10474 }, { "epoch": 1.4791019486020898, "grad_norm": 3.420177805326652, "learning_rate": 1.0711318313649125e-05, "loss": 0.5046, "step": 10475 }, { "epoch": 1.4792431516520756, "grad_norm": 2.9662333024529124, "learning_rate": 1.0709797713989403e-05, "loss": 0.5026, "step": 10476 }, { "epoch": 1.4793843547020615, "grad_norm": 3.4940641445958645, "learning_rate": 1.0708277097834285e-05, "loss": 0.5016, "step": 10477 }, { "epoch": 1.4795255577520474, "grad_norm": 4.280897091987144, "learning_rate": 1.0706756465219114e-05, "loss": 0.6082, "step": 10478 }, { "epoch": 1.4796667608020333, "grad_norm": 3.3523169701562265, "learning_rate": 1.070523581617923e-05, "loss": 0.5188, "step": 10479 }, { "epoch": 1.4798079638520192, "grad_norm": 3.3310197952152523, "learning_rate": 1.0703715150749967e-05, "loss": 0.5789, "step": 10480 }, { "epoch": 1.479949166902005, "grad_norm": 3.990554209894295, "learning_rate": 1.0702194468966667e-05, "loss": 0.5855, "step": 10481 }, { "epoch": 1.480090369951991, "grad_norm": 3.67082440346671, "learning_rate": 1.0700673770864673e-05, "loss": 0.5599, "step": 10482 }, { "epoch": 1.4802315730019768, "grad_norm": 3.396742924243031, "learning_rate": 1.0699153056479326e-05, "loss": 0.5582, "step": 10483 }, { "epoch": 1.4803727760519627, "grad_norm": 2.912900201529329, "learning_rate": 1.069763232584596e-05, "loss": 0.4813, "step": 10484 }, { "epoch": 1.4805139791019486, "grad_norm": 2.9239851619140294, "learning_rate": 1.069611157899992e-05, "loss": 0.4996, "step": 10485 }, { "epoch": 1.4806551821519345, "grad_norm": 3.389065766287909, "learning_rate": 1.0694590815976549e-05, "loss": 0.5679, "step": 10486 }, { "epoch": 1.4807963852019204, "grad_norm": 2.881672226789623, "learning_rate": 1.0693070036811187e-05, "loss": 0.4959, "step": 10487 }, { "epoch": 1.4809375882519062, "grad_norm": 4.609545738960422, "learning_rate": 1.0691549241539177e-05, "loss": 0.6994, "step": 10488 }, { "epoch": 1.4810787913018921, "grad_norm": 2.410051903462418, "learning_rate": 1.069002843019586e-05, "loss": 0.4109, "step": 10489 }, { "epoch": 1.481219994351878, "grad_norm": 3.5991031829862843, "learning_rate": 1.0688507602816581e-05, "loss": 0.6699, "step": 10490 }, { "epoch": 1.481361197401864, "grad_norm": 3.1026943708245254, "learning_rate": 1.0686986759436684e-05, "loss": 0.4499, "step": 10491 }, { "epoch": 1.4815024004518498, "grad_norm": 3.3735279823470092, "learning_rate": 1.068546590009151e-05, "loss": 0.5906, "step": 10492 }, { "epoch": 1.4816436035018357, "grad_norm": 3.7321381186528106, "learning_rate": 1.0683945024816403e-05, "loss": 0.6402, "step": 10493 }, { "epoch": 1.4817848065518215, "grad_norm": 3.164104625445981, "learning_rate": 1.0682424133646712e-05, "loss": 0.5077, "step": 10494 }, { "epoch": 1.4819260096018074, "grad_norm": 3.6875769512235554, "learning_rate": 1.0680903226617776e-05, "loss": 0.5717, "step": 10495 }, { "epoch": 1.4820672126517933, "grad_norm": 3.6813699426432644, "learning_rate": 1.0679382303764945e-05, "loss": 0.5197, "step": 10496 }, { "epoch": 1.4822084157017792, "grad_norm": 3.9876994534708494, "learning_rate": 1.0677861365123564e-05, "loss": 0.5978, "step": 10497 }, { "epoch": 1.482349618751765, "grad_norm": 3.010261162378477, "learning_rate": 1.0676340410728976e-05, "loss": 0.4811, "step": 10498 }, { "epoch": 1.482490821801751, "grad_norm": 4.906393482702666, "learning_rate": 1.0674819440616526e-05, "loss": 0.8318, "step": 10499 }, { "epoch": 1.4826320248517368, "grad_norm": 4.267711930911907, "learning_rate": 1.0673298454821567e-05, "loss": 0.7232, "step": 10500 }, { "epoch": 1.4827732279017227, "grad_norm": 3.5744974265662965, "learning_rate": 1.0671777453379442e-05, "loss": 0.5965, "step": 10501 }, { "epoch": 1.4829144309517086, "grad_norm": 3.9434576297083326, "learning_rate": 1.0670256436325499e-05, "loss": 0.608, "step": 10502 }, { "epoch": 1.4830556340016945, "grad_norm": 4.469037055151412, "learning_rate": 1.0668735403695087e-05, "loss": 0.6079, "step": 10503 }, { "epoch": 1.4831968370516804, "grad_norm": 2.8797954872366858, "learning_rate": 1.0667214355523552e-05, "loss": 0.4407, "step": 10504 }, { "epoch": 1.4833380401016663, "grad_norm": 3.6042185703915712, "learning_rate": 1.0665693291846245e-05, "loss": 0.4911, "step": 10505 }, { "epoch": 1.4834792431516521, "grad_norm": 3.213877548022614, "learning_rate": 1.0664172212698512e-05, "loss": 0.5408, "step": 10506 }, { "epoch": 1.483620446201638, "grad_norm": 3.8103328697729224, "learning_rate": 1.0662651118115702e-05, "loss": 0.613, "step": 10507 }, { "epoch": 1.483761649251624, "grad_norm": 4.974678034431828, "learning_rate": 1.0661130008133169e-05, "loss": 0.7566, "step": 10508 }, { "epoch": 1.4839028523016098, "grad_norm": 3.9442237345619007, "learning_rate": 1.065960888278626e-05, "loss": 0.6382, "step": 10509 }, { "epoch": 1.4840440553515957, "grad_norm": 3.692718608417279, "learning_rate": 1.0658087742110322e-05, "loss": 0.6561, "step": 10510 }, { "epoch": 1.4841852584015816, "grad_norm": 3.609423864713247, "learning_rate": 1.065656658614071e-05, "loss": 0.5949, "step": 10511 }, { "epoch": 1.4843264614515674, "grad_norm": 3.0310862451438916, "learning_rate": 1.0655045414912777e-05, "loss": 0.5409, "step": 10512 }, { "epoch": 1.4844676645015533, "grad_norm": 4.711655444936806, "learning_rate": 1.0653524228461872e-05, "loss": 0.6625, "step": 10513 }, { "epoch": 1.4846088675515392, "grad_norm": 3.563533486167081, "learning_rate": 1.0652003026823344e-05, "loss": 0.5819, "step": 10514 }, { "epoch": 1.484750070601525, "grad_norm": 3.984902110079651, "learning_rate": 1.0650481810032546e-05, "loss": 0.6191, "step": 10515 }, { "epoch": 1.484891273651511, "grad_norm": 3.981829053893209, "learning_rate": 1.0648960578124831e-05, "loss": 0.6829, "step": 10516 }, { "epoch": 1.4850324767014969, "grad_norm": 4.071376510192055, "learning_rate": 1.0647439331135558e-05, "loss": 0.6931, "step": 10517 }, { "epoch": 1.4851736797514827, "grad_norm": 3.5497722139733314, "learning_rate": 1.064591806910007e-05, "loss": 0.6207, "step": 10518 }, { "epoch": 1.4853148828014686, "grad_norm": 3.362185798810174, "learning_rate": 1.0644396792053726e-05, "loss": 0.5832, "step": 10519 }, { "epoch": 1.4854560858514545, "grad_norm": 3.2524972429992722, "learning_rate": 1.0642875500031878e-05, "loss": 0.5468, "step": 10520 }, { "epoch": 1.4855972889014404, "grad_norm": 4.178699384834525, "learning_rate": 1.0641354193069882e-05, "loss": 0.6027, "step": 10521 }, { "epoch": 1.4857384919514263, "grad_norm": 3.1613355700054173, "learning_rate": 1.0639832871203094e-05, "loss": 0.5737, "step": 10522 }, { "epoch": 1.4858796950014121, "grad_norm": 3.224470206750618, "learning_rate": 1.0638311534466863e-05, "loss": 0.4917, "step": 10523 }, { "epoch": 1.486020898051398, "grad_norm": 3.4384606481890554, "learning_rate": 1.0636790182896545e-05, "loss": 0.5415, "step": 10524 }, { "epoch": 1.486162101101384, "grad_norm": 4.142255246534022, "learning_rate": 1.0635268816527505e-05, "loss": 0.5992, "step": 10525 }, { "epoch": 1.4863033041513698, "grad_norm": 3.152311809667922, "learning_rate": 1.063374743539509e-05, "loss": 0.4517, "step": 10526 }, { "epoch": 1.4864445072013557, "grad_norm": 4.249334141775024, "learning_rate": 1.0632226039534654e-05, "loss": 0.6566, "step": 10527 }, { "epoch": 1.4865857102513416, "grad_norm": 3.300371622912966, "learning_rate": 1.0630704628981561e-05, "loss": 0.4933, "step": 10528 }, { "epoch": 1.4867269133013274, "grad_norm": 3.5280982672157193, "learning_rate": 1.0629183203771167e-05, "loss": 0.6282, "step": 10529 }, { "epoch": 1.486868116351313, "grad_norm": 3.9558345275493707, "learning_rate": 1.0627661763938824e-05, "loss": 0.6994, "step": 10530 }, { "epoch": 1.487009319401299, "grad_norm": 3.7516673550226156, "learning_rate": 1.0626140309519892e-05, "loss": 0.6037, "step": 10531 }, { "epoch": 1.4871505224512849, "grad_norm": 3.4690088687965113, "learning_rate": 1.0624618840549732e-05, "loss": 0.5341, "step": 10532 }, { "epoch": 1.4872917255012708, "grad_norm": 3.541605509417699, "learning_rate": 1.0623097357063696e-05, "loss": 0.5196, "step": 10533 }, { "epoch": 1.4874329285512566, "grad_norm": 2.7597660674418774, "learning_rate": 1.0621575859097153e-05, "loss": 0.4097, "step": 10534 }, { "epoch": 1.4875741316012425, "grad_norm": 3.469382531640144, "learning_rate": 1.0620054346685448e-05, "loss": 0.5662, "step": 10535 }, { "epoch": 1.4877153346512284, "grad_norm": 3.739270298136029, "learning_rate": 1.0618532819863953e-05, "loss": 0.704, "step": 10536 }, { "epoch": 1.4878565377012143, "grad_norm": 3.858222073387154, "learning_rate": 1.061701127866802e-05, "loss": 0.6634, "step": 10537 }, { "epoch": 1.4879977407512002, "grad_norm": 3.8498646993296055, "learning_rate": 1.0615489723133015e-05, "loss": 0.6146, "step": 10538 }, { "epoch": 1.488138943801186, "grad_norm": 3.526163611950601, "learning_rate": 1.0613968153294291e-05, "loss": 0.6239, "step": 10539 }, { "epoch": 1.488280146851172, "grad_norm": 3.53467241320696, "learning_rate": 1.0612446569187214e-05, "loss": 0.5695, "step": 10540 }, { "epoch": 1.4884213499011578, "grad_norm": 3.097588675485939, "learning_rate": 1.061092497084714e-05, "loss": 0.469, "step": 10541 }, { "epoch": 1.4885625529511437, "grad_norm": 2.934146884734023, "learning_rate": 1.060940335830944e-05, "loss": 0.3585, "step": 10542 }, { "epoch": 1.4887037560011296, "grad_norm": 3.501140792415015, "learning_rate": 1.0607881731609464e-05, "loss": 0.7052, "step": 10543 }, { "epoch": 1.4888449590511155, "grad_norm": 3.521163004322675, "learning_rate": 1.0606360090782578e-05, "loss": 0.613, "step": 10544 }, { "epoch": 1.4889861621011014, "grad_norm": 3.584719252702429, "learning_rate": 1.0604838435864148e-05, "loss": 0.6691, "step": 10545 }, { "epoch": 1.4891273651510872, "grad_norm": 3.5341102046622868, "learning_rate": 1.0603316766889537e-05, "loss": 0.4873, "step": 10546 }, { "epoch": 1.4892685682010731, "grad_norm": 3.4630980682588586, "learning_rate": 1.0601795083894099e-05, "loss": 0.6669, "step": 10547 }, { "epoch": 1.489409771251059, "grad_norm": 3.870437461386357, "learning_rate": 1.0600273386913207e-05, "loss": 0.5526, "step": 10548 }, { "epoch": 1.4895509743010449, "grad_norm": 2.8343538965472956, "learning_rate": 1.059875167598222e-05, "loss": 0.4418, "step": 10549 }, { "epoch": 1.4896921773510308, "grad_norm": 4.138167662168074, "learning_rate": 1.0597229951136498e-05, "loss": 0.6783, "step": 10550 }, { "epoch": 1.4898333804010166, "grad_norm": 4.382081062055031, "learning_rate": 1.0595708212411417e-05, "loss": 0.7501, "step": 10551 }, { "epoch": 1.4899745834510025, "grad_norm": 2.7831376007897624, "learning_rate": 1.0594186459842333e-05, "loss": 0.4601, "step": 10552 }, { "epoch": 1.4901157865009884, "grad_norm": 3.090918659312495, "learning_rate": 1.0592664693464608e-05, "loss": 0.4574, "step": 10553 }, { "epoch": 1.4902569895509743, "grad_norm": 3.9146949039389036, "learning_rate": 1.0591142913313615e-05, "loss": 0.6319, "step": 10554 }, { "epoch": 1.4903981926009602, "grad_norm": 3.112110161441283, "learning_rate": 1.0589621119424714e-05, "loss": 0.4477, "step": 10555 }, { "epoch": 1.490539395650946, "grad_norm": 3.3978813275955124, "learning_rate": 1.0588099311833275e-05, "loss": 0.5037, "step": 10556 }, { "epoch": 1.490680598700932, "grad_norm": 3.773390049071862, "learning_rate": 1.0586577490574661e-05, "loss": 0.6361, "step": 10557 }, { "epoch": 1.4908218017509178, "grad_norm": 3.06915715140273, "learning_rate": 1.058505565568424e-05, "loss": 0.5276, "step": 10558 }, { "epoch": 1.4909630048009037, "grad_norm": 4.021270209815351, "learning_rate": 1.0583533807197377e-05, "loss": 0.5874, "step": 10559 }, { "epoch": 1.4911042078508896, "grad_norm": 2.768638649273078, "learning_rate": 1.058201194514944e-05, "loss": 0.4758, "step": 10560 }, { "epoch": 1.4912454109008755, "grad_norm": 3.5384790708016354, "learning_rate": 1.0580490069575795e-05, "loss": 0.5344, "step": 10561 }, { "epoch": 1.4913866139508614, "grad_norm": 3.0696366877638446, "learning_rate": 1.0578968180511815e-05, "loss": 0.4356, "step": 10562 }, { "epoch": 1.4915278170008472, "grad_norm": 3.6301349671201004, "learning_rate": 1.0577446277992866e-05, "loss": 0.5945, "step": 10563 }, { "epoch": 1.4916690200508331, "grad_norm": 2.794563585015319, "learning_rate": 1.057592436205431e-05, "loss": 0.476, "step": 10564 }, { "epoch": 1.491810223100819, "grad_norm": 3.0788590557984254, "learning_rate": 1.0574402432731523e-05, "loss": 0.5144, "step": 10565 }, { "epoch": 1.491951426150805, "grad_norm": 3.3632599451410687, "learning_rate": 1.0572880490059874e-05, "loss": 0.6204, "step": 10566 }, { "epoch": 1.4920926292007908, "grad_norm": 3.1286485872944927, "learning_rate": 1.0571358534074724e-05, "loss": 0.4443, "step": 10567 }, { "epoch": 1.4922338322507767, "grad_norm": 3.8319576452970083, "learning_rate": 1.0569836564811456e-05, "loss": 0.5307, "step": 10568 }, { "epoch": 1.4923750353007625, "grad_norm": 3.4182815908947695, "learning_rate": 1.0568314582305427e-05, "loss": 0.5941, "step": 10569 }, { "epoch": 1.4925162383507484, "grad_norm": 3.333577685718195, "learning_rate": 1.0566792586592012e-05, "loss": 0.5171, "step": 10570 }, { "epoch": 1.4926574414007343, "grad_norm": 3.5826923222589113, "learning_rate": 1.0565270577706584e-05, "loss": 0.5951, "step": 10571 }, { "epoch": 1.4927986444507202, "grad_norm": 3.606696533119227, "learning_rate": 1.0563748555684511e-05, "loss": 0.5473, "step": 10572 }, { "epoch": 1.492939847500706, "grad_norm": 4.272267248304502, "learning_rate": 1.0562226520561165e-05, "loss": 0.6211, "step": 10573 }, { "epoch": 1.493081050550692, "grad_norm": 3.924272104515598, "learning_rate": 1.0560704472371919e-05, "loss": 0.6349, "step": 10574 }, { "epoch": 1.4932222536006778, "grad_norm": 4.814539056416804, "learning_rate": 1.0559182411152142e-05, "loss": 0.6481, "step": 10575 }, { "epoch": 1.4933634566506637, "grad_norm": 3.471217405949264, "learning_rate": 1.0557660336937207e-05, "loss": 0.5457, "step": 10576 }, { "epoch": 1.4935046597006496, "grad_norm": 3.3181901563755503, "learning_rate": 1.0556138249762489e-05, "loss": 0.558, "step": 10577 }, { "epoch": 1.4936458627506355, "grad_norm": 3.495875702061927, "learning_rate": 1.0554616149663355e-05, "loss": 0.5614, "step": 10578 }, { "epoch": 1.4937870658006214, "grad_norm": 3.79344435268433, "learning_rate": 1.0553094036675182e-05, "loss": 0.6417, "step": 10579 }, { "epoch": 1.4939282688506073, "grad_norm": 3.5029799787392366, "learning_rate": 1.0551571910833344e-05, "loss": 0.5227, "step": 10580 }, { "epoch": 1.494069471900593, "grad_norm": 3.2333756950404147, "learning_rate": 1.0550049772173212e-05, "loss": 0.6126, "step": 10581 }, { "epoch": 1.4942106749505788, "grad_norm": 3.186463549849212, "learning_rate": 1.054852762073016e-05, "loss": 0.548, "step": 10582 }, { "epoch": 1.4943518780005647, "grad_norm": 3.961973801370825, "learning_rate": 1.0547005456539565e-05, "loss": 0.632, "step": 10583 }, { "epoch": 1.4944930810505506, "grad_norm": 3.4646340030379768, "learning_rate": 1.0545483279636799e-05, "loss": 0.5701, "step": 10584 }, { "epoch": 1.4946342841005364, "grad_norm": 3.1896558800902497, "learning_rate": 1.0543961090057237e-05, "loss": 0.5411, "step": 10585 }, { "epoch": 1.4947754871505223, "grad_norm": 3.1777557775516816, "learning_rate": 1.0542438887836252e-05, "loss": 0.4788, "step": 10586 }, { "epoch": 1.4949166902005082, "grad_norm": 3.3341202155699583, "learning_rate": 1.0540916673009223e-05, "loss": 0.517, "step": 10587 }, { "epoch": 1.495057893250494, "grad_norm": 3.65968268419147, "learning_rate": 1.0539394445611524e-05, "loss": 0.5962, "step": 10588 }, { "epoch": 1.49519909630048, "grad_norm": 3.833366955029374, "learning_rate": 1.0537872205678534e-05, "loss": 0.5776, "step": 10589 }, { "epoch": 1.4953402993504659, "grad_norm": 3.6204114151800395, "learning_rate": 1.0536349953245622e-05, "loss": 0.6622, "step": 10590 }, { "epoch": 1.4954815024004517, "grad_norm": 3.2003246043875304, "learning_rate": 1.053482768834817e-05, "loss": 0.5535, "step": 10591 }, { "epoch": 1.4956227054504376, "grad_norm": 3.5617696632607303, "learning_rate": 1.0533305411021555e-05, "loss": 0.5234, "step": 10592 }, { "epoch": 1.4957639085004235, "grad_norm": 3.2962863330580103, "learning_rate": 1.053178312130115e-05, "loss": 0.5383, "step": 10593 }, { "epoch": 1.4959051115504094, "grad_norm": 3.6471945286242864, "learning_rate": 1.0530260819222337e-05, "loss": 0.5807, "step": 10594 }, { "epoch": 1.4960463146003953, "grad_norm": 4.782929583496178, "learning_rate": 1.052873850482049e-05, "loss": 0.8106, "step": 10595 }, { "epoch": 1.4961875176503812, "grad_norm": 3.2727013839818557, "learning_rate": 1.0527216178130988e-05, "loss": 0.5404, "step": 10596 }, { "epoch": 1.496328720700367, "grad_norm": 3.069427279307464, "learning_rate": 1.0525693839189215e-05, "loss": 0.5051, "step": 10597 }, { "epoch": 1.496469923750353, "grad_norm": 4.166701709181684, "learning_rate": 1.0524171488030537e-05, "loss": 0.7346, "step": 10598 }, { "epoch": 1.4966111268003388, "grad_norm": 3.5052762460286657, "learning_rate": 1.0522649124690343e-05, "loss": 0.6641, "step": 10599 }, { "epoch": 1.4967523298503247, "grad_norm": 3.707020070244611, "learning_rate": 1.0521126749204009e-05, "loss": 0.6915, "step": 10600 }, { "epoch": 1.4968935329003106, "grad_norm": 2.9297796798064337, "learning_rate": 1.0519604361606916e-05, "loss": 0.4861, "step": 10601 }, { "epoch": 1.4970347359502965, "grad_norm": 4.386256048133974, "learning_rate": 1.051808196193444e-05, "loss": 0.7643, "step": 10602 }, { "epoch": 1.4971759390002823, "grad_norm": 4.059441242684399, "learning_rate": 1.0516559550221965e-05, "loss": 0.6182, "step": 10603 }, { "epoch": 1.4973171420502682, "grad_norm": 3.2807785678543113, "learning_rate": 1.0515037126504865e-05, "loss": 0.5311, "step": 10604 }, { "epoch": 1.497458345100254, "grad_norm": 3.3128275430459504, "learning_rate": 1.0513514690818529e-05, "loss": 0.5556, "step": 10605 }, { "epoch": 1.49759954815024, "grad_norm": 3.1836646885535798, "learning_rate": 1.0511992243198335e-05, "loss": 0.5238, "step": 10606 }, { "epoch": 1.4977407512002259, "grad_norm": 3.1745483141096114, "learning_rate": 1.0510469783679656e-05, "loss": 0.6052, "step": 10607 }, { "epoch": 1.4978819542502118, "grad_norm": 3.6747624398198466, "learning_rate": 1.0508947312297884e-05, "loss": 0.6404, "step": 10608 }, { "epoch": 1.4980231573001976, "grad_norm": 3.645298779581951, "learning_rate": 1.0507424829088394e-05, "loss": 0.5757, "step": 10609 }, { "epoch": 1.4981643603501835, "grad_norm": 3.8629791611481843, "learning_rate": 1.050590233408657e-05, "loss": 0.6609, "step": 10610 }, { "epoch": 1.4983055634001694, "grad_norm": 3.441691936240074, "learning_rate": 1.0504379827327798e-05, "loss": 0.6546, "step": 10611 }, { "epoch": 1.4984467664501553, "grad_norm": 3.6738136341037984, "learning_rate": 1.0502857308847453e-05, "loss": 0.4493, "step": 10612 }, { "epoch": 1.4985879695001412, "grad_norm": 3.5991797639020806, "learning_rate": 1.050133477868092e-05, "loss": 0.5777, "step": 10613 }, { "epoch": 1.498729172550127, "grad_norm": 3.0655042448320757, "learning_rate": 1.0499812236863589e-05, "loss": 0.5081, "step": 10614 }, { "epoch": 1.498870375600113, "grad_norm": 4.561699371880219, "learning_rate": 1.0498289683430831e-05, "loss": 0.5682, "step": 10615 }, { "epoch": 1.4990115786500988, "grad_norm": 3.3813446997734298, "learning_rate": 1.049676711841804e-05, "loss": 0.5261, "step": 10616 }, { "epoch": 1.4991527817000847, "grad_norm": 3.3080989600267374, "learning_rate": 1.0495244541860596e-05, "loss": 0.5053, "step": 10617 }, { "epoch": 1.4992939847500706, "grad_norm": 2.665709672655359, "learning_rate": 1.0493721953793881e-05, "loss": 0.418, "step": 10618 }, { "epoch": 1.4994351878000565, "grad_norm": 3.827623321147945, "learning_rate": 1.0492199354253283e-05, "loss": 0.6225, "step": 10619 }, { "epoch": 1.4995763908500424, "grad_norm": 3.7877647057735744, "learning_rate": 1.0490676743274181e-05, "loss": 0.6504, "step": 10620 }, { "epoch": 1.4997175939000282, "grad_norm": 3.230238534325567, "learning_rate": 1.0489154120891965e-05, "loss": 0.5249, "step": 10621 }, { "epoch": 1.4998587969500141, "grad_norm": 3.392045831094418, "learning_rate": 1.0487631487142018e-05, "loss": 0.5686, "step": 10622 }, { "epoch": 1.5, "grad_norm": 2.857943237891044, "learning_rate": 1.048610884205973e-05, "loss": 0.4656, "step": 10623 }, { "epoch": 1.5001412030499859, "grad_norm": 4.6928547385105635, "learning_rate": 1.0484586185680477e-05, "loss": 0.7959, "step": 10624 }, { "epoch": 1.5002824060999718, "grad_norm": 3.423783846338795, "learning_rate": 1.0483063518039653e-05, "loss": 0.5076, "step": 10625 }, { "epoch": 1.5004236091499576, "grad_norm": 3.7261336177651576, "learning_rate": 1.0481540839172641e-05, "loss": 0.5955, "step": 10626 }, { "epoch": 1.5005648121999435, "grad_norm": 3.9315293077384283, "learning_rate": 1.0480018149114828e-05, "loss": 0.6742, "step": 10627 }, { "epoch": 1.5007060152499294, "grad_norm": 3.9276241539481656, "learning_rate": 1.04784954479016e-05, "loss": 0.6066, "step": 10628 }, { "epoch": 1.5008472182999153, "grad_norm": 3.9673050975905384, "learning_rate": 1.0476972735568348e-05, "loss": 0.6024, "step": 10629 }, { "epoch": 1.5009884213499012, "grad_norm": 3.8060648418372462, "learning_rate": 1.0475450012150447e-05, "loss": 0.6438, "step": 10630 }, { "epoch": 1.501129624399887, "grad_norm": 3.7506308333558116, "learning_rate": 1.0473927277683303e-05, "loss": 0.6007, "step": 10631 }, { "epoch": 1.501270827449873, "grad_norm": 3.3662130918335054, "learning_rate": 1.0472404532202289e-05, "loss": 0.6333, "step": 10632 }, { "epoch": 1.5014120304998588, "grad_norm": 3.2741674350262664, "learning_rate": 1.0470881775742797e-05, "loss": 0.6102, "step": 10633 }, { "epoch": 1.5015532335498447, "grad_norm": 4.414556425691292, "learning_rate": 1.0469359008340216e-05, "loss": 0.5617, "step": 10634 }, { "epoch": 1.5016944365998306, "grad_norm": 3.475918923481075, "learning_rate": 1.0467836230029935e-05, "loss": 0.6183, "step": 10635 }, { "epoch": 1.5018356396498165, "grad_norm": 2.85522448221672, "learning_rate": 1.0466313440847343e-05, "loss": 0.4082, "step": 10636 }, { "epoch": 1.5019768426998024, "grad_norm": 3.1537581448977745, "learning_rate": 1.0464790640827827e-05, "loss": 0.5418, "step": 10637 }, { "epoch": 1.5021180457497882, "grad_norm": 3.072649167395925, "learning_rate": 1.0463267830006779e-05, "loss": 0.5372, "step": 10638 }, { "epoch": 1.5022592487997741, "grad_norm": 3.441337696152202, "learning_rate": 1.0461745008419582e-05, "loss": 0.6349, "step": 10639 }, { "epoch": 1.50240045184976, "grad_norm": 3.7221162697217087, "learning_rate": 1.0460222176101635e-05, "loss": 0.5879, "step": 10640 }, { "epoch": 1.502541654899746, "grad_norm": 3.4432384111094416, "learning_rate": 1.045869933308832e-05, "loss": 0.4924, "step": 10641 }, { "epoch": 1.5026828579497318, "grad_norm": 2.974841552635477, "learning_rate": 1.0457176479415034e-05, "loss": 0.4738, "step": 10642 }, { "epoch": 1.5028240609997177, "grad_norm": 3.1311169313746827, "learning_rate": 1.0455653615117163e-05, "loss": 0.4812, "step": 10643 }, { "epoch": 1.5029652640497035, "grad_norm": 3.3526803486148715, "learning_rate": 1.0454130740230098e-05, "loss": 0.541, "step": 10644 }, { "epoch": 1.5031064670996894, "grad_norm": 4.206735411906454, "learning_rate": 1.0452607854789231e-05, "loss": 0.5845, "step": 10645 }, { "epoch": 1.5032476701496753, "grad_norm": 4.176821995496634, "learning_rate": 1.0451084958829953e-05, "loss": 0.5541, "step": 10646 }, { "epoch": 1.5033888731996612, "grad_norm": 2.9943205452987294, "learning_rate": 1.0449562052387655e-05, "loss": 0.4235, "step": 10647 }, { "epoch": 1.503530076249647, "grad_norm": 4.077517394534269, "learning_rate": 1.0448039135497732e-05, "loss": 0.6591, "step": 10648 }, { "epoch": 1.503671279299633, "grad_norm": 4.988747574571694, "learning_rate": 1.044651620819557e-05, "loss": 0.7839, "step": 10649 }, { "epoch": 1.5038124823496188, "grad_norm": 3.2462356871546465, "learning_rate": 1.0444993270516562e-05, "loss": 0.5089, "step": 10650 }, { "epoch": 1.5039536853996047, "grad_norm": 3.7188883245116844, "learning_rate": 1.0443470322496106e-05, "loss": 0.5665, "step": 10651 }, { "epoch": 1.5040948884495906, "grad_norm": 3.4114700290368454, "learning_rate": 1.044194736416959e-05, "loss": 0.5231, "step": 10652 }, { "epoch": 1.5042360914995765, "grad_norm": 3.701160540621693, "learning_rate": 1.0440424395572408e-05, "loss": 0.5086, "step": 10653 }, { "epoch": 1.5043772945495624, "grad_norm": 3.474808920520696, "learning_rate": 1.0438901416739955e-05, "loss": 0.5573, "step": 10654 }, { "epoch": 1.5045184975995483, "grad_norm": 3.6405132407606033, "learning_rate": 1.0437378427707622e-05, "loss": 0.5435, "step": 10655 }, { "epoch": 1.5046597006495341, "grad_norm": 3.9930381232853565, "learning_rate": 1.04358554285108e-05, "loss": 0.6188, "step": 10656 }, { "epoch": 1.50480090369952, "grad_norm": 3.308892594305775, "learning_rate": 1.0434332419184891e-05, "loss": 0.506, "step": 10657 }, { "epoch": 1.504942106749506, "grad_norm": 3.5007520349967893, "learning_rate": 1.0432809399765281e-05, "loss": 0.5797, "step": 10658 }, { "epoch": 1.5050833097994918, "grad_norm": 3.4569411015149925, "learning_rate": 1.0431286370287368e-05, "loss": 0.5283, "step": 10659 }, { "epoch": 1.5052245128494777, "grad_norm": 3.7894744553181847, "learning_rate": 1.0429763330786546e-05, "loss": 0.5729, "step": 10660 }, { "epoch": 1.5053657158994636, "grad_norm": 3.064652749060904, "learning_rate": 1.042824028129821e-05, "loss": 0.4497, "step": 10661 }, { "epoch": 1.5055069189494494, "grad_norm": 4.126195711916415, "learning_rate": 1.0426717221857756e-05, "loss": 0.6448, "step": 10662 }, { "epoch": 1.5056481219994353, "grad_norm": 3.454300648568431, "learning_rate": 1.0425194152500578e-05, "loss": 0.4972, "step": 10663 }, { "epoch": 1.5057893250494212, "grad_norm": 3.5344410392032835, "learning_rate": 1.0423671073262067e-05, "loss": 0.6495, "step": 10664 }, { "epoch": 1.505930528099407, "grad_norm": 3.1513981312978427, "learning_rate": 1.042214798417763e-05, "loss": 0.5719, "step": 10665 }, { "epoch": 1.506071731149393, "grad_norm": 4.478435747627044, "learning_rate": 1.0420624885282653e-05, "loss": 0.6894, "step": 10666 }, { "epoch": 1.5062129341993789, "grad_norm": 3.505095033365735, "learning_rate": 1.0419101776612533e-05, "loss": 0.4854, "step": 10667 }, { "epoch": 1.5063541372493647, "grad_norm": 3.8911004716751876, "learning_rate": 1.0417578658202672e-05, "loss": 0.6173, "step": 10668 }, { "epoch": 1.5064953402993506, "grad_norm": 4.137518356080205, "learning_rate": 1.0416055530088462e-05, "loss": 0.6059, "step": 10669 }, { "epoch": 1.5066365433493365, "grad_norm": 3.663486816786475, "learning_rate": 1.0414532392305301e-05, "loss": 0.6695, "step": 10670 }, { "epoch": 1.5067777463993224, "grad_norm": 3.7672204183016205, "learning_rate": 1.0413009244888589e-05, "loss": 0.685, "step": 10671 }, { "epoch": 1.5069189494493083, "grad_norm": 2.6557741288718515, "learning_rate": 1.0411486087873717e-05, "loss": 0.4203, "step": 10672 }, { "epoch": 1.5070601524992941, "grad_norm": 3.7998675235143735, "learning_rate": 1.0409962921296086e-05, "loss": 0.4974, "step": 10673 }, { "epoch": 1.50720135554928, "grad_norm": 3.547406605501481, "learning_rate": 1.0408439745191096e-05, "loss": 0.5546, "step": 10674 }, { "epoch": 1.5073425585992657, "grad_norm": 3.7321021220746395, "learning_rate": 1.040691655959414e-05, "loss": 0.6021, "step": 10675 }, { "epoch": 1.5074837616492516, "grad_norm": 2.842238427103037, "learning_rate": 1.0405393364540618e-05, "loss": 0.4843, "step": 10676 }, { "epoch": 1.5076249646992375, "grad_norm": 3.934729011705652, "learning_rate": 1.0403870160065934e-05, "loss": 0.5095, "step": 10677 }, { "epoch": 1.5077661677492233, "grad_norm": 3.758741606697827, "learning_rate": 1.040234694620548e-05, "loss": 0.6459, "step": 10678 }, { "epoch": 1.5079073707992092, "grad_norm": 3.095702712973342, "learning_rate": 1.0400823722994657e-05, "loss": 0.4836, "step": 10679 }, { "epoch": 1.508048573849195, "grad_norm": 3.4551705566821176, "learning_rate": 1.0399300490468862e-05, "loss": 0.543, "step": 10680 }, { "epoch": 1.508189776899181, "grad_norm": 2.9819411028647838, "learning_rate": 1.0397777248663497e-05, "loss": 0.4668, "step": 10681 }, { "epoch": 1.5083309799491669, "grad_norm": 3.7547037972937685, "learning_rate": 1.0396253997613964e-05, "loss": 0.5888, "step": 10682 }, { "epoch": 1.5084721829991528, "grad_norm": 2.965033880619661, "learning_rate": 1.0394730737355655e-05, "loss": 0.4888, "step": 10683 }, { "epoch": 1.5086133860491386, "grad_norm": 3.455883602796061, "learning_rate": 1.0393207467923973e-05, "loss": 0.616, "step": 10684 }, { "epoch": 1.5087545890991245, "grad_norm": 4.266089654990721, "learning_rate": 1.039168418935432e-05, "loss": 0.7192, "step": 10685 }, { "epoch": 1.5088957921491104, "grad_norm": 3.8291436483811156, "learning_rate": 1.03901609016821e-05, "loss": 0.5379, "step": 10686 }, { "epoch": 1.5090369951990963, "grad_norm": 4.250646272389876, "learning_rate": 1.0388637604942707e-05, "loss": 0.6041, "step": 10687 }, { "epoch": 1.5091781982490822, "grad_norm": 3.3577096684394387, "learning_rate": 1.0387114299171541e-05, "loss": 0.539, "step": 10688 }, { "epoch": 1.509319401299068, "grad_norm": 3.39162793233433, "learning_rate": 1.0385590984404009e-05, "loss": 0.4743, "step": 10689 }, { "epoch": 1.509460604349054, "grad_norm": 3.740006335034745, "learning_rate": 1.0384067660675508e-05, "loss": 0.6054, "step": 10690 }, { "epoch": 1.5096018073990398, "grad_norm": 4.470842556156996, "learning_rate": 1.038254432802144e-05, "loss": 0.8593, "step": 10691 }, { "epoch": 1.5097430104490257, "grad_norm": 4.002830596222397, "learning_rate": 1.0381020986477209e-05, "loss": 0.6929, "step": 10692 }, { "epoch": 1.5098842134990116, "grad_norm": 3.9615154186449586, "learning_rate": 1.037949763607821e-05, "loss": 0.5939, "step": 10693 }, { "epoch": 1.5100254165489975, "grad_norm": 3.3118922781390547, "learning_rate": 1.0377974276859853e-05, "loss": 0.5474, "step": 10694 }, { "epoch": 1.5101666195989834, "grad_norm": 4.122721246407766, "learning_rate": 1.0376450908857538e-05, "loss": 0.7326, "step": 10695 }, { "epoch": 1.5103078226489692, "grad_norm": 4.663151265065283, "learning_rate": 1.0374927532106667e-05, "loss": 0.5862, "step": 10696 }, { "epoch": 1.5104490256989551, "grad_norm": 2.929039467369949, "learning_rate": 1.0373404146642639e-05, "loss": 0.4794, "step": 10697 }, { "epoch": 1.510590228748941, "grad_norm": 3.4532104361936646, "learning_rate": 1.0371880752500862e-05, "loss": 0.5628, "step": 10698 }, { "epoch": 1.5107314317989269, "grad_norm": 2.9490265843775254, "learning_rate": 1.0370357349716738e-05, "loss": 0.5863, "step": 10699 }, { "epoch": 1.5108726348489128, "grad_norm": 4.242329889155228, "learning_rate": 1.0368833938325667e-05, "loss": 0.808, "step": 10700 }, { "epoch": 1.5110138378988986, "grad_norm": 3.3805513941509315, "learning_rate": 1.0367310518363051e-05, "loss": 0.4878, "step": 10701 }, { "epoch": 1.5111550409488845, "grad_norm": 3.186667493806593, "learning_rate": 1.0365787089864303e-05, "loss": 0.5358, "step": 10702 }, { "epoch": 1.5112962439988704, "grad_norm": 3.2722161197868527, "learning_rate": 1.036426365286482e-05, "loss": 0.5306, "step": 10703 }, { "epoch": 1.5114374470488563, "grad_norm": 3.7593243171977115, "learning_rate": 1.0362740207400006e-05, "loss": 0.6014, "step": 10704 }, { "epoch": 1.5115786500988422, "grad_norm": 2.8273461660395585, "learning_rate": 1.0361216753505267e-05, "loss": 0.4599, "step": 10705 }, { "epoch": 1.511719853148828, "grad_norm": 3.1387034016425415, "learning_rate": 1.0359693291216007e-05, "loss": 0.5433, "step": 10706 }, { "epoch": 1.511861056198814, "grad_norm": 2.914923835904362, "learning_rate": 1.035816982056763e-05, "loss": 0.4455, "step": 10707 }, { "epoch": 1.5120022592487998, "grad_norm": 3.3109363589013046, "learning_rate": 1.0356646341595539e-05, "loss": 0.5261, "step": 10708 }, { "epoch": 1.5121434622987855, "grad_norm": 3.0423422362868537, "learning_rate": 1.0355122854335144e-05, "loss": 0.5373, "step": 10709 }, { "epoch": 1.5122846653487714, "grad_norm": 3.6766215321641496, "learning_rate": 1.0353599358821845e-05, "loss": 0.6584, "step": 10710 }, { "epoch": 1.5124258683987573, "grad_norm": 3.216887677273866, "learning_rate": 1.0352075855091048e-05, "loss": 0.5338, "step": 10711 }, { "epoch": 1.5125670714487431, "grad_norm": 4.278486606249884, "learning_rate": 1.0350552343178164e-05, "loss": 0.5288, "step": 10712 }, { "epoch": 1.512708274498729, "grad_norm": 4.0464516178499865, "learning_rate": 1.0349028823118593e-05, "loss": 0.646, "step": 10713 }, { "epoch": 1.512849477548715, "grad_norm": 3.783110691465586, "learning_rate": 1.0347505294947744e-05, "loss": 0.6808, "step": 10714 }, { "epoch": 1.5129906805987008, "grad_norm": 3.3582895211502892, "learning_rate": 1.0345981758701023e-05, "loss": 0.4965, "step": 10715 }, { "epoch": 1.5131318836486867, "grad_norm": 3.470690174407434, "learning_rate": 1.0344458214413833e-05, "loss": 0.552, "step": 10716 }, { "epoch": 1.5132730866986726, "grad_norm": 4.1388034077192355, "learning_rate": 1.0342934662121584e-05, "loss": 0.7951, "step": 10717 }, { "epoch": 1.5134142897486584, "grad_norm": 3.5167443944072168, "learning_rate": 1.034141110185968e-05, "loss": 0.5158, "step": 10718 }, { "epoch": 1.5135554927986443, "grad_norm": 3.386596360497544, "learning_rate": 1.033988753366353e-05, "loss": 0.5771, "step": 10719 }, { "epoch": 1.5136966958486302, "grad_norm": 4.08674948354086, "learning_rate": 1.0338363957568544e-05, "loss": 0.6265, "step": 10720 }, { "epoch": 1.513837898898616, "grad_norm": 4.21603526027606, "learning_rate": 1.033684037361012e-05, "loss": 0.5889, "step": 10721 }, { "epoch": 1.513979101948602, "grad_norm": 2.7706948592911713, "learning_rate": 1.0335316781823675e-05, "loss": 0.4133, "step": 10722 }, { "epoch": 1.5141203049985879, "grad_norm": 4.343415082959932, "learning_rate": 1.0333793182244612e-05, "loss": 0.683, "step": 10723 }, { "epoch": 1.5142615080485737, "grad_norm": 3.720052817946793, "learning_rate": 1.033226957490834e-05, "loss": 0.6025, "step": 10724 }, { "epoch": 1.5144027110985596, "grad_norm": 3.146998267936162, "learning_rate": 1.0330745959850266e-05, "loss": 0.5246, "step": 10725 }, { "epoch": 1.5145439141485455, "grad_norm": 4.085388540576302, "learning_rate": 1.03292223371058e-05, "loss": 0.7456, "step": 10726 }, { "epoch": 1.5146851171985314, "grad_norm": 3.5864847519340537, "learning_rate": 1.0327698706710346e-05, "loss": 0.5825, "step": 10727 }, { "epoch": 1.5148263202485173, "grad_norm": 3.1974208137908753, "learning_rate": 1.0326175068699316e-05, "loss": 0.6628, "step": 10728 }, { "epoch": 1.5149675232985031, "grad_norm": 4.202713992135985, "learning_rate": 1.0324651423108123e-05, "loss": 0.7062, "step": 10729 }, { "epoch": 1.515108726348489, "grad_norm": 3.0383304276233245, "learning_rate": 1.0323127769972165e-05, "loss": 0.537, "step": 10730 }, { "epoch": 1.515249929398475, "grad_norm": 3.081759142630451, "learning_rate": 1.032160410932686e-05, "loss": 0.5517, "step": 10731 }, { "epoch": 1.5153911324484608, "grad_norm": 4.180940621073941, "learning_rate": 1.0320080441207616e-05, "loss": 0.661, "step": 10732 }, { "epoch": 1.5155323354984467, "grad_norm": 4.248146844752272, "learning_rate": 1.0318556765649838e-05, "loss": 0.639, "step": 10733 }, { "epoch": 1.5156735385484326, "grad_norm": 3.4639169158200938, "learning_rate": 1.031703308268894e-05, "loss": 0.5262, "step": 10734 }, { "epoch": 1.5158147415984184, "grad_norm": 3.4236455513861674, "learning_rate": 1.031550939236033e-05, "loss": 0.6572, "step": 10735 }, { "epoch": 1.5159559446484043, "grad_norm": 3.549236424332188, "learning_rate": 1.0313985694699415e-05, "loss": 0.6291, "step": 10736 }, { "epoch": 1.5160971476983902, "grad_norm": 3.5870318232796, "learning_rate": 1.0312461989741614e-05, "loss": 0.5646, "step": 10737 }, { "epoch": 1.516238350748376, "grad_norm": 4.202163561871317, "learning_rate": 1.0310938277522326e-05, "loss": 0.8006, "step": 10738 }, { "epoch": 1.516379553798362, "grad_norm": 3.4356758639756726, "learning_rate": 1.030941455807697e-05, "loss": 0.5235, "step": 10739 }, { "epoch": 1.5165207568483479, "grad_norm": 4.061376312047332, "learning_rate": 1.030789083144095e-05, "loss": 0.5215, "step": 10740 }, { "epoch": 1.5166619598983337, "grad_norm": 3.8486771212211486, "learning_rate": 1.0306367097649683e-05, "loss": 0.5685, "step": 10741 }, { "epoch": 1.5168031629483196, "grad_norm": 3.2496066980241127, "learning_rate": 1.0304843356738576e-05, "loss": 0.5051, "step": 10742 }, { "epoch": 1.5169443659983055, "grad_norm": 4.02896534899286, "learning_rate": 1.030331960874304e-05, "loss": 0.5971, "step": 10743 }, { "epoch": 1.5170855690482914, "grad_norm": 3.473025024389618, "learning_rate": 1.0301795853698487e-05, "loss": 0.5011, "step": 10744 }, { "epoch": 1.5172267720982773, "grad_norm": 3.75897097477085, "learning_rate": 1.0300272091640332e-05, "loss": 0.5927, "step": 10745 }, { "epoch": 1.5173679751482632, "grad_norm": 3.3111728572930907, "learning_rate": 1.0298748322603982e-05, "loss": 0.5156, "step": 10746 }, { "epoch": 1.517509178198249, "grad_norm": 4.55302032380926, "learning_rate": 1.0297224546624846e-05, "loss": 0.7441, "step": 10747 }, { "epoch": 1.517650381248235, "grad_norm": 3.03316958153463, "learning_rate": 1.0295700763738345e-05, "loss": 0.4633, "step": 10748 }, { "epoch": 1.5177915842982208, "grad_norm": 3.371939652476338, "learning_rate": 1.0294176973979884e-05, "loss": 0.5287, "step": 10749 }, { "epoch": 1.5179327873482067, "grad_norm": 3.3982940062672817, "learning_rate": 1.0292653177384878e-05, "loss": 0.5868, "step": 10750 }, { "epoch": 1.5180739903981926, "grad_norm": 3.389805755980886, "learning_rate": 1.0291129373988737e-05, "loss": 0.5738, "step": 10751 }, { "epoch": 1.5182151934481785, "grad_norm": 3.70266849757621, "learning_rate": 1.0289605563826876e-05, "loss": 0.5883, "step": 10752 }, { "epoch": 1.5183563964981643, "grad_norm": 3.5792515947089765, "learning_rate": 1.0288081746934705e-05, "loss": 0.5138, "step": 10753 }, { "epoch": 1.5184975995481502, "grad_norm": 3.7331261556830593, "learning_rate": 1.0286557923347642e-05, "loss": 0.5252, "step": 10754 }, { "epoch": 1.518638802598136, "grad_norm": 4.0708941975279, "learning_rate": 1.0285034093101093e-05, "loss": 0.5433, "step": 10755 }, { "epoch": 1.518780005648122, "grad_norm": 3.66547379127143, "learning_rate": 1.0283510256230478e-05, "loss": 0.5746, "step": 10756 }, { "epoch": 1.5189212086981079, "grad_norm": 3.0468055115602373, "learning_rate": 1.0281986412771206e-05, "loss": 0.5533, "step": 10757 }, { "epoch": 1.5190624117480938, "grad_norm": 4.123068740239987, "learning_rate": 1.028046256275869e-05, "loss": 0.7104, "step": 10758 }, { "epoch": 1.5192036147980796, "grad_norm": 4.064910694859377, "learning_rate": 1.0278938706228348e-05, "loss": 0.6793, "step": 10759 }, { "epoch": 1.5193448178480655, "grad_norm": 2.654264781471614, "learning_rate": 1.027741484321559e-05, "loss": 0.398, "step": 10760 }, { "epoch": 1.5194860208980514, "grad_norm": 3.565654790322971, "learning_rate": 1.0275890973755827e-05, "loss": 0.5405, "step": 10761 }, { "epoch": 1.5196272239480373, "grad_norm": 3.1586589193095245, "learning_rate": 1.0274367097884483e-05, "loss": 0.5212, "step": 10762 }, { "epoch": 1.5197684269980232, "grad_norm": 3.4330366667742185, "learning_rate": 1.0272843215636964e-05, "loss": 0.4573, "step": 10763 }, { "epoch": 1.519909630048009, "grad_norm": 3.286642599153669, "learning_rate": 1.0271319327048684e-05, "loss": 0.4986, "step": 10764 }, { "epoch": 1.520050833097995, "grad_norm": 3.3746420725995456, "learning_rate": 1.026979543215506e-05, "loss": 0.5442, "step": 10765 }, { "epoch": 1.5201920361479808, "grad_norm": 2.8701017966810083, "learning_rate": 1.0268271530991509e-05, "loss": 0.5146, "step": 10766 }, { "epoch": 1.5203332391979667, "grad_norm": 3.054861134058723, "learning_rate": 1.0266747623593445e-05, "loss": 0.5298, "step": 10767 }, { "epoch": 1.5204744422479526, "grad_norm": 3.7143729809583363, "learning_rate": 1.026522370999628e-05, "loss": 0.6361, "step": 10768 }, { "epoch": 1.5206156452979385, "grad_norm": 3.050900645590824, "learning_rate": 1.0263699790235428e-05, "loss": 0.4242, "step": 10769 }, { "epoch": 1.5207568483479244, "grad_norm": 3.941076467403238, "learning_rate": 1.0262175864346307e-05, "loss": 0.6051, "step": 10770 }, { "epoch": 1.5208980513979102, "grad_norm": 4.799702962143171, "learning_rate": 1.0260651932364336e-05, "loss": 0.7998, "step": 10771 }, { "epoch": 1.5210392544478961, "grad_norm": 4.7578413410473175, "learning_rate": 1.0259127994324923e-05, "loss": 0.9242, "step": 10772 }, { "epoch": 1.521180457497882, "grad_norm": 3.3859820689312192, "learning_rate": 1.025760405026349e-05, "loss": 0.5839, "step": 10773 }, { "epoch": 1.5213216605478679, "grad_norm": 3.092771110797661, "learning_rate": 1.0256080100215448e-05, "loss": 0.5667, "step": 10774 }, { "epoch": 1.5214628635978538, "grad_norm": 3.781753774865506, "learning_rate": 1.0254556144216217e-05, "loss": 0.5657, "step": 10775 }, { "epoch": 1.5216040666478396, "grad_norm": 3.5143576569437984, "learning_rate": 1.025303218230121e-05, "loss": 0.626, "step": 10776 }, { "epoch": 1.5217452696978255, "grad_norm": 3.7430402918285535, "learning_rate": 1.0251508214505846e-05, "loss": 0.5763, "step": 10777 }, { "epoch": 1.5218864727478114, "grad_norm": 3.860088208280659, "learning_rate": 1.0249984240865534e-05, "loss": 0.5832, "step": 10778 }, { "epoch": 1.5220276757977973, "grad_norm": 3.722200154794035, "learning_rate": 1.0248460261415702e-05, "loss": 0.5673, "step": 10779 }, { "epoch": 1.5221688788477832, "grad_norm": 3.025008856647848, "learning_rate": 1.024693627619176e-05, "loss": 0.5136, "step": 10780 }, { "epoch": 1.522310081897769, "grad_norm": 3.5513118389981124, "learning_rate": 1.0245412285229124e-05, "loss": 0.5398, "step": 10781 }, { "epoch": 1.522451284947755, "grad_norm": 3.5335742702715565, "learning_rate": 1.0243888288563213e-05, "loss": 0.6397, "step": 10782 }, { "epoch": 1.5225924879977408, "grad_norm": 3.7466056213979533, "learning_rate": 1.0242364286229445e-05, "loss": 0.6529, "step": 10783 }, { "epoch": 1.5227336910477267, "grad_norm": 3.529590249552236, "learning_rate": 1.0240840278263233e-05, "loss": 0.6124, "step": 10784 }, { "epoch": 1.5228748940977126, "grad_norm": 4.947686941480129, "learning_rate": 1.0239316264699999e-05, "loss": 0.6988, "step": 10785 }, { "epoch": 1.5230160971476985, "grad_norm": 3.410002209791899, "learning_rate": 1.0237792245575158e-05, "loss": 0.5477, "step": 10786 }, { "epoch": 1.5231573001976844, "grad_norm": 3.60029846246348, "learning_rate": 1.0236268220924126e-05, "loss": 0.6168, "step": 10787 }, { "epoch": 1.5232985032476702, "grad_norm": 3.1893475343815605, "learning_rate": 1.0234744190782326e-05, "loss": 0.5347, "step": 10788 }, { "epoch": 1.5234397062976561, "grad_norm": 3.338189965567737, "learning_rate": 1.023322015518517e-05, "loss": 0.6376, "step": 10789 }, { "epoch": 1.523580909347642, "grad_norm": 3.5114590987758914, "learning_rate": 1.0231696114168077e-05, "loss": 0.6213, "step": 10790 }, { "epoch": 1.523722112397628, "grad_norm": 3.2079116904648983, "learning_rate": 1.0230172067766469e-05, "loss": 0.5426, "step": 10791 }, { "epoch": 1.5238633154476138, "grad_norm": 2.614659926126286, "learning_rate": 1.022864801601576e-05, "loss": 0.4739, "step": 10792 }, { "epoch": 1.5240045184975997, "grad_norm": 3.0033341586244533, "learning_rate": 1.0227123958951372e-05, "loss": 0.5594, "step": 10793 }, { "epoch": 1.5241457215475855, "grad_norm": 3.735375929696478, "learning_rate": 1.022559989660872e-05, "loss": 0.5841, "step": 10794 }, { "epoch": 1.5242869245975714, "grad_norm": 3.670593706331668, "learning_rate": 1.0224075829023225e-05, "loss": 0.6564, "step": 10795 }, { "epoch": 1.5244281276475573, "grad_norm": 4.339592549836884, "learning_rate": 1.0222551756230304e-05, "loss": 0.6286, "step": 10796 }, { "epoch": 1.5245693306975432, "grad_norm": 3.7122236862730857, "learning_rate": 1.0221027678265374e-05, "loss": 0.5415, "step": 10797 }, { "epoch": 1.524710533747529, "grad_norm": 3.006824213688265, "learning_rate": 1.0219503595163857e-05, "loss": 0.5853, "step": 10798 }, { "epoch": 1.524851736797515, "grad_norm": 2.7327168360891796, "learning_rate": 1.0217979506961171e-05, "loss": 0.437, "step": 10799 }, { "epoch": 1.5249929398475008, "grad_norm": 3.410055426051843, "learning_rate": 1.0216455413692738e-05, "loss": 0.5988, "step": 10800 }, { "epoch": 1.5251341428974867, "grad_norm": 3.7942416400220322, "learning_rate": 1.0214931315393972e-05, "loss": 0.6408, "step": 10801 }, { "epoch": 1.5252753459474726, "grad_norm": 4.179932471649775, "learning_rate": 1.0213407212100296e-05, "loss": 0.6958, "step": 10802 }, { "epoch": 1.5254165489974585, "grad_norm": 3.6348708571818844, "learning_rate": 1.0211883103847132e-05, "loss": 0.6412, "step": 10803 }, { "epoch": 1.5255577520474444, "grad_norm": 4.302144685172392, "learning_rate": 1.0210358990669889e-05, "loss": 0.5559, "step": 10804 }, { "epoch": 1.5256989550974303, "grad_norm": 2.9180947112451485, "learning_rate": 1.0208834872604e-05, "loss": 0.5373, "step": 10805 }, { "epoch": 1.5258401581474161, "grad_norm": 4.376543422439997, "learning_rate": 1.0207310749684877e-05, "loss": 0.6667, "step": 10806 }, { "epoch": 1.525981361197402, "grad_norm": 3.279232572524097, "learning_rate": 1.020578662194794e-05, "loss": 0.5411, "step": 10807 }, { "epoch": 1.526122564247388, "grad_norm": 4.640963051513512, "learning_rate": 1.0204262489428611e-05, "loss": 0.7202, "step": 10808 }, { "epoch": 1.5262637672973738, "grad_norm": 3.7365046219987947, "learning_rate": 1.0202738352162312e-05, "loss": 0.6673, "step": 10809 }, { "epoch": 1.5264049703473597, "grad_norm": 3.529370069058791, "learning_rate": 1.020121421018446e-05, "loss": 0.5743, "step": 10810 }, { "epoch": 1.5265461733973453, "grad_norm": 3.4899936980636697, "learning_rate": 1.0199690063530476e-05, "loss": 0.6175, "step": 10811 }, { "epoch": 1.5266873764473312, "grad_norm": 3.0779141600897613, "learning_rate": 1.0198165912235784e-05, "loss": 0.4678, "step": 10812 }, { "epoch": 1.526828579497317, "grad_norm": 3.4354560135490133, "learning_rate": 1.0196641756335799e-05, "loss": 0.5682, "step": 10813 }, { "epoch": 1.526969782547303, "grad_norm": 4.260733881178502, "learning_rate": 1.0195117595865947e-05, "loss": 0.707, "step": 10814 }, { "epoch": 1.5271109855972889, "grad_norm": 2.877082196128858, "learning_rate": 1.0193593430861641e-05, "loss": 0.4357, "step": 10815 }, { "epoch": 1.5272521886472747, "grad_norm": 3.389076909288225, "learning_rate": 1.0192069261358313e-05, "loss": 0.5568, "step": 10816 }, { "epoch": 1.5273933916972606, "grad_norm": 3.4653763484809947, "learning_rate": 1.0190545087391374e-05, "loss": 0.6226, "step": 10817 }, { "epoch": 1.5275345947472465, "grad_norm": 4.420364930901948, "learning_rate": 1.0189020908996255e-05, "loss": 0.6211, "step": 10818 }, { "epoch": 1.5276757977972324, "grad_norm": 3.60835856013331, "learning_rate": 1.0187496726208367e-05, "loss": 0.5551, "step": 10819 }, { "epoch": 1.5278170008472183, "grad_norm": 3.8806064405786644, "learning_rate": 1.0185972539063139e-05, "loss": 0.6031, "step": 10820 }, { "epoch": 1.5279582038972042, "grad_norm": 3.772379972771627, "learning_rate": 1.0184448347595986e-05, "loss": 0.5701, "step": 10821 }, { "epoch": 1.52809940694719, "grad_norm": 3.3146146513027634, "learning_rate": 1.0182924151842337e-05, "loss": 0.4833, "step": 10822 }, { "epoch": 1.528240609997176, "grad_norm": 3.1263709259459835, "learning_rate": 1.0181399951837607e-05, "loss": 0.6202, "step": 10823 }, { "epoch": 1.5283818130471618, "grad_norm": 2.985224087053555, "learning_rate": 1.0179875747617221e-05, "loss": 0.5199, "step": 10824 }, { "epoch": 1.5285230160971477, "grad_norm": 3.56091592489957, "learning_rate": 1.01783515392166e-05, "loss": 0.5115, "step": 10825 }, { "epoch": 1.5286642191471336, "grad_norm": 3.628655433667966, "learning_rate": 1.0176827326671168e-05, "loss": 0.6372, "step": 10826 }, { "epoch": 1.5288054221971195, "grad_norm": 3.6780830269367666, "learning_rate": 1.0175303110016343e-05, "loss": 0.6047, "step": 10827 }, { "epoch": 1.5289466252471053, "grad_norm": 3.872800740182754, "learning_rate": 1.017377888928755e-05, "loss": 0.5073, "step": 10828 }, { "epoch": 1.5290878282970912, "grad_norm": 4.571592073931463, "learning_rate": 1.0172254664520212e-05, "loss": 0.7177, "step": 10829 }, { "epoch": 1.529229031347077, "grad_norm": 3.123048871079358, "learning_rate": 1.017073043574975e-05, "loss": 0.5308, "step": 10830 }, { "epoch": 1.529370234397063, "grad_norm": 3.4301184161878258, "learning_rate": 1.0169206203011585e-05, "loss": 0.5928, "step": 10831 }, { "epoch": 1.5295114374470489, "grad_norm": 3.3770221408773113, "learning_rate": 1.016768196634114e-05, "loss": 0.5661, "step": 10832 }, { "epoch": 1.5296526404970348, "grad_norm": 3.564144779157971, "learning_rate": 1.016615772577384e-05, "loss": 0.5623, "step": 10833 }, { "epoch": 1.5297938435470206, "grad_norm": 3.9949911416556816, "learning_rate": 1.0164633481345108e-05, "loss": 0.5961, "step": 10834 }, { "epoch": 1.5299350465970065, "grad_norm": 3.4903135639271707, "learning_rate": 1.0163109233090362e-05, "loss": 0.6087, "step": 10835 }, { "epoch": 1.5300762496469924, "grad_norm": 3.7903213727824485, "learning_rate": 1.0161584981045029e-05, "loss": 0.6402, "step": 10836 }, { "epoch": 1.5302174526969783, "grad_norm": 3.7117570409018743, "learning_rate": 1.0160060725244531e-05, "loss": 0.6447, "step": 10837 }, { "epoch": 1.5303586557469642, "grad_norm": 3.8528055911004713, "learning_rate": 1.0158536465724291e-05, "loss": 0.5457, "step": 10838 }, { "epoch": 1.53049985879695, "grad_norm": 3.838165064205097, "learning_rate": 1.0157012202519732e-05, "loss": 0.6316, "step": 10839 }, { "epoch": 1.530641061846936, "grad_norm": 3.570666249953665, "learning_rate": 1.0155487935666277e-05, "loss": 0.519, "step": 10840 }, { "epoch": 1.5307822648969218, "grad_norm": 3.1061534079062816, "learning_rate": 1.0153963665199346e-05, "loss": 0.5304, "step": 10841 }, { "epoch": 1.5309234679469077, "grad_norm": 4.150516022070862, "learning_rate": 1.0152439391154372e-05, "loss": 0.7266, "step": 10842 }, { "epoch": 1.5310646709968936, "grad_norm": 4.389242780359409, "learning_rate": 1.015091511356677e-05, "loss": 0.5468, "step": 10843 }, { "epoch": 1.5312058740468795, "grad_norm": 3.628326860197344, "learning_rate": 1.0149390832471965e-05, "loss": 0.6314, "step": 10844 }, { "epoch": 1.5313470770968651, "grad_norm": 3.3240980300636638, "learning_rate": 1.0147866547905383e-05, "loss": 0.6569, "step": 10845 }, { "epoch": 1.531488280146851, "grad_norm": 3.2715836993123477, "learning_rate": 1.0146342259902446e-05, "loss": 0.5062, "step": 10846 }, { "epoch": 1.531629483196837, "grad_norm": 4.925787943714355, "learning_rate": 1.0144817968498578e-05, "loss": 0.6173, "step": 10847 }, { "epoch": 1.5317706862468228, "grad_norm": 3.050127053992394, "learning_rate": 1.0143293673729202e-05, "loss": 0.51, "step": 10848 }, { "epoch": 1.5319118892968087, "grad_norm": 4.0111594728175355, "learning_rate": 1.0141769375629744e-05, "loss": 0.6394, "step": 10849 }, { "epoch": 1.5320530923467945, "grad_norm": 3.4653401401748085, "learning_rate": 1.0140245074235624e-05, "loss": 0.5273, "step": 10850 }, { "epoch": 1.5321942953967804, "grad_norm": 3.749479047890687, "learning_rate": 1.0138720769582275e-05, "loss": 0.5919, "step": 10851 }, { "epoch": 1.5323354984467663, "grad_norm": 3.758078417935565, "learning_rate": 1.013719646170511e-05, "loss": 0.6169, "step": 10852 }, { "epoch": 1.5324767014967522, "grad_norm": 3.3955507907120395, "learning_rate": 1.013567215063956e-05, "loss": 0.6259, "step": 10853 }, { "epoch": 1.532617904546738, "grad_norm": 3.6475645910660535, "learning_rate": 1.013414783642105e-05, "loss": 0.582, "step": 10854 }, { "epoch": 1.532759107596724, "grad_norm": 3.8963786497547175, "learning_rate": 1.0132623519084999e-05, "loss": 0.6456, "step": 10855 }, { "epoch": 1.5329003106467098, "grad_norm": 3.4837219679644447, "learning_rate": 1.0131099198666835e-05, "loss": 0.5232, "step": 10856 }, { "epoch": 1.5330415136966957, "grad_norm": 4.315342235930342, "learning_rate": 1.0129574875201984e-05, "loss": 0.783, "step": 10857 }, { "epoch": 1.5331827167466816, "grad_norm": 3.649671292498803, "learning_rate": 1.0128050548725865e-05, "loss": 0.6658, "step": 10858 }, { "epoch": 1.5333239197966675, "grad_norm": 3.7723236022748474, "learning_rate": 1.012652621927391e-05, "loss": 0.5657, "step": 10859 }, { "epoch": 1.5334651228466534, "grad_norm": 3.857105580962735, "learning_rate": 1.0125001886881543e-05, "loss": 0.5787, "step": 10860 }, { "epoch": 1.5336063258966393, "grad_norm": 3.469997853997336, "learning_rate": 1.0123477551584182e-05, "loss": 0.5886, "step": 10861 }, { "epoch": 1.5337475289466251, "grad_norm": 3.5571859371648604, "learning_rate": 1.0121953213417256e-05, "loss": 0.5346, "step": 10862 }, { "epoch": 1.533888731996611, "grad_norm": 3.587325552848159, "learning_rate": 1.0120428872416192e-05, "loss": 0.5733, "step": 10863 }, { "epoch": 1.534029935046597, "grad_norm": 4.385468868983309, "learning_rate": 1.0118904528616411e-05, "loss": 0.7094, "step": 10864 }, { "epoch": 1.5341711380965828, "grad_norm": 3.220405323224042, "learning_rate": 1.0117380182053343e-05, "loss": 0.4289, "step": 10865 }, { "epoch": 1.5343123411465687, "grad_norm": 4.074739176308353, "learning_rate": 1.0115855832762408e-05, "loss": 0.6375, "step": 10866 }, { "epoch": 1.5344535441965546, "grad_norm": 2.8811642147672734, "learning_rate": 1.0114331480779032e-05, "loss": 0.4714, "step": 10867 }, { "epoch": 1.5345947472465404, "grad_norm": 3.9884244352780023, "learning_rate": 1.0112807126138646e-05, "loss": 0.7534, "step": 10868 }, { "epoch": 1.5347359502965263, "grad_norm": 3.000433603460855, "learning_rate": 1.0111282768876668e-05, "loss": 0.5358, "step": 10869 }, { "epoch": 1.5348771533465122, "grad_norm": 3.3341796864822375, "learning_rate": 1.0109758409028527e-05, "loss": 0.4796, "step": 10870 }, { "epoch": 1.535018356396498, "grad_norm": 3.8213246943267243, "learning_rate": 1.0108234046629649e-05, "loss": 0.6433, "step": 10871 }, { "epoch": 1.535159559446484, "grad_norm": 4.041295045368597, "learning_rate": 1.0106709681715456e-05, "loss": 0.5925, "step": 10872 }, { "epoch": 1.5353007624964699, "grad_norm": 3.1904937607155457, "learning_rate": 1.0105185314321379e-05, "loss": 0.4692, "step": 10873 }, { "epoch": 1.5354419655464557, "grad_norm": 3.368556222712298, "learning_rate": 1.0103660944482841e-05, "loss": 0.5227, "step": 10874 }, { "epoch": 1.5355831685964416, "grad_norm": 3.682840019916227, "learning_rate": 1.0102136572235264e-05, "loss": 0.7302, "step": 10875 }, { "epoch": 1.5357243716464275, "grad_norm": 3.7408249587892626, "learning_rate": 1.0100612197614076e-05, "loss": 0.5307, "step": 10876 }, { "epoch": 1.5358655746964134, "grad_norm": 3.4428531654573837, "learning_rate": 1.0099087820654712e-05, "loss": 0.5151, "step": 10877 }, { "epoch": 1.5360067777463993, "grad_norm": 3.9639534357029484, "learning_rate": 1.0097563441392582e-05, "loss": 0.6823, "step": 10878 }, { "epoch": 1.5361479807963851, "grad_norm": 4.277259759764523, "learning_rate": 1.0096039059863123e-05, "loss": 0.7096, "step": 10879 }, { "epoch": 1.536289183846371, "grad_norm": 3.5369173275654973, "learning_rate": 1.0094514676101759e-05, "loss": 0.6019, "step": 10880 }, { "epoch": 1.536430386896357, "grad_norm": 3.127945643973049, "learning_rate": 1.0092990290143912e-05, "loss": 0.5274, "step": 10881 }, { "epoch": 1.5365715899463428, "grad_norm": 3.995972761588996, "learning_rate": 1.0091465902025012e-05, "loss": 0.6019, "step": 10882 }, { "epoch": 1.5367127929963287, "grad_norm": 2.772659005933049, "learning_rate": 1.0089941511780485e-05, "loss": 0.4603, "step": 10883 }, { "epoch": 1.5368539960463146, "grad_norm": 4.108210193490873, "learning_rate": 1.0088417119445752e-05, "loss": 0.6784, "step": 10884 }, { "epoch": 1.5369951990963004, "grad_norm": 3.7366079617792307, "learning_rate": 1.008689272505625e-05, "loss": 0.545, "step": 10885 }, { "epoch": 1.5371364021462863, "grad_norm": 3.4338635343940407, "learning_rate": 1.0085368328647395e-05, "loss": 0.632, "step": 10886 }, { "epoch": 1.5372776051962722, "grad_norm": 3.532957904935381, "learning_rate": 1.0083843930254616e-05, "loss": 0.6199, "step": 10887 }, { "epoch": 1.537418808246258, "grad_norm": 4.099484117642051, "learning_rate": 1.008231952991334e-05, "loss": 0.5915, "step": 10888 }, { "epoch": 1.537560011296244, "grad_norm": 4.195137685907803, "learning_rate": 1.0080795127658995e-05, "loss": 0.5924, "step": 10889 }, { "epoch": 1.5377012143462299, "grad_norm": 3.287018401571734, "learning_rate": 1.0079270723527005e-05, "loss": 0.4986, "step": 10890 }, { "epoch": 1.5378424173962157, "grad_norm": 3.867946739072173, "learning_rate": 1.0077746317552802e-05, "loss": 0.5672, "step": 10891 }, { "epoch": 1.5379836204462016, "grad_norm": 3.9683482267674024, "learning_rate": 1.0076221909771805e-05, "loss": 0.6451, "step": 10892 }, { "epoch": 1.5381248234961875, "grad_norm": 3.9692343494778886, "learning_rate": 1.007469750021944e-05, "loss": 0.6517, "step": 10893 }, { "epoch": 1.5382660265461734, "grad_norm": 3.6171686861649275, "learning_rate": 1.0073173088931143e-05, "loss": 0.5841, "step": 10894 }, { "epoch": 1.5384072295961593, "grad_norm": 2.8364643047584783, "learning_rate": 1.007164867594233e-05, "loss": 0.4696, "step": 10895 }, { "epoch": 1.5385484326461452, "grad_norm": 3.1662089758908203, "learning_rate": 1.0070124261288437e-05, "loss": 0.5704, "step": 10896 }, { "epoch": 1.538689635696131, "grad_norm": 3.504347518699753, "learning_rate": 1.0068599845004885e-05, "loss": 0.4925, "step": 10897 }, { "epoch": 1.538830838746117, "grad_norm": 3.444490853475091, "learning_rate": 1.0067075427127103e-05, "loss": 0.5464, "step": 10898 }, { "epoch": 1.5389720417961028, "grad_norm": 3.1880651206828645, "learning_rate": 1.0065551007690515e-05, "loss": 0.5048, "step": 10899 }, { "epoch": 1.5391132448460887, "grad_norm": 3.6171025722897694, "learning_rate": 1.0064026586730553e-05, "loss": 0.5443, "step": 10900 }, { "epoch": 1.5392544478960746, "grad_norm": 3.893147381024192, "learning_rate": 1.0062502164282638e-05, "loss": 0.632, "step": 10901 }, { "epoch": 1.5393956509460605, "grad_norm": 2.7743224846134216, "learning_rate": 1.00609777403822e-05, "loss": 0.4383, "step": 10902 }, { "epoch": 1.5395368539960463, "grad_norm": 2.8162866545508973, "learning_rate": 1.0059453315064666e-05, "loss": 0.5759, "step": 10903 }, { "epoch": 1.5396780570460322, "grad_norm": 3.7527013171827748, "learning_rate": 1.0057928888365462e-05, "loss": 0.5241, "step": 10904 }, { "epoch": 1.539819260096018, "grad_norm": 4.002753136683803, "learning_rate": 1.0056404460320018e-05, "loss": 0.6505, "step": 10905 }, { "epoch": 1.539960463146004, "grad_norm": 4.29698550002353, "learning_rate": 1.0054880030963756e-05, "loss": 0.7404, "step": 10906 }, { "epoch": 1.5401016661959899, "grad_norm": 4.650099967825517, "learning_rate": 1.0053355600332109e-05, "loss": 0.6484, "step": 10907 }, { "epoch": 1.5402428692459758, "grad_norm": 3.1023745987678275, "learning_rate": 1.0051831168460497e-05, "loss": 0.4934, "step": 10908 }, { "epoch": 1.5403840722959616, "grad_norm": 3.805794474564786, "learning_rate": 1.0050306735384354e-05, "loss": 0.6202, "step": 10909 }, { "epoch": 1.5405252753459475, "grad_norm": 4.0769701433264185, "learning_rate": 1.0048782301139102e-05, "loss": 0.6403, "step": 10910 }, { "epoch": 1.5406664783959334, "grad_norm": 3.661659374219437, "learning_rate": 1.0047257865760173e-05, "loss": 0.6022, "step": 10911 }, { "epoch": 1.5408076814459193, "grad_norm": 2.9733222828040256, "learning_rate": 1.0045733429282989e-05, "loss": 0.4623, "step": 10912 }, { "epoch": 1.5409488844959052, "grad_norm": 3.4721395546704117, "learning_rate": 1.0044208991742981e-05, "loss": 0.6567, "step": 10913 }, { "epoch": 1.541090087545891, "grad_norm": 3.449648298353695, "learning_rate": 1.0042684553175575e-05, "loss": 0.604, "step": 10914 }, { "epoch": 1.541231290595877, "grad_norm": 3.9459203639663087, "learning_rate": 1.0041160113616199e-05, "loss": 0.6785, "step": 10915 }, { "epoch": 1.5413724936458628, "grad_norm": 4.241477861162531, "learning_rate": 1.0039635673100282e-05, "loss": 0.5823, "step": 10916 }, { "epoch": 1.5415136966958487, "grad_norm": 3.5979469168791915, "learning_rate": 1.0038111231663243e-05, "loss": 0.5902, "step": 10917 }, { "epoch": 1.5416548997458346, "grad_norm": 3.3145853515199946, "learning_rate": 1.0036586789340518e-05, "loss": 0.5782, "step": 10918 }, { "epoch": 1.5417961027958205, "grad_norm": 4.052384017712191, "learning_rate": 1.0035062346167535e-05, "loss": 0.6741, "step": 10919 }, { "epoch": 1.5419373058458063, "grad_norm": 4.067502849031871, "learning_rate": 1.0033537902179716e-05, "loss": 0.6779, "step": 10920 }, { "epoch": 1.5420785088957922, "grad_norm": 3.615298237457858, "learning_rate": 1.0032013457412488e-05, "loss": 0.6203, "step": 10921 }, { "epoch": 1.5422197119457781, "grad_norm": 3.36791644879669, "learning_rate": 1.0030489011901285e-05, "loss": 0.6217, "step": 10922 }, { "epoch": 1.542360914995764, "grad_norm": 3.9547204754781426, "learning_rate": 1.0028964565681531e-05, "loss": 0.7283, "step": 10923 }, { "epoch": 1.5425021180457499, "grad_norm": 3.6423541218236712, "learning_rate": 1.0027440118788649e-05, "loss": 0.5856, "step": 10924 }, { "epoch": 1.5426433210957358, "grad_norm": 3.591299023026102, "learning_rate": 1.0025915671258074e-05, "loss": 0.5306, "step": 10925 }, { "epoch": 1.5427845241457216, "grad_norm": 3.7569841752755098, "learning_rate": 1.0024391223125226e-05, "loss": 0.6327, "step": 10926 }, { "epoch": 1.5429257271957075, "grad_norm": 3.2999671243887367, "learning_rate": 1.002286677442554e-05, "loss": 0.5714, "step": 10927 }, { "epoch": 1.5430669302456934, "grad_norm": 3.5940693999363362, "learning_rate": 1.0021342325194441e-05, "loss": 0.5157, "step": 10928 }, { "epoch": 1.5432081332956793, "grad_norm": 3.280275048012509, "learning_rate": 1.0019817875467352e-05, "loss": 0.5629, "step": 10929 }, { "epoch": 1.5433493363456652, "grad_norm": 3.281031566404936, "learning_rate": 1.0018293425279706e-05, "loss": 0.4773, "step": 10930 }, { "epoch": 1.543490539395651, "grad_norm": 3.0563608862167, "learning_rate": 1.0016768974666929e-05, "loss": 0.5303, "step": 10931 }, { "epoch": 1.543631742445637, "grad_norm": 3.263004615260855, "learning_rate": 1.0015244523664447e-05, "loss": 0.5046, "step": 10932 }, { "epoch": 1.5437729454956228, "grad_norm": 3.3023444517668517, "learning_rate": 1.001372007230769e-05, "loss": 0.5476, "step": 10933 }, { "epoch": 1.5439141485456087, "grad_norm": 3.531429004575241, "learning_rate": 1.0012195620632084e-05, "loss": 0.4837, "step": 10934 }, { "epoch": 1.5440553515955946, "grad_norm": 4.128862736031032, "learning_rate": 1.0010671168673058e-05, "loss": 0.4588, "step": 10935 }, { "epoch": 1.5441965546455805, "grad_norm": 3.4545110107729484, "learning_rate": 1.0009146716466038e-05, "loss": 0.4562, "step": 10936 }, { "epoch": 1.5443377576955664, "grad_norm": 2.9724091653720506, "learning_rate": 1.0007622264046452e-05, "loss": 0.4546, "step": 10937 }, { "epoch": 1.5444789607455522, "grad_norm": 3.3485666957552604, "learning_rate": 1.0006097811449726e-05, "loss": 0.516, "step": 10938 }, { "epoch": 1.5446201637955381, "grad_norm": 4.883901709674883, "learning_rate": 1.0004573358711291e-05, "loss": 0.8398, "step": 10939 }, { "epoch": 1.544761366845524, "grad_norm": 3.6774825047371698, "learning_rate": 1.0003048905866577e-05, "loss": 0.651, "step": 10940 }, { "epoch": 1.54490256989551, "grad_norm": 3.6521124080385516, "learning_rate": 1.0001524452951002e-05, "loss": 0.5474, "step": 10941 }, { "epoch": 1.5450437729454958, "grad_norm": 3.850576827009961, "learning_rate": 1e-05, "loss": 0.4882, "step": 10942 }, { "epoch": 1.5451849759954817, "grad_norm": 3.9981016267525864, "learning_rate": 9.998475547049001e-06, "loss": 0.6308, "step": 10943 }, { "epoch": 1.5453261790454675, "grad_norm": 4.956245261273162, "learning_rate": 9.996951094133426e-06, "loss": 0.5662, "step": 10944 }, { "epoch": 1.5454673820954534, "grad_norm": 4.783976378252596, "learning_rate": 9.995426641288709e-06, "loss": 0.8168, "step": 10945 }, { "epoch": 1.5456085851454393, "grad_norm": 3.615977929741095, "learning_rate": 9.993902188550276e-06, "loss": 0.619, "step": 10946 }, { "epoch": 1.545749788195425, "grad_norm": 3.9095515198498942, "learning_rate": 9.992377735953553e-06, "loss": 0.5923, "step": 10947 }, { "epoch": 1.5458909912454109, "grad_norm": 3.1330331228393087, "learning_rate": 9.990853283533968e-06, "loss": 0.5671, "step": 10948 }, { "epoch": 1.5460321942953967, "grad_norm": 3.3054078991648543, "learning_rate": 9.989328831326945e-06, "loss": 0.5971, "step": 10949 }, { "epoch": 1.5461733973453826, "grad_norm": 3.390727016763065, "learning_rate": 9.987804379367919e-06, "loss": 0.5824, "step": 10950 }, { "epoch": 1.5463146003953685, "grad_norm": 4.229698253503014, "learning_rate": 9.986279927692312e-06, "loss": 0.6429, "step": 10951 }, { "epoch": 1.5464558034453544, "grad_norm": 3.8337285158981875, "learning_rate": 9.984755476335556e-06, "loss": 0.6007, "step": 10952 }, { "epoch": 1.5465970064953403, "grad_norm": 3.890871821218218, "learning_rate": 9.983231025333073e-06, "loss": 0.5898, "step": 10953 }, { "epoch": 1.5467382095453261, "grad_norm": 3.819259567439549, "learning_rate": 9.981706574720296e-06, "loss": 0.6331, "step": 10954 }, { "epoch": 1.546879412595312, "grad_norm": 3.5820446940855803, "learning_rate": 9.980182124532651e-06, "loss": 0.6529, "step": 10955 }, { "epoch": 1.547020615645298, "grad_norm": 4.260721031234754, "learning_rate": 9.978657674805564e-06, "loss": 0.5508, "step": 10956 }, { "epoch": 1.5471618186952838, "grad_norm": 3.223921431803935, "learning_rate": 9.977133225574464e-06, "loss": 0.429, "step": 10957 }, { "epoch": 1.5473030217452697, "grad_norm": 3.9918675782315147, "learning_rate": 9.975608776874775e-06, "loss": 0.4975, "step": 10958 }, { "epoch": 1.5474442247952556, "grad_norm": 3.9052264789490567, "learning_rate": 9.97408432874193e-06, "loss": 0.5224, "step": 10959 }, { "epoch": 1.5475854278452414, "grad_norm": 3.0299842304804283, "learning_rate": 9.972559881211353e-06, "loss": 0.5199, "step": 10960 }, { "epoch": 1.5477266308952273, "grad_norm": 4.543207033493665, "learning_rate": 9.971035434318472e-06, "loss": 0.6872, "step": 10961 }, { "epoch": 1.5478678339452132, "grad_norm": 3.5331514811431513, "learning_rate": 9.969510988098716e-06, "loss": 0.638, "step": 10962 }, { "epoch": 1.548009036995199, "grad_norm": 3.551643965214065, "learning_rate": 9.967986542587512e-06, "loss": 0.5838, "step": 10963 }, { "epoch": 1.548150240045185, "grad_norm": 3.660109173354012, "learning_rate": 9.966462097820289e-06, "loss": 0.6389, "step": 10964 }, { "epoch": 1.5482914430951709, "grad_norm": 4.720458932388365, "learning_rate": 9.96493765383247e-06, "loss": 0.7642, "step": 10965 }, { "epoch": 1.5484326461451567, "grad_norm": 3.9246607595384146, "learning_rate": 9.963413210659485e-06, "loss": 0.5275, "step": 10966 }, { "epoch": 1.5485738491951426, "grad_norm": 3.7851812279665027, "learning_rate": 9.961888768336758e-06, "loss": 0.6899, "step": 10967 }, { "epoch": 1.5487150522451285, "grad_norm": 3.998468331831382, "learning_rate": 9.960364326899723e-06, "loss": 0.81, "step": 10968 }, { "epoch": 1.5488562552951144, "grad_norm": 3.063442862938305, "learning_rate": 9.958839886383803e-06, "loss": 0.5054, "step": 10969 }, { "epoch": 1.5489974583451003, "grad_norm": 3.3488784109610217, "learning_rate": 9.957315446824425e-06, "loss": 0.5648, "step": 10970 }, { "epoch": 1.5491386613950862, "grad_norm": 3.9199635333857534, "learning_rate": 9.95579100825702e-06, "loss": 0.511, "step": 10971 }, { "epoch": 1.549279864445072, "grad_norm": 3.0739812756048384, "learning_rate": 9.954266570717014e-06, "loss": 0.5009, "step": 10972 }, { "epoch": 1.549421067495058, "grad_norm": 3.669459655383874, "learning_rate": 9.952742134239832e-06, "loss": 0.5425, "step": 10973 }, { "epoch": 1.5495622705450438, "grad_norm": 3.2824250677714235, "learning_rate": 9.951217698860902e-06, "loss": 0.5803, "step": 10974 }, { "epoch": 1.5497034735950297, "grad_norm": 3.5848429712375625, "learning_rate": 9.949693264615649e-06, "loss": 0.5386, "step": 10975 }, { "epoch": 1.5498446766450156, "grad_norm": 3.6956291615012256, "learning_rate": 9.948168831539505e-06, "loss": 0.6402, "step": 10976 }, { "epoch": 1.5499858796950015, "grad_norm": 3.2395045614414943, "learning_rate": 9.946644399667894e-06, "loss": 0.464, "step": 10977 }, { "epoch": 1.5501270827449873, "grad_norm": 3.530201912993151, "learning_rate": 9.945119969036245e-06, "loss": 0.5783, "step": 10978 }, { "epoch": 1.5502682857949732, "grad_norm": 3.0408780303815632, "learning_rate": 9.943595539679984e-06, "loss": 0.5223, "step": 10979 }, { "epoch": 1.550409488844959, "grad_norm": 3.308609321562329, "learning_rate": 9.942071111634538e-06, "loss": 0.4528, "step": 10980 }, { "epoch": 1.5505506918949448, "grad_norm": 3.2772635392157863, "learning_rate": 9.940546684935337e-06, "loss": 0.5212, "step": 10981 }, { "epoch": 1.5506918949449306, "grad_norm": 4.672306016172381, "learning_rate": 9.939022259617805e-06, "loss": 0.7533, "step": 10982 }, { "epoch": 1.5508330979949165, "grad_norm": 3.6361901857489656, "learning_rate": 9.937497835717367e-06, "loss": 0.586, "step": 10983 }, { "epoch": 1.5509743010449024, "grad_norm": 3.648131963915785, "learning_rate": 9.93597341326945e-06, "loss": 0.5248, "step": 10984 }, { "epoch": 1.5511155040948883, "grad_norm": 3.258287010760036, "learning_rate": 9.934448992309486e-06, "loss": 0.5992, "step": 10985 }, { "epoch": 1.5512567071448742, "grad_norm": 3.6078168521211778, "learning_rate": 9.9329245728729e-06, "loss": 0.5795, "step": 10986 }, { "epoch": 1.55139791019486, "grad_norm": 3.9607538668599616, "learning_rate": 9.931400154995116e-06, "loss": 0.6294, "step": 10987 }, { "epoch": 1.551539113244846, "grad_norm": 4.090456691991101, "learning_rate": 9.929875738711565e-06, "loss": 0.5916, "step": 10988 }, { "epoch": 1.5516803162948318, "grad_norm": 4.110139101600692, "learning_rate": 9.928351324057672e-06, "loss": 0.5982, "step": 10989 }, { "epoch": 1.5518215193448177, "grad_norm": 3.206756963829273, "learning_rate": 9.926826911068862e-06, "loss": 0.4756, "step": 10990 }, { "epoch": 1.5519627223948036, "grad_norm": 2.8032570694957144, "learning_rate": 9.925302499780564e-06, "loss": 0.4413, "step": 10991 }, { "epoch": 1.5521039254447895, "grad_norm": 4.168918041833502, "learning_rate": 9.9237780902282e-06, "loss": 0.6589, "step": 10992 }, { "epoch": 1.5522451284947754, "grad_norm": 3.636840634575385, "learning_rate": 9.922253682447203e-06, "loss": 0.5696, "step": 10993 }, { "epoch": 1.5523863315447612, "grad_norm": 3.2346667093673176, "learning_rate": 9.920729276472996e-06, "loss": 0.5692, "step": 10994 }, { "epoch": 1.5525275345947471, "grad_norm": 3.5787025885546284, "learning_rate": 9.919204872341007e-06, "loss": 0.6254, "step": 10995 }, { "epoch": 1.552668737644733, "grad_norm": 3.4985745493997555, "learning_rate": 9.91768047008666e-06, "loss": 0.5508, "step": 10996 }, { "epoch": 1.552809940694719, "grad_norm": 4.029881776238102, "learning_rate": 9.916156069745385e-06, "loss": 0.601, "step": 10997 }, { "epoch": 1.5529511437447048, "grad_norm": 3.3420150765935457, "learning_rate": 9.91463167135261e-06, "loss": 0.5619, "step": 10998 }, { "epoch": 1.5530923467946907, "grad_norm": 2.9321403450381474, "learning_rate": 9.913107274943755e-06, "loss": 0.4683, "step": 10999 }, { "epoch": 1.5532335498446765, "grad_norm": 3.7731773904722172, "learning_rate": 9.91158288055425e-06, "loss": 0.6206, "step": 11000 }, { "epoch": 1.5533747528946624, "grad_norm": 3.235745532869052, "learning_rate": 9.910058488219518e-06, "loss": 0.5215, "step": 11001 }, { "epoch": 1.5535159559446483, "grad_norm": 3.8544401915386244, "learning_rate": 9.90853409797499e-06, "loss": 0.5897, "step": 11002 }, { "epoch": 1.5536571589946342, "grad_norm": 3.347653121218531, "learning_rate": 9.90700970985609e-06, "loss": 0.5605, "step": 11003 }, { "epoch": 1.55379836204462, "grad_norm": 3.632005605306555, "learning_rate": 9.905485323898243e-06, "loss": 0.5682, "step": 11004 }, { "epoch": 1.553939565094606, "grad_norm": 3.383346874243086, "learning_rate": 9.903960940136877e-06, "loss": 0.5214, "step": 11005 }, { "epoch": 1.5540807681445918, "grad_norm": 4.325692870211108, "learning_rate": 9.90243655860742e-06, "loss": 0.631, "step": 11006 }, { "epoch": 1.5542219711945777, "grad_norm": 4.141956621011428, "learning_rate": 9.900912179345293e-06, "loss": 0.5596, "step": 11007 }, { "epoch": 1.5543631742445636, "grad_norm": 4.233159459021565, "learning_rate": 9.899387802385925e-06, "loss": 0.7085, "step": 11008 }, { "epoch": 1.5545043772945495, "grad_norm": 3.038362143285985, "learning_rate": 9.897863427764738e-06, "loss": 0.4693, "step": 11009 }, { "epoch": 1.5546455803445354, "grad_norm": 3.1925020125216217, "learning_rate": 9.896339055517164e-06, "loss": 0.5525, "step": 11010 }, { "epoch": 1.5547867833945213, "grad_norm": 3.4980041751199997, "learning_rate": 9.894814685678625e-06, "loss": 0.5529, "step": 11011 }, { "epoch": 1.5549279864445071, "grad_norm": 3.5712936072907557, "learning_rate": 9.893290318284546e-06, "loss": 0.5523, "step": 11012 }, { "epoch": 1.555069189494493, "grad_norm": 3.3102799881955214, "learning_rate": 9.891765953370353e-06, "loss": 0.4521, "step": 11013 }, { "epoch": 1.555210392544479, "grad_norm": 3.042106438110427, "learning_rate": 9.890241590971475e-06, "loss": 0.5406, "step": 11014 }, { "epoch": 1.5553515955944648, "grad_norm": 2.982570164844036, "learning_rate": 9.888717231123337e-06, "loss": 0.448, "step": 11015 }, { "epoch": 1.5554927986444507, "grad_norm": 3.377970794285306, "learning_rate": 9.88719287386136e-06, "loss": 0.561, "step": 11016 }, { "epoch": 1.5556340016944366, "grad_norm": 3.782535975722361, "learning_rate": 9.885668519220971e-06, "loss": 0.5911, "step": 11017 }, { "epoch": 1.5557752047444224, "grad_norm": 3.7704138366619504, "learning_rate": 9.884144167237595e-06, "loss": 0.6215, "step": 11018 }, { "epoch": 1.5559164077944083, "grad_norm": 4.06741716582298, "learning_rate": 9.88261981794666e-06, "loss": 0.6318, "step": 11019 }, { "epoch": 1.5560576108443942, "grad_norm": 4.094535282158565, "learning_rate": 9.88109547138359e-06, "loss": 0.5665, "step": 11020 }, { "epoch": 1.55619881389438, "grad_norm": 3.721235366169721, "learning_rate": 9.87957112758381e-06, "loss": 0.5356, "step": 11021 }, { "epoch": 1.556340016944366, "grad_norm": 3.2925217123105597, "learning_rate": 9.878046786582745e-06, "loss": 0.5113, "step": 11022 }, { "epoch": 1.5564812199943519, "grad_norm": 3.566011543151594, "learning_rate": 9.876522448415822e-06, "loss": 0.5336, "step": 11023 }, { "epoch": 1.5566224230443377, "grad_norm": 3.776967192416559, "learning_rate": 9.874998113118462e-06, "loss": 0.5697, "step": 11024 }, { "epoch": 1.5567636260943236, "grad_norm": 3.161591556825347, "learning_rate": 9.87347378072609e-06, "loss": 0.5177, "step": 11025 }, { "epoch": 1.5569048291443095, "grad_norm": 4.156065235815623, "learning_rate": 9.871949451274137e-06, "loss": 0.6063, "step": 11026 }, { "epoch": 1.5570460321942954, "grad_norm": 4.047820198423382, "learning_rate": 9.870425124798019e-06, "loss": 0.6944, "step": 11027 }, { "epoch": 1.5571872352442813, "grad_norm": 4.241571837314496, "learning_rate": 9.868900801333168e-06, "loss": 0.9151, "step": 11028 }, { "epoch": 1.5573284382942671, "grad_norm": 2.6854301111055756, "learning_rate": 9.867376480915005e-06, "loss": 0.4321, "step": 11029 }, { "epoch": 1.557469641344253, "grad_norm": 3.962780893883301, "learning_rate": 9.865852163578952e-06, "loss": 0.7327, "step": 11030 }, { "epoch": 1.557610844394239, "grad_norm": 3.2541450940530257, "learning_rate": 9.864327849360445e-06, "loss": 0.5365, "step": 11031 }, { "epoch": 1.5577520474442248, "grad_norm": 3.462869494157753, "learning_rate": 9.862803538294894e-06, "loss": 0.5682, "step": 11032 }, { "epoch": 1.5578932504942107, "grad_norm": 3.109962561887938, "learning_rate": 9.861279230417731e-06, "loss": 0.6087, "step": 11033 }, { "epoch": 1.5580344535441966, "grad_norm": 3.110390532331657, "learning_rate": 9.85975492576438e-06, "loss": 0.5075, "step": 11034 }, { "epoch": 1.5581756565941824, "grad_norm": 3.1082667953036367, "learning_rate": 9.85823062437026e-06, "loss": 0.5156, "step": 11035 }, { "epoch": 1.5583168596441683, "grad_norm": 3.124045467650272, "learning_rate": 9.856706326270801e-06, "loss": 0.4758, "step": 11036 }, { "epoch": 1.5584580626941542, "grad_norm": 3.1760384596278586, "learning_rate": 9.855182031501426e-06, "loss": 0.5358, "step": 11037 }, { "epoch": 1.55859926574414, "grad_norm": 3.643211183138645, "learning_rate": 9.853657740097558e-06, "loss": 0.6096, "step": 11038 }, { "epoch": 1.558740468794126, "grad_norm": 3.1600737490830504, "learning_rate": 9.852133452094617e-06, "loss": 0.5619, "step": 11039 }, { "epoch": 1.5588816718441119, "grad_norm": 4.742996082884778, "learning_rate": 9.850609167528038e-06, "loss": 0.5731, "step": 11040 }, { "epoch": 1.5590228748940977, "grad_norm": 4.2004756402495484, "learning_rate": 9.849084886433234e-06, "loss": 0.6578, "step": 11041 }, { "epoch": 1.5591640779440836, "grad_norm": 3.222498054947762, "learning_rate": 9.847560608845632e-06, "loss": 0.5417, "step": 11042 }, { "epoch": 1.5593052809940695, "grad_norm": 3.832533987296883, "learning_rate": 9.846036334800655e-06, "loss": 0.7, "step": 11043 }, { "epoch": 1.5594464840440554, "grad_norm": 3.566351945791429, "learning_rate": 9.844512064333726e-06, "loss": 0.5696, "step": 11044 }, { "epoch": 1.5595876870940413, "grad_norm": 3.5406809049398236, "learning_rate": 9.842987797480271e-06, "loss": 0.5348, "step": 11045 }, { "epoch": 1.5597288901440272, "grad_norm": 3.4329647204462046, "learning_rate": 9.841463534275712e-06, "loss": 0.5087, "step": 11046 }, { "epoch": 1.559870093194013, "grad_norm": 3.3824969857209575, "learning_rate": 9.83993927475547e-06, "loss": 0.5507, "step": 11047 }, { "epoch": 1.560011296243999, "grad_norm": 3.352357444763647, "learning_rate": 9.838415018954976e-06, "loss": 0.5445, "step": 11048 }, { "epoch": 1.5601524992939848, "grad_norm": 4.207332250125473, "learning_rate": 9.836890766909641e-06, "loss": 0.7608, "step": 11049 }, { "epoch": 1.5602937023439707, "grad_norm": 3.593039251468266, "learning_rate": 9.835366518654897e-06, "loss": 0.5754, "step": 11050 }, { "epoch": 1.5604349053939566, "grad_norm": 4.3556913021754795, "learning_rate": 9.833842274226163e-06, "loss": 0.6453, "step": 11051 }, { "epoch": 1.5605761084439425, "grad_norm": 3.4044251991903027, "learning_rate": 9.832318033658862e-06, "loss": 0.6171, "step": 11052 }, { "epoch": 1.5607173114939283, "grad_norm": 4.163850498836935, "learning_rate": 9.830793796988418e-06, "loss": 0.6102, "step": 11053 }, { "epoch": 1.5608585145439142, "grad_norm": 3.5490780090453344, "learning_rate": 9.829269564250254e-06, "loss": 0.5892, "step": 11054 }, { "epoch": 1.5609997175939, "grad_norm": 3.3306301601829427, "learning_rate": 9.827745335479791e-06, "loss": 0.5288, "step": 11055 }, { "epoch": 1.561140920643886, "grad_norm": 3.1771786079337283, "learning_rate": 9.826221110712451e-06, "loss": 0.4091, "step": 11056 }, { "epoch": 1.5612821236938719, "grad_norm": 4.080740606508169, "learning_rate": 9.824696889983662e-06, "loss": 0.635, "step": 11057 }, { "epoch": 1.5614233267438578, "grad_norm": 4.050565144137989, "learning_rate": 9.823172673328837e-06, "loss": 0.6903, "step": 11058 }, { "epoch": 1.5615645297938436, "grad_norm": 3.388371557857401, "learning_rate": 9.821648460783403e-06, "loss": 0.453, "step": 11059 }, { "epoch": 1.5617057328438295, "grad_norm": 3.3153858697018648, "learning_rate": 9.820124252382784e-06, "loss": 0.4818, "step": 11060 }, { "epoch": 1.5618469358938154, "grad_norm": 3.2259213453138877, "learning_rate": 9.818600048162396e-06, "loss": 0.4808, "step": 11061 }, { "epoch": 1.5619881389438013, "grad_norm": 4.604173032987742, "learning_rate": 9.817075848157666e-06, "loss": 0.8981, "step": 11062 }, { "epoch": 1.5621293419937872, "grad_norm": 3.7249339310392604, "learning_rate": 9.815551652404016e-06, "loss": 0.731, "step": 11063 }, { "epoch": 1.562270545043773, "grad_norm": 4.082670268471065, "learning_rate": 9.814027460936863e-06, "loss": 0.6932, "step": 11064 }, { "epoch": 1.562411748093759, "grad_norm": 2.7667242503987195, "learning_rate": 9.812503273791638e-06, "loss": 0.4621, "step": 11065 }, { "epoch": 1.5625529511437448, "grad_norm": 4.682839506723859, "learning_rate": 9.81097909100375e-06, "loss": 0.741, "step": 11066 }, { "epoch": 1.5626941541937307, "grad_norm": 3.928630660299773, "learning_rate": 9.809454912608628e-06, "loss": 0.7171, "step": 11067 }, { "epoch": 1.5628353572437166, "grad_norm": 2.9733643039082853, "learning_rate": 9.807930738641692e-06, "loss": 0.4513, "step": 11068 }, { "epoch": 1.5629765602937025, "grad_norm": 3.304227200806797, "learning_rate": 9.806406569138362e-06, "loss": 0.5797, "step": 11069 }, { "epoch": 1.5631177633436883, "grad_norm": 3.319060721062751, "learning_rate": 9.804882404134057e-06, "loss": 0.6071, "step": 11070 }, { "epoch": 1.5632589663936742, "grad_norm": 3.5603658602722246, "learning_rate": 9.803358243664203e-06, "loss": 0.6179, "step": 11071 }, { "epoch": 1.5634001694436601, "grad_norm": 3.108087480974047, "learning_rate": 9.801834087764219e-06, "loss": 0.4617, "step": 11072 }, { "epoch": 1.563541372493646, "grad_norm": 3.4097711734676683, "learning_rate": 9.800309936469523e-06, "loss": 0.5444, "step": 11073 }, { "epoch": 1.5636825755436319, "grad_norm": 4.846372969974859, "learning_rate": 9.798785789815545e-06, "loss": 0.62, "step": 11074 }, { "epoch": 1.5638237785936178, "grad_norm": 2.989650454762499, "learning_rate": 9.797261647837692e-06, "loss": 0.5, "step": 11075 }, { "epoch": 1.5639649816436036, "grad_norm": 3.918781523490743, "learning_rate": 9.79573751057139e-06, "loss": 0.6029, "step": 11076 }, { "epoch": 1.5641061846935895, "grad_norm": 3.8081648351780832, "learning_rate": 9.794213378052064e-06, "loss": 0.6746, "step": 11077 }, { "epoch": 1.5642473877435754, "grad_norm": 3.3897958126792673, "learning_rate": 9.792689250315126e-06, "loss": 0.5509, "step": 11078 }, { "epoch": 1.5643885907935613, "grad_norm": 3.05927618414925, "learning_rate": 9.791165127396003e-06, "loss": 0.5128, "step": 11079 }, { "epoch": 1.5645297938435472, "grad_norm": 3.1117284038679647, "learning_rate": 9.789641009330113e-06, "loss": 0.4977, "step": 11080 }, { "epoch": 1.564670996893533, "grad_norm": 3.167531385226866, "learning_rate": 9.788116896152873e-06, "loss": 0.5963, "step": 11081 }, { "epoch": 1.564812199943519, "grad_norm": 3.502735151067711, "learning_rate": 9.786592787899707e-06, "loss": 0.5845, "step": 11082 }, { "epoch": 1.5649534029935046, "grad_norm": 2.8889574180827404, "learning_rate": 9.78506868460603e-06, "loss": 0.4726, "step": 11083 }, { "epoch": 1.5650946060434905, "grad_norm": 3.2941953116583957, "learning_rate": 9.783544586307266e-06, "loss": 0.5367, "step": 11084 }, { "epoch": 1.5652358090934764, "grad_norm": 2.9708544972067394, "learning_rate": 9.78202049303883e-06, "loss": 0.4422, "step": 11085 }, { "epoch": 1.5653770121434623, "grad_norm": 3.052020590486898, "learning_rate": 9.780496404836146e-06, "loss": 0.4874, "step": 11086 }, { "epoch": 1.5655182151934481, "grad_norm": 3.280149907349971, "learning_rate": 9.778972321734627e-06, "loss": 0.5076, "step": 11087 }, { "epoch": 1.565659418243434, "grad_norm": 3.868921145375212, "learning_rate": 9.7774482437697e-06, "loss": 0.6917, "step": 11088 }, { "epoch": 1.56580062129342, "grad_norm": 3.358341175569998, "learning_rate": 9.775924170976778e-06, "loss": 0.5377, "step": 11089 }, { "epoch": 1.5659418243434058, "grad_norm": 3.75340899765257, "learning_rate": 9.77440010339128e-06, "loss": 0.5067, "step": 11090 }, { "epoch": 1.5660830273933917, "grad_norm": 4.191111917903983, "learning_rate": 9.772876041048633e-06, "loss": 0.5928, "step": 11091 }, { "epoch": 1.5662242304433776, "grad_norm": 3.914241081686757, "learning_rate": 9.771351983984241e-06, "loss": 0.6284, "step": 11092 }, { "epoch": 1.5663654334933634, "grad_norm": 3.647270443355041, "learning_rate": 9.769827932233533e-06, "loss": 0.5623, "step": 11093 }, { "epoch": 1.5665066365433493, "grad_norm": 2.874033044616517, "learning_rate": 9.768303885831924e-06, "loss": 0.4243, "step": 11094 }, { "epoch": 1.5666478395933352, "grad_norm": 4.0481746159929, "learning_rate": 9.766779844814833e-06, "loss": 0.5955, "step": 11095 }, { "epoch": 1.566789042643321, "grad_norm": 3.725418615696769, "learning_rate": 9.765255809217676e-06, "loss": 0.6349, "step": 11096 }, { "epoch": 1.566930245693307, "grad_norm": 3.630195606492286, "learning_rate": 9.763731779075874e-06, "loss": 0.5769, "step": 11097 }, { "epoch": 1.5670714487432928, "grad_norm": 3.964939976753005, "learning_rate": 9.762207754424845e-06, "loss": 0.6642, "step": 11098 }, { "epoch": 1.5672126517932787, "grad_norm": 3.107827388510048, "learning_rate": 9.760683735300006e-06, "loss": 0.4407, "step": 11099 }, { "epoch": 1.5673538548432646, "grad_norm": 3.1076273487729758, "learning_rate": 9.759159721736772e-06, "loss": 0.5139, "step": 11100 }, { "epoch": 1.5674950578932505, "grad_norm": 3.432455941254076, "learning_rate": 9.757635713770558e-06, "loss": 0.6043, "step": 11101 }, { "epoch": 1.5676362609432364, "grad_norm": 3.4368827045144537, "learning_rate": 9.75611171143679e-06, "loss": 0.5597, "step": 11102 }, { "epoch": 1.5677774639932223, "grad_norm": 3.8622686984056083, "learning_rate": 9.75458771477088e-06, "loss": 0.6613, "step": 11103 }, { "epoch": 1.5679186670432081, "grad_norm": 4.320354146873605, "learning_rate": 9.753063723808243e-06, "loss": 0.6806, "step": 11104 }, { "epoch": 1.568059870093194, "grad_norm": 4.237852916650013, "learning_rate": 9.7515397385843e-06, "loss": 0.611, "step": 11105 }, { "epoch": 1.56820107314318, "grad_norm": 3.7235754967647066, "learning_rate": 9.750015759134466e-06, "loss": 0.4292, "step": 11106 }, { "epoch": 1.5683422761931658, "grad_norm": 3.7837946213250353, "learning_rate": 9.748491785494156e-06, "loss": 0.6491, "step": 11107 }, { "epoch": 1.5684834792431517, "grad_norm": 4.106287930409589, "learning_rate": 9.746967817698795e-06, "loss": 0.5968, "step": 11108 }, { "epoch": 1.5686246822931376, "grad_norm": 3.5460591945646063, "learning_rate": 9.745443855783786e-06, "loss": 0.5926, "step": 11109 }, { "epoch": 1.5687658853431234, "grad_norm": 3.242944652347343, "learning_rate": 9.743919899784555e-06, "loss": 0.5926, "step": 11110 }, { "epoch": 1.5689070883931093, "grad_norm": 3.4633127855378625, "learning_rate": 9.742395949736513e-06, "loss": 0.5304, "step": 11111 }, { "epoch": 1.5690482914430952, "grad_norm": 3.409286833019807, "learning_rate": 9.740872005675079e-06, "loss": 0.5793, "step": 11112 }, { "epoch": 1.569189494493081, "grad_norm": 3.2526394310511493, "learning_rate": 9.739348067635667e-06, "loss": 0.512, "step": 11113 }, { "epoch": 1.569330697543067, "grad_norm": 2.6577685046314397, "learning_rate": 9.737824135653693e-06, "loss": 0.4477, "step": 11114 }, { "epoch": 1.5694719005930529, "grad_norm": 2.947304831597821, "learning_rate": 9.736300209764574e-06, "loss": 0.5018, "step": 11115 }, { "epoch": 1.5696131036430387, "grad_norm": 3.466513350019249, "learning_rate": 9.734776290003727e-06, "loss": 0.632, "step": 11116 }, { "epoch": 1.5697543066930244, "grad_norm": 3.862780294273257, "learning_rate": 9.733252376406562e-06, "loss": 0.5438, "step": 11117 }, { "epoch": 1.5698955097430103, "grad_norm": 3.6429530295074284, "learning_rate": 9.731728469008493e-06, "loss": 0.6274, "step": 11118 }, { "epoch": 1.5700367127929962, "grad_norm": 3.677838987890372, "learning_rate": 9.730204567844941e-06, "loss": 0.5255, "step": 11119 }, { "epoch": 1.570177915842982, "grad_norm": 3.1903519921937185, "learning_rate": 9.72868067295132e-06, "loss": 0.5241, "step": 11120 }, { "epoch": 1.570319118892968, "grad_norm": 3.137795872426895, "learning_rate": 9.727156784363038e-06, "loss": 0.5198, "step": 11121 }, { "epoch": 1.5704603219429538, "grad_norm": 4.029186221763382, "learning_rate": 9.72563290211552e-06, "loss": 0.7672, "step": 11122 }, { "epoch": 1.5706015249929397, "grad_norm": 3.111058921337772, "learning_rate": 9.724109026244173e-06, "loss": 0.5642, "step": 11123 }, { "epoch": 1.5707427280429256, "grad_norm": 3.129992160510293, "learning_rate": 9.72258515678441e-06, "loss": 0.5152, "step": 11124 }, { "epoch": 1.5708839310929115, "grad_norm": 3.605233650657966, "learning_rate": 9.721061293771657e-06, "loss": 0.5778, "step": 11125 }, { "epoch": 1.5710251341428974, "grad_norm": 3.163063785445116, "learning_rate": 9.719537437241311e-06, "loss": 0.4806, "step": 11126 }, { "epoch": 1.5711663371928832, "grad_norm": 3.6579257866096664, "learning_rate": 9.718013587228797e-06, "loss": 0.5214, "step": 11127 }, { "epoch": 1.5713075402428691, "grad_norm": 3.4923877038648308, "learning_rate": 9.716489743769525e-06, "loss": 0.5846, "step": 11128 }, { "epoch": 1.571448743292855, "grad_norm": 3.209850086627863, "learning_rate": 9.714965906898909e-06, "loss": 0.5516, "step": 11129 }, { "epoch": 1.5715899463428409, "grad_norm": 3.0696041405533574, "learning_rate": 9.713442076652359e-06, "loss": 0.4932, "step": 11130 }, { "epoch": 1.5717311493928268, "grad_norm": 4.3294174459222035, "learning_rate": 9.711918253065296e-06, "loss": 0.6063, "step": 11131 }, { "epoch": 1.5718723524428126, "grad_norm": 3.6518725136188688, "learning_rate": 9.710394436173126e-06, "loss": 0.6039, "step": 11132 }, { "epoch": 1.5720135554927985, "grad_norm": 3.3717098400414933, "learning_rate": 9.708870626011268e-06, "loss": 0.5964, "step": 11133 }, { "epoch": 1.5721547585427844, "grad_norm": 3.5199314382898446, "learning_rate": 9.707346822615127e-06, "loss": 0.5676, "step": 11134 }, { "epoch": 1.5722959615927703, "grad_norm": 3.1102686674681395, "learning_rate": 9.70582302602012e-06, "loss": 0.4788, "step": 11135 }, { "epoch": 1.5724371646427562, "grad_norm": 4.099911587272551, "learning_rate": 9.704299236261658e-06, "loss": 0.5582, "step": 11136 }, { "epoch": 1.572578367692742, "grad_norm": 3.7927158025024026, "learning_rate": 9.702775453375155e-06, "loss": 0.6746, "step": 11137 }, { "epoch": 1.572719570742728, "grad_norm": 4.5018704852279265, "learning_rate": 9.701251677396021e-06, "loss": 0.7546, "step": 11138 }, { "epoch": 1.5728607737927138, "grad_norm": 3.9411871448001734, "learning_rate": 9.699727908359671e-06, "loss": 0.6672, "step": 11139 }, { "epoch": 1.5730019768426997, "grad_norm": 3.610970430498073, "learning_rate": 9.698204146301513e-06, "loss": 0.6281, "step": 11140 }, { "epoch": 1.5731431798926856, "grad_norm": 3.3860477921863312, "learning_rate": 9.696680391256961e-06, "loss": 0.5119, "step": 11141 }, { "epoch": 1.5732843829426715, "grad_norm": 4.062895077140409, "learning_rate": 9.69515664326143e-06, "loss": 0.6455, "step": 11142 }, { "epoch": 1.5734255859926574, "grad_norm": 4.261209456210413, "learning_rate": 9.69363290235032e-06, "loss": 0.7464, "step": 11143 }, { "epoch": 1.5735667890426432, "grad_norm": 3.4530757703896007, "learning_rate": 9.692109168559051e-06, "loss": 0.5825, "step": 11144 }, { "epoch": 1.5737079920926291, "grad_norm": 3.4063931372513676, "learning_rate": 9.690585441923036e-06, "loss": 0.5211, "step": 11145 }, { "epoch": 1.573849195142615, "grad_norm": 3.7405373204807475, "learning_rate": 9.689061722477677e-06, "loss": 0.5495, "step": 11146 }, { "epoch": 1.573990398192601, "grad_norm": 3.5261268228183735, "learning_rate": 9.68753801025839e-06, "loss": 0.5739, "step": 11147 }, { "epoch": 1.5741316012425868, "grad_norm": 3.3466286137571433, "learning_rate": 9.686014305300585e-06, "loss": 0.5063, "step": 11148 }, { "epoch": 1.5742728042925727, "grad_norm": 3.8482521502124176, "learning_rate": 9.684490607639672e-06, "loss": 0.6319, "step": 11149 }, { "epoch": 1.5744140073425585, "grad_norm": 4.18327629071738, "learning_rate": 9.682966917311065e-06, "loss": 0.7063, "step": 11150 }, { "epoch": 1.5745552103925444, "grad_norm": 3.418719044047853, "learning_rate": 9.681443234350167e-06, "loss": 0.6457, "step": 11151 }, { "epoch": 1.5746964134425303, "grad_norm": 3.3037662419981975, "learning_rate": 9.679919558792388e-06, "loss": 0.5478, "step": 11152 }, { "epoch": 1.5748376164925162, "grad_norm": 4.338057534805917, "learning_rate": 9.678395890673142e-06, "loss": 0.6632, "step": 11153 }, { "epoch": 1.574978819542502, "grad_norm": 3.7126404460331313, "learning_rate": 9.676872230027837e-06, "loss": 0.5725, "step": 11154 }, { "epoch": 1.575120022592488, "grad_norm": 3.606340342762098, "learning_rate": 9.67534857689188e-06, "loss": 0.5665, "step": 11155 }, { "epoch": 1.5752612256424738, "grad_norm": 3.854393756602364, "learning_rate": 9.673824931300684e-06, "loss": 0.6855, "step": 11156 }, { "epoch": 1.5754024286924597, "grad_norm": 3.1031294234621134, "learning_rate": 9.672301293289656e-06, "loss": 0.5219, "step": 11157 }, { "epoch": 1.5755436317424456, "grad_norm": 3.4147551400569562, "learning_rate": 9.670777662894205e-06, "loss": 0.5893, "step": 11158 }, { "epoch": 1.5756848347924315, "grad_norm": 3.670778798066699, "learning_rate": 9.66925404014974e-06, "loss": 0.561, "step": 11159 }, { "epoch": 1.5758260378424174, "grad_norm": 3.6823617503628574, "learning_rate": 9.667730425091666e-06, "loss": 0.5661, "step": 11160 }, { "epoch": 1.5759672408924033, "grad_norm": 2.663994636569955, "learning_rate": 9.666206817755391e-06, "loss": 0.4268, "step": 11161 }, { "epoch": 1.5761084439423891, "grad_norm": 3.6624857098084296, "learning_rate": 9.664683218176328e-06, "loss": 0.5835, "step": 11162 }, { "epoch": 1.576249646992375, "grad_norm": 3.694992782382996, "learning_rate": 9.663159626389882e-06, "loss": 0.7569, "step": 11163 }, { "epoch": 1.576390850042361, "grad_norm": 3.294604595579466, "learning_rate": 9.661636042431459e-06, "loss": 0.5882, "step": 11164 }, { "epoch": 1.5765320530923468, "grad_norm": 4.027183948534172, "learning_rate": 9.660112466336471e-06, "loss": 0.6504, "step": 11165 }, { "epoch": 1.5766732561423327, "grad_norm": 3.42165857973242, "learning_rate": 9.658588898140322e-06, "loss": 0.551, "step": 11166 }, { "epoch": 1.5768144591923186, "grad_norm": 3.754473194621875, "learning_rate": 9.65706533787842e-06, "loss": 0.5888, "step": 11167 }, { "epoch": 1.5769556622423044, "grad_norm": 4.009656175088606, "learning_rate": 9.655541785586172e-06, "loss": 0.6271, "step": 11168 }, { "epoch": 1.5770968652922903, "grad_norm": 3.6150160875133777, "learning_rate": 9.65401824129898e-06, "loss": 0.5772, "step": 11169 }, { "epoch": 1.5772380683422762, "grad_norm": 4.253786203302503, "learning_rate": 9.65249470505226e-06, "loss": 0.6682, "step": 11170 }, { "epoch": 1.577379271392262, "grad_norm": 4.259862563088656, "learning_rate": 9.65097117688141e-06, "loss": 0.5373, "step": 11171 }, { "epoch": 1.577520474442248, "grad_norm": 4.146078169472862, "learning_rate": 9.64944765682184e-06, "loss": 0.5498, "step": 11172 }, { "epoch": 1.5776616774922338, "grad_norm": 5.016607537104329, "learning_rate": 9.647924144908952e-06, "loss": 0.7466, "step": 11173 }, { "epoch": 1.5778028805422197, "grad_norm": 3.748765179592891, "learning_rate": 9.646400641178157e-06, "loss": 0.5581, "step": 11174 }, { "epoch": 1.5779440835922056, "grad_norm": 3.727112596077149, "learning_rate": 9.64487714566486e-06, "loss": 0.5806, "step": 11175 }, { "epoch": 1.5780852866421915, "grad_norm": 4.187195240927972, "learning_rate": 9.643353658404466e-06, "loss": 0.6831, "step": 11176 }, { "epoch": 1.5782264896921774, "grad_norm": 3.2746960458220205, "learning_rate": 9.641830179432375e-06, "loss": 0.4916, "step": 11177 }, { "epoch": 1.5783676927421633, "grad_norm": 3.758015195895359, "learning_rate": 9.640306708783997e-06, "loss": 0.6816, "step": 11178 }, { "epoch": 1.5785088957921491, "grad_norm": 3.0461198064250086, "learning_rate": 9.638783246494736e-06, "loss": 0.523, "step": 11179 }, { "epoch": 1.578650098842135, "grad_norm": 3.9453197221816376, "learning_rate": 9.637259792599997e-06, "loss": 0.6465, "step": 11180 }, { "epoch": 1.578791301892121, "grad_norm": 3.9675220811399545, "learning_rate": 9.635736347135181e-06, "loss": 0.5948, "step": 11181 }, { "epoch": 1.5789325049421068, "grad_norm": 3.035051689085451, "learning_rate": 9.634212910135697e-06, "loss": 0.4234, "step": 11182 }, { "epoch": 1.5790737079920927, "grad_norm": 4.378491587236108, "learning_rate": 9.632689481636947e-06, "loss": 0.6151, "step": 11183 }, { "epoch": 1.5792149110420786, "grad_norm": 3.547151520873231, "learning_rate": 9.631166061674338e-06, "loss": 0.5934, "step": 11184 }, { "epoch": 1.5793561140920644, "grad_norm": 3.399086534804731, "learning_rate": 9.629642650283269e-06, "loss": 0.5356, "step": 11185 }, { "epoch": 1.5794973171420503, "grad_norm": 3.712493103682115, "learning_rate": 9.62811924749914e-06, "loss": 0.6129, "step": 11186 }, { "epoch": 1.5796385201920362, "grad_norm": 4.265492252758575, "learning_rate": 9.626595853357363e-06, "loss": 0.7135, "step": 11187 }, { "epoch": 1.579779723242022, "grad_norm": 4.6190798214618685, "learning_rate": 9.625072467893337e-06, "loss": 0.6967, "step": 11188 }, { "epoch": 1.579920926292008, "grad_norm": 3.1428948457095816, "learning_rate": 9.623549091142466e-06, "loss": 0.4825, "step": 11189 }, { "epoch": 1.5800621293419939, "grad_norm": 3.3399242049377142, "learning_rate": 9.622025723140147e-06, "loss": 0.5189, "step": 11190 }, { "epoch": 1.5802033323919797, "grad_norm": 4.0164075983890255, "learning_rate": 9.620502363921791e-06, "loss": 0.6711, "step": 11191 }, { "epoch": 1.5803445354419656, "grad_norm": 3.818673653770434, "learning_rate": 9.618979013522796e-06, "loss": 0.7537, "step": 11192 }, { "epoch": 1.5804857384919515, "grad_norm": 4.165124339632033, "learning_rate": 9.617455671978565e-06, "loss": 0.7325, "step": 11193 }, { "epoch": 1.5806269415419374, "grad_norm": 3.270997018985407, "learning_rate": 9.615932339324497e-06, "loss": 0.5079, "step": 11194 }, { "epoch": 1.5807681445919233, "grad_norm": 3.245054925774382, "learning_rate": 9.614409015595994e-06, "loss": 0.5557, "step": 11195 }, { "epoch": 1.5809093476419092, "grad_norm": 2.9501316070334944, "learning_rate": 9.612885700828462e-06, "loss": 0.4902, "step": 11196 }, { "epoch": 1.581050550691895, "grad_norm": 3.8085980136800974, "learning_rate": 9.611362395057298e-06, "loss": 0.6442, "step": 11197 }, { "epoch": 1.581191753741881, "grad_norm": 4.012298162017754, "learning_rate": 9.609839098317902e-06, "loss": 0.5711, "step": 11198 }, { "epoch": 1.5813329567918668, "grad_norm": 3.7920180602476417, "learning_rate": 9.60831581064568e-06, "loss": 0.5392, "step": 11199 }, { "epoch": 1.5814741598418527, "grad_norm": 3.9036343173034123, "learning_rate": 9.606792532076028e-06, "loss": 0.6025, "step": 11200 }, { "epoch": 1.5816153628918386, "grad_norm": 3.7275581323273106, "learning_rate": 9.60526926264435e-06, "loss": 0.5693, "step": 11201 }, { "epoch": 1.5817565659418245, "grad_norm": 3.0866456604471995, "learning_rate": 9.603746002386043e-06, "loss": 0.522, "step": 11202 }, { "epoch": 1.5818977689918103, "grad_norm": 3.7452058794964427, "learning_rate": 9.602222751336506e-06, "loss": 0.607, "step": 11203 }, { "epoch": 1.5820389720417962, "grad_norm": 4.289463779249865, "learning_rate": 9.60069950953114e-06, "loss": 0.7462, "step": 11204 }, { "epoch": 1.582180175091782, "grad_norm": 3.0533154656328336, "learning_rate": 9.599176277005346e-06, "loss": 0.486, "step": 11205 }, { "epoch": 1.582321378141768, "grad_norm": 3.3330127820416524, "learning_rate": 9.597653053794521e-06, "loss": 0.5786, "step": 11206 }, { "epoch": 1.5824625811917539, "grad_norm": 3.544312837164597, "learning_rate": 9.596129839934066e-06, "loss": 0.6925, "step": 11207 }, { "epoch": 1.5826037842417398, "grad_norm": 3.226260401780507, "learning_rate": 9.59460663545938e-06, "loss": 0.5123, "step": 11208 }, { "epoch": 1.5827449872917256, "grad_norm": 3.335073771056366, "learning_rate": 9.593083440405863e-06, "loss": 0.5729, "step": 11209 }, { "epoch": 1.5828861903417115, "grad_norm": 4.17714860613498, "learning_rate": 9.591560254808909e-06, "loss": 0.6956, "step": 11210 }, { "epoch": 1.5830273933916974, "grad_norm": 3.277945067769901, "learning_rate": 9.590037078703919e-06, "loss": 0.5296, "step": 11211 }, { "epoch": 1.5831685964416833, "grad_norm": 3.522995047203345, "learning_rate": 9.588513912126286e-06, "loss": 0.5386, "step": 11212 }, { "epoch": 1.5833097994916692, "grad_norm": 3.2949595741317563, "learning_rate": 9.586990755111416e-06, "loss": 0.525, "step": 11213 }, { "epoch": 1.583451002541655, "grad_norm": 3.0444153711657393, "learning_rate": 9.585467607694702e-06, "loss": 0.5005, "step": 11214 }, { "epoch": 1.583592205591641, "grad_norm": 3.630987814798, "learning_rate": 9.583944469911541e-06, "loss": 0.5716, "step": 11215 }, { "epoch": 1.5837334086416268, "grad_norm": 3.4885849727715206, "learning_rate": 9.58242134179733e-06, "loss": 0.6652, "step": 11216 }, { "epoch": 1.5838746116916127, "grad_norm": 3.919528878245229, "learning_rate": 9.580898223387468e-06, "loss": 0.5567, "step": 11217 }, { "epoch": 1.5840158147415986, "grad_norm": 2.9326633353070504, "learning_rate": 9.579375114717352e-06, "loss": 0.4851, "step": 11218 }, { "epoch": 1.5841570177915842, "grad_norm": 3.623847998141257, "learning_rate": 9.577852015822376e-06, "loss": 0.5845, "step": 11219 }, { "epoch": 1.5842982208415701, "grad_norm": 3.4264844825491347, "learning_rate": 9.576328926737936e-06, "loss": 0.5093, "step": 11220 }, { "epoch": 1.584439423891556, "grad_norm": 3.67255249128012, "learning_rate": 9.574805847499426e-06, "loss": 0.5804, "step": 11221 }, { "epoch": 1.584580626941542, "grad_norm": 3.6172447286762335, "learning_rate": 9.573282778142246e-06, "loss": 0.5891, "step": 11222 }, { "epoch": 1.5847218299915278, "grad_norm": 3.9521573932919942, "learning_rate": 9.571759718701792e-06, "loss": 0.6431, "step": 11223 }, { "epoch": 1.5848630330415137, "grad_norm": 4.4825441058194375, "learning_rate": 9.570236669213454e-06, "loss": 0.6971, "step": 11224 }, { "epoch": 1.5850042360914995, "grad_norm": 3.854757549437913, "learning_rate": 9.568713629712632e-06, "loss": 0.5865, "step": 11225 }, { "epoch": 1.5851454391414854, "grad_norm": 3.060590796833434, "learning_rate": 9.567190600234722e-06, "loss": 0.4982, "step": 11226 }, { "epoch": 1.5852866421914713, "grad_norm": 2.9079771007440933, "learning_rate": 9.565667580815114e-06, "loss": 0.4857, "step": 11227 }, { "epoch": 1.5854278452414572, "grad_norm": 2.9279652359640287, "learning_rate": 9.564144571489202e-06, "loss": 0.4696, "step": 11228 }, { "epoch": 1.585569048291443, "grad_norm": 3.4162371607903106, "learning_rate": 9.562621572292381e-06, "loss": 0.4991, "step": 11229 }, { "epoch": 1.585710251341429, "grad_norm": 3.542799640516143, "learning_rate": 9.561098583260047e-06, "loss": 0.5903, "step": 11230 }, { "epoch": 1.5858514543914148, "grad_norm": 3.5202184455330525, "learning_rate": 9.559575604427594e-06, "loss": 0.5898, "step": 11231 }, { "epoch": 1.5859926574414007, "grad_norm": 3.476806909504271, "learning_rate": 9.558052635830413e-06, "loss": 0.5919, "step": 11232 }, { "epoch": 1.5861338604913866, "grad_norm": 3.615478786416845, "learning_rate": 9.556529677503896e-06, "loss": 0.6464, "step": 11233 }, { "epoch": 1.5862750635413725, "grad_norm": 3.59967894584123, "learning_rate": 9.555006729483438e-06, "loss": 0.5725, "step": 11234 }, { "epoch": 1.5864162665913584, "grad_norm": 3.640724900589293, "learning_rate": 9.553483791804435e-06, "loss": 0.703, "step": 11235 }, { "epoch": 1.5865574696413443, "grad_norm": 3.5607970765438646, "learning_rate": 9.551960864502275e-06, "loss": 0.5689, "step": 11236 }, { "epoch": 1.5866986726913301, "grad_norm": 3.898890531768452, "learning_rate": 9.550437947612349e-06, "loss": 0.6552, "step": 11237 }, { "epoch": 1.586839875741316, "grad_norm": 2.670820209539343, "learning_rate": 9.548915041170049e-06, "loss": 0.4229, "step": 11238 }, { "epoch": 1.586981078791302, "grad_norm": 2.964087986383132, "learning_rate": 9.54739214521077e-06, "loss": 0.4412, "step": 11239 }, { "epoch": 1.5871222818412878, "grad_norm": 3.7599311535691875, "learning_rate": 9.545869259769904e-06, "loss": 0.6676, "step": 11240 }, { "epoch": 1.5872634848912737, "grad_norm": 3.676183150198646, "learning_rate": 9.544346384882837e-06, "loss": 0.6329, "step": 11241 }, { "epoch": 1.5874046879412596, "grad_norm": 3.3132097751079783, "learning_rate": 9.542823520584968e-06, "loss": 0.5407, "step": 11242 }, { "epoch": 1.5875458909912454, "grad_norm": 3.534256889945538, "learning_rate": 9.541300666911682e-06, "loss": 0.5544, "step": 11243 }, { "epoch": 1.5876870940412313, "grad_norm": 4.137219170842502, "learning_rate": 9.539777823898368e-06, "loss": 0.5389, "step": 11244 }, { "epoch": 1.5878282970912172, "grad_norm": 3.853430190032524, "learning_rate": 9.538254991580421e-06, "loss": 0.6068, "step": 11245 }, { "epoch": 1.587969500141203, "grad_norm": 3.4239930765006727, "learning_rate": 9.536732169993225e-06, "loss": 0.5557, "step": 11246 }, { "epoch": 1.588110703191189, "grad_norm": 3.5258178039665267, "learning_rate": 9.535209359172176e-06, "loss": 0.5205, "step": 11247 }, { "epoch": 1.5882519062411748, "grad_norm": 3.3719082604687114, "learning_rate": 9.53368655915266e-06, "loss": 0.6185, "step": 11248 }, { "epoch": 1.5883931092911607, "grad_norm": 3.3092244330069644, "learning_rate": 9.532163769970068e-06, "loss": 0.5054, "step": 11249 }, { "epoch": 1.5885343123411466, "grad_norm": 3.4176099050286464, "learning_rate": 9.530640991659785e-06, "loss": 0.5105, "step": 11250 }, { "epoch": 1.5886755153911325, "grad_norm": 3.0915700140611584, "learning_rate": 9.529118224257205e-06, "loss": 0.551, "step": 11251 }, { "epoch": 1.5888167184411184, "grad_norm": 4.156022780626483, "learning_rate": 9.527595467797716e-06, "loss": 0.6111, "step": 11252 }, { "epoch": 1.588957921491104, "grad_norm": 3.5468453458356266, "learning_rate": 9.526072722316702e-06, "loss": 0.4939, "step": 11253 }, { "epoch": 1.58909912454109, "grad_norm": 4.193239879000057, "learning_rate": 9.524549987849555e-06, "loss": 0.5581, "step": 11254 }, { "epoch": 1.5892403275910758, "grad_norm": 5.497588720526756, "learning_rate": 9.523027264431657e-06, "loss": 0.7577, "step": 11255 }, { "epoch": 1.5893815306410617, "grad_norm": 3.0229178836483577, "learning_rate": 9.521504552098402e-06, "loss": 0.5255, "step": 11256 }, { "epoch": 1.5895227336910476, "grad_norm": 3.4180075130263288, "learning_rate": 9.519981850885175e-06, "loss": 0.5485, "step": 11257 }, { "epoch": 1.5896639367410335, "grad_norm": 3.6110206683336545, "learning_rate": 9.518459160827359e-06, "loss": 0.5432, "step": 11258 }, { "epoch": 1.5898051397910193, "grad_norm": 3.2566560559147923, "learning_rate": 9.516936481960346e-06, "loss": 0.4577, "step": 11259 }, { "epoch": 1.5899463428410052, "grad_norm": 4.519051168853164, "learning_rate": 9.515413814319524e-06, "loss": 0.8019, "step": 11260 }, { "epoch": 1.590087545890991, "grad_norm": 2.4722158494120205, "learning_rate": 9.513891157940275e-06, "loss": 0.3847, "step": 11261 }, { "epoch": 1.590228748940977, "grad_norm": 4.504250911409081, "learning_rate": 9.512368512857983e-06, "loss": 0.7845, "step": 11262 }, { "epoch": 1.5903699519909629, "grad_norm": 3.269899112068281, "learning_rate": 9.510845879108039e-06, "loss": 0.5187, "step": 11263 }, { "epoch": 1.5905111550409488, "grad_norm": 3.0655292570020545, "learning_rate": 9.50932325672582e-06, "loss": 0.5643, "step": 11264 }, { "epoch": 1.5906523580909346, "grad_norm": 3.405556567171546, "learning_rate": 9.50780064574672e-06, "loss": 0.6112, "step": 11265 }, { "epoch": 1.5907935611409205, "grad_norm": 3.8013483328947495, "learning_rate": 9.506278046206122e-06, "loss": 0.5363, "step": 11266 }, { "epoch": 1.5909347641909064, "grad_norm": 3.3801577086628796, "learning_rate": 9.504755458139406e-06, "loss": 0.5288, "step": 11267 }, { "epoch": 1.5910759672408923, "grad_norm": 3.3056317004240108, "learning_rate": 9.50323288158196e-06, "loss": 0.5937, "step": 11268 }, { "epoch": 1.5912171702908782, "grad_norm": 3.237177588336155, "learning_rate": 9.50171031656917e-06, "loss": 0.4068, "step": 11269 }, { "epoch": 1.591358373340864, "grad_norm": 3.112666842772039, "learning_rate": 9.500187763136416e-06, "loss": 0.4845, "step": 11270 }, { "epoch": 1.59149957639085, "grad_norm": 3.8532064532825747, "learning_rate": 9.498665221319083e-06, "loss": 0.6531, "step": 11271 }, { "epoch": 1.5916407794408358, "grad_norm": 3.4707956218516527, "learning_rate": 9.49714269115255e-06, "loss": 0.575, "step": 11272 }, { "epoch": 1.5917819824908217, "grad_norm": 2.6317156917970794, "learning_rate": 9.495620172672205e-06, "loss": 0.4932, "step": 11273 }, { "epoch": 1.5919231855408076, "grad_norm": 3.196866364480932, "learning_rate": 9.494097665913432e-06, "loss": 0.584, "step": 11274 }, { "epoch": 1.5920643885907935, "grad_norm": 3.954481929356727, "learning_rate": 9.492575170911609e-06, "loss": 0.6039, "step": 11275 }, { "epoch": 1.5922055916407793, "grad_norm": 4.709897176294105, "learning_rate": 9.491052687702118e-06, "loss": 0.6215, "step": 11276 }, { "epoch": 1.5923467946907652, "grad_norm": 3.5664540669062403, "learning_rate": 9.489530216320348e-06, "loss": 0.6339, "step": 11277 }, { "epoch": 1.5924879977407511, "grad_norm": 3.1019887139590883, "learning_rate": 9.488007756801672e-06, "loss": 0.524, "step": 11278 }, { "epoch": 1.592629200790737, "grad_norm": 3.2178717963217918, "learning_rate": 9.486485309181475e-06, "loss": 0.5405, "step": 11279 }, { "epoch": 1.5927704038407229, "grad_norm": 3.2189293881127554, "learning_rate": 9.484962873495137e-06, "loss": 0.5748, "step": 11280 }, { "epoch": 1.5929116068907088, "grad_norm": 3.5072974984185397, "learning_rate": 9.483440449778038e-06, "loss": 0.5385, "step": 11281 }, { "epoch": 1.5930528099406946, "grad_norm": 3.803119612856957, "learning_rate": 9.481918038065561e-06, "loss": 0.49, "step": 11282 }, { "epoch": 1.5931940129906805, "grad_norm": 3.115319673748481, "learning_rate": 9.480395638393087e-06, "loss": 0.5605, "step": 11283 }, { "epoch": 1.5933352160406664, "grad_norm": 3.3315665959347904, "learning_rate": 9.478873250795991e-06, "loss": 0.5677, "step": 11284 }, { "epoch": 1.5934764190906523, "grad_norm": 4.457979646366254, "learning_rate": 9.477350875309656e-06, "loss": 0.7286, "step": 11285 }, { "epoch": 1.5936176221406382, "grad_norm": 4.1469190795336415, "learning_rate": 9.475828511969466e-06, "loss": 0.7007, "step": 11286 }, { "epoch": 1.593758825190624, "grad_norm": 4.620226721679112, "learning_rate": 9.47430616081079e-06, "loss": 0.7525, "step": 11287 }, { "epoch": 1.59390002824061, "grad_norm": 3.5736974815355422, "learning_rate": 9.472783821869015e-06, "loss": 0.5487, "step": 11288 }, { "epoch": 1.5940412312905958, "grad_norm": 3.5230912502195846, "learning_rate": 9.471261495179512e-06, "loss": 0.5829, "step": 11289 }, { "epoch": 1.5941824343405817, "grad_norm": 3.715534105824283, "learning_rate": 9.469739180777666e-06, "loss": 0.5941, "step": 11290 }, { "epoch": 1.5943236373905676, "grad_norm": 4.207111218858554, "learning_rate": 9.468216878698853e-06, "loss": 0.5703, "step": 11291 }, { "epoch": 1.5944648404405535, "grad_norm": 4.503600661328856, "learning_rate": 9.466694588978448e-06, "loss": 0.6384, "step": 11292 }, { "epoch": 1.5946060434905394, "grad_norm": 4.424046018806635, "learning_rate": 9.46517231165183e-06, "loss": 0.7301, "step": 11293 }, { "epoch": 1.5947472465405252, "grad_norm": 4.249249333654446, "learning_rate": 9.463650046754383e-06, "loss": 0.6142, "step": 11294 }, { "epoch": 1.5948884495905111, "grad_norm": 3.4305991884933817, "learning_rate": 9.462127794321471e-06, "loss": 0.6643, "step": 11295 }, { "epoch": 1.595029652640497, "grad_norm": 4.318204878908951, "learning_rate": 9.460605554388479e-06, "loss": 0.8148, "step": 11296 }, { "epoch": 1.595170855690483, "grad_norm": 4.91535074562223, "learning_rate": 9.45908332699078e-06, "loss": 0.7256, "step": 11297 }, { "epoch": 1.5953120587404688, "grad_norm": 3.469379108464661, "learning_rate": 9.45756111216375e-06, "loss": 0.6175, "step": 11298 }, { "epoch": 1.5954532617904547, "grad_norm": 3.7556653844580588, "learning_rate": 9.456038909942766e-06, "loss": 0.7179, "step": 11299 }, { "epoch": 1.5955944648404405, "grad_norm": 3.409670120788086, "learning_rate": 9.454516720363203e-06, "loss": 0.5579, "step": 11300 }, { "epoch": 1.5957356678904264, "grad_norm": 3.7682275718258795, "learning_rate": 9.452994543460435e-06, "loss": 0.6658, "step": 11301 }, { "epoch": 1.5958768709404123, "grad_norm": 3.678026855501289, "learning_rate": 9.45147237926984e-06, "loss": 0.5511, "step": 11302 }, { "epoch": 1.5960180739903982, "grad_norm": 4.565926952994871, "learning_rate": 9.449950227826792e-06, "loss": 0.7009, "step": 11303 }, { "epoch": 1.596159277040384, "grad_norm": 3.4482186951381606, "learning_rate": 9.44842808916666e-06, "loss": 0.6643, "step": 11304 }, { "epoch": 1.59630048009037, "grad_norm": 3.5742434835774683, "learning_rate": 9.446905963324821e-06, "loss": 0.4886, "step": 11305 }, { "epoch": 1.5964416831403558, "grad_norm": 3.084608618110162, "learning_rate": 9.445383850336648e-06, "loss": 0.4382, "step": 11306 }, { "epoch": 1.5965828861903417, "grad_norm": 3.115246318805226, "learning_rate": 9.443861750237515e-06, "loss": 0.4847, "step": 11307 }, { "epoch": 1.5967240892403276, "grad_norm": 4.418877262295845, "learning_rate": 9.442339663062795e-06, "loss": 0.7691, "step": 11308 }, { "epoch": 1.5968652922903135, "grad_norm": 3.8277079376677214, "learning_rate": 9.44081758884786e-06, "loss": 0.6911, "step": 11309 }, { "epoch": 1.5970064953402994, "grad_norm": 3.758596591826772, "learning_rate": 9.439295527628083e-06, "loss": 0.6841, "step": 11310 }, { "epoch": 1.5971476983902853, "grad_norm": 3.740000724715808, "learning_rate": 9.437773479438838e-06, "loss": 0.558, "step": 11311 }, { "epoch": 1.5972889014402711, "grad_norm": 3.4617649812541225, "learning_rate": 9.436251444315492e-06, "loss": 0.5579, "step": 11312 }, { "epoch": 1.597430104490257, "grad_norm": 3.72300367802169, "learning_rate": 9.43472942229342e-06, "loss": 0.6297, "step": 11313 }, { "epoch": 1.597571307540243, "grad_norm": 3.944197716187972, "learning_rate": 9.433207413407991e-06, "loss": 0.5866, "step": 11314 }, { "epoch": 1.5977125105902288, "grad_norm": 3.6508968740511154, "learning_rate": 9.431685417694576e-06, "loss": 0.6011, "step": 11315 }, { "epoch": 1.5978537136402147, "grad_norm": 3.170941060307321, "learning_rate": 9.430163435188549e-06, "loss": 0.6122, "step": 11316 }, { "epoch": 1.5979949166902006, "grad_norm": 3.6737008594756433, "learning_rate": 9.428641465925277e-06, "loss": 0.6578, "step": 11317 }, { "epoch": 1.5981361197401864, "grad_norm": 3.44023474313468, "learning_rate": 9.42711950994013e-06, "loss": 0.5273, "step": 11318 }, { "epoch": 1.5982773227901723, "grad_norm": 3.093778540179183, "learning_rate": 9.425597567268477e-06, "loss": 0.5018, "step": 11319 }, { "epoch": 1.5984185258401582, "grad_norm": 3.2711134313869255, "learning_rate": 9.424075637945692e-06, "loss": 0.5057, "step": 11320 }, { "epoch": 1.598559728890144, "grad_norm": 3.682204619364394, "learning_rate": 9.422553722007139e-06, "loss": 0.6009, "step": 11321 }, { "epoch": 1.59870093194013, "grad_norm": 3.7237461847377795, "learning_rate": 9.421031819488188e-06, "loss": 0.572, "step": 11322 }, { "epoch": 1.5988421349901158, "grad_norm": 3.001718263432985, "learning_rate": 9.419509930424206e-06, "loss": 0.5149, "step": 11323 }, { "epoch": 1.5989833380401017, "grad_norm": 3.4167324887857506, "learning_rate": 9.417988054850561e-06, "loss": 0.5746, "step": 11324 }, { "epoch": 1.5991245410900876, "grad_norm": 4.187941240630047, "learning_rate": 9.416466192802626e-06, "loss": 0.6145, "step": 11325 }, { "epoch": 1.5992657441400735, "grad_norm": 3.841395202067567, "learning_rate": 9.414944344315765e-06, "loss": 0.5932, "step": 11326 }, { "epoch": 1.5994069471900594, "grad_norm": 3.8614867760777054, "learning_rate": 9.41342250942534e-06, "loss": 0.7972, "step": 11327 }, { "epoch": 1.5995481502400453, "grad_norm": 3.337319612051983, "learning_rate": 9.411900688166731e-06, "loss": 0.6081, "step": 11328 }, { "epoch": 1.5996893532900311, "grad_norm": 4.3641814300421435, "learning_rate": 9.410378880575288e-06, "loss": 0.5812, "step": 11329 }, { "epoch": 1.599830556340017, "grad_norm": 3.835037756548726, "learning_rate": 9.40885708668639e-06, "loss": 0.4974, "step": 11330 }, { "epoch": 1.599971759390003, "grad_norm": 4.222625326106407, "learning_rate": 9.407335306535396e-06, "loss": 0.6487, "step": 11331 }, { "epoch": 1.6001129624399888, "grad_norm": 3.632956254439459, "learning_rate": 9.40581354015767e-06, "loss": 0.5592, "step": 11332 }, { "epoch": 1.6002541654899747, "grad_norm": 3.8621024996093722, "learning_rate": 9.404291787588586e-06, "loss": 0.624, "step": 11333 }, { "epoch": 1.6003953685399606, "grad_norm": 3.718051762722175, "learning_rate": 9.402770048863502e-06, "loss": 0.5687, "step": 11334 }, { "epoch": 1.6005365715899464, "grad_norm": 3.6064803951845836, "learning_rate": 9.401248324017784e-06, "loss": 0.6267, "step": 11335 }, { "epoch": 1.6006777746399323, "grad_norm": 2.9429865359547476, "learning_rate": 9.399726613086794e-06, "loss": 0.3927, "step": 11336 }, { "epoch": 1.6008189776899182, "grad_norm": 4.2227226964052855, "learning_rate": 9.398204916105906e-06, "loss": 0.6092, "step": 11337 }, { "epoch": 1.600960180739904, "grad_norm": 3.36063975570979, "learning_rate": 9.396683233110468e-06, "loss": 0.4865, "step": 11338 }, { "epoch": 1.60110138378989, "grad_norm": 4.213171044545515, "learning_rate": 9.395161564135853e-06, "loss": 0.6979, "step": 11339 }, { "epoch": 1.6012425868398759, "grad_norm": 3.635254239973643, "learning_rate": 9.393639909217423e-06, "loss": 0.622, "step": 11340 }, { "epoch": 1.6013837898898617, "grad_norm": 3.1645457720667025, "learning_rate": 9.392118268390538e-06, "loss": 0.4899, "step": 11341 }, { "epoch": 1.6015249929398476, "grad_norm": 3.9798424444459815, "learning_rate": 9.390596641690563e-06, "loss": 0.6185, "step": 11342 }, { "epoch": 1.6016661959898335, "grad_norm": 3.2134414640467743, "learning_rate": 9.38907502915286e-06, "loss": 0.5834, "step": 11343 }, { "epoch": 1.6018073990398194, "grad_norm": 3.607365884488, "learning_rate": 9.387553430812786e-06, "loss": 0.5146, "step": 11344 }, { "epoch": 1.6019486020898053, "grad_norm": 3.391152273275, "learning_rate": 9.386031846705712e-06, "loss": 0.57, "step": 11345 }, { "epoch": 1.6020898051397912, "grad_norm": 4.158000755099663, "learning_rate": 9.384510276866988e-06, "loss": 0.7712, "step": 11346 }, { "epoch": 1.602231008189777, "grad_norm": 3.572838696066777, "learning_rate": 9.382988721331981e-06, "loss": 0.5174, "step": 11347 }, { "epoch": 1.602372211239763, "grad_norm": 3.156490738543254, "learning_rate": 9.381467180136049e-06, "loss": 0.4081, "step": 11348 }, { "epoch": 1.6025134142897488, "grad_norm": 3.6446906427129817, "learning_rate": 9.379945653314553e-06, "loss": 0.5105, "step": 11349 }, { "epoch": 1.6026546173397347, "grad_norm": 3.2513041554379236, "learning_rate": 9.37842414090285e-06, "loss": 0.5657, "step": 11350 }, { "epoch": 1.6027958203897206, "grad_norm": 4.003797374091856, "learning_rate": 9.376902642936303e-06, "loss": 0.6413, "step": 11351 }, { "epoch": 1.6029370234397065, "grad_norm": 3.3978747099673456, "learning_rate": 9.375381159450271e-06, "loss": 0.6529, "step": 11352 }, { "epoch": 1.6030782264896923, "grad_norm": 3.8537107665402095, "learning_rate": 9.373859690480113e-06, "loss": 0.6417, "step": 11353 }, { "epoch": 1.6032194295396782, "grad_norm": 4.361763831636235, "learning_rate": 9.372338236061183e-06, "loss": 0.6116, "step": 11354 }, { "epoch": 1.603360632589664, "grad_norm": 3.0522082725796325, "learning_rate": 9.370816796228838e-06, "loss": 0.5423, "step": 11355 }, { "epoch": 1.6035018356396498, "grad_norm": 3.2973094462014156, "learning_rate": 9.369295371018442e-06, "loss": 0.5917, "step": 11356 }, { "epoch": 1.6036430386896356, "grad_norm": 3.455909044713918, "learning_rate": 9.36777396046535e-06, "loss": 0.5263, "step": 11357 }, { "epoch": 1.6037842417396215, "grad_norm": 3.7412227522211428, "learning_rate": 9.366252564604914e-06, "loss": 0.5576, "step": 11358 }, { "epoch": 1.6039254447896074, "grad_norm": 4.081817331951791, "learning_rate": 9.364731183472497e-06, "loss": 0.689, "step": 11359 }, { "epoch": 1.6040666478395933, "grad_norm": 4.124961631371808, "learning_rate": 9.363209817103455e-06, "loss": 0.7466, "step": 11360 }, { "epoch": 1.6042078508895792, "grad_norm": 3.825424559044004, "learning_rate": 9.361688465533139e-06, "loss": 0.6208, "step": 11361 }, { "epoch": 1.604349053939565, "grad_norm": 3.9542263475677992, "learning_rate": 9.360167128796913e-06, "loss": 0.5741, "step": 11362 }, { "epoch": 1.604490256989551, "grad_norm": 4.432923820622169, "learning_rate": 9.35864580693012e-06, "loss": 0.7222, "step": 11363 }, { "epoch": 1.6046314600395368, "grad_norm": 3.8668004930869726, "learning_rate": 9.357124499968124e-06, "loss": 0.5995, "step": 11364 }, { "epoch": 1.6047726630895227, "grad_norm": 3.1745374549704177, "learning_rate": 9.355603207946277e-06, "loss": 0.5082, "step": 11365 }, { "epoch": 1.6049138661395086, "grad_norm": 3.8753301613352633, "learning_rate": 9.354081930899935e-06, "loss": 0.5369, "step": 11366 }, { "epoch": 1.6050550691894945, "grad_norm": 3.4399737349204003, "learning_rate": 9.352560668864445e-06, "loss": 0.5899, "step": 11367 }, { "epoch": 1.6051962722394804, "grad_norm": 4.171357801868226, "learning_rate": 9.351039421875169e-06, "loss": 0.6639, "step": 11368 }, { "epoch": 1.6053374752894662, "grad_norm": 4.040160376176862, "learning_rate": 9.349518189967455e-06, "loss": 0.6296, "step": 11369 }, { "epoch": 1.6054786783394521, "grad_norm": 3.2155511270363504, "learning_rate": 9.347996973176661e-06, "loss": 0.6098, "step": 11370 }, { "epoch": 1.605619881389438, "grad_norm": 4.399564604852058, "learning_rate": 9.346475771538135e-06, "loss": 0.8228, "step": 11371 }, { "epoch": 1.605761084439424, "grad_norm": 3.805017827852861, "learning_rate": 9.344954585087226e-06, "loss": 0.6104, "step": 11372 }, { "epoch": 1.6059022874894098, "grad_norm": 3.499509402164501, "learning_rate": 9.343433413859291e-06, "loss": 0.5769, "step": 11373 }, { "epoch": 1.6060434905393957, "grad_norm": 3.4205755272621223, "learning_rate": 9.34191225788968e-06, "loss": 0.5757, "step": 11374 }, { "epoch": 1.6061846935893815, "grad_norm": 3.279612718323163, "learning_rate": 9.340391117213742e-06, "loss": 0.5788, "step": 11375 }, { "epoch": 1.6063258966393674, "grad_norm": 3.809012473573495, "learning_rate": 9.338869991866833e-06, "loss": 0.6492, "step": 11376 }, { "epoch": 1.6064670996893533, "grad_norm": 3.6212294217801646, "learning_rate": 9.3373488818843e-06, "loss": 0.6535, "step": 11377 }, { "epoch": 1.6066083027393392, "grad_norm": 3.1915815566669785, "learning_rate": 9.335827787301492e-06, "loss": 0.5438, "step": 11378 }, { "epoch": 1.606749505789325, "grad_norm": 4.033966809319184, "learning_rate": 9.33430670815376e-06, "loss": 0.6405, "step": 11379 }, { "epoch": 1.606890708839311, "grad_norm": 3.1291980312090106, "learning_rate": 9.332785644476452e-06, "loss": 0.4917, "step": 11380 }, { "epoch": 1.6070319118892968, "grad_norm": 3.249701665199538, "learning_rate": 9.331264596304916e-06, "loss": 0.5861, "step": 11381 }, { "epoch": 1.6071731149392827, "grad_norm": 3.447666218549116, "learning_rate": 9.329743563674505e-06, "loss": 0.583, "step": 11382 }, { "epoch": 1.6073143179892686, "grad_norm": 3.409616850094076, "learning_rate": 9.328222546620561e-06, "loss": 0.4972, "step": 11383 }, { "epoch": 1.6074555210392545, "grad_norm": 3.598417721879181, "learning_rate": 9.326701545178434e-06, "loss": 0.4803, "step": 11384 }, { "epoch": 1.6075967240892404, "grad_norm": 3.4418914789527477, "learning_rate": 9.325180559383474e-06, "loss": 0.5268, "step": 11385 }, { "epoch": 1.6077379271392263, "grad_norm": 3.69581986894758, "learning_rate": 9.323659589271028e-06, "loss": 0.5857, "step": 11386 }, { "epoch": 1.6078791301892121, "grad_norm": 2.8864170161221487, "learning_rate": 9.322138634876441e-06, "loss": 0.4341, "step": 11387 }, { "epoch": 1.608020333239198, "grad_norm": 4.247618024445971, "learning_rate": 9.320617696235058e-06, "loss": 0.6131, "step": 11388 }, { "epoch": 1.608161536289184, "grad_norm": 2.9540149238773794, "learning_rate": 9.319096773382226e-06, "loss": 0.4708, "step": 11389 }, { "epoch": 1.6083027393391696, "grad_norm": 3.049907652025075, "learning_rate": 9.317575866353293e-06, "loss": 0.4741, "step": 11390 }, { "epoch": 1.6084439423891554, "grad_norm": 3.301496593915166, "learning_rate": 9.316054975183599e-06, "loss": 0.5341, "step": 11391 }, { "epoch": 1.6085851454391413, "grad_norm": 3.480463194768759, "learning_rate": 9.314534099908492e-06, "loss": 0.5381, "step": 11392 }, { "epoch": 1.6087263484891272, "grad_norm": 3.134143740734321, "learning_rate": 9.31301324056332e-06, "loss": 0.5905, "step": 11393 }, { "epoch": 1.608867551539113, "grad_norm": 3.231908372735029, "learning_rate": 9.31149239718342e-06, "loss": 0.5409, "step": 11394 }, { "epoch": 1.609008754589099, "grad_norm": 4.84584740111127, "learning_rate": 9.309971569804142e-06, "loss": 0.6683, "step": 11395 }, { "epoch": 1.6091499576390849, "grad_norm": 4.040890264827595, "learning_rate": 9.308450758460828e-06, "loss": 0.7536, "step": 11396 }, { "epoch": 1.6092911606890707, "grad_norm": 3.754019868984418, "learning_rate": 9.306929963188818e-06, "loss": 0.6148, "step": 11397 }, { "epoch": 1.6094323637390566, "grad_norm": 3.494932812279176, "learning_rate": 9.305409184023455e-06, "loss": 0.5682, "step": 11398 }, { "epoch": 1.6095735667890425, "grad_norm": 2.9425859888938763, "learning_rate": 9.303888421000082e-06, "loss": 0.4495, "step": 11399 }, { "epoch": 1.6097147698390284, "grad_norm": 4.422906114266241, "learning_rate": 9.302367674154043e-06, "loss": 0.7421, "step": 11400 }, { "epoch": 1.6098559728890143, "grad_norm": 3.8083903789148814, "learning_rate": 9.300846943520678e-06, "loss": 0.4867, "step": 11401 }, { "epoch": 1.6099971759390002, "grad_norm": 3.2633686037987117, "learning_rate": 9.299326229135326e-06, "loss": 0.5135, "step": 11402 }, { "epoch": 1.610138378988986, "grad_norm": 3.591668166439758, "learning_rate": 9.297805531033333e-06, "loss": 0.6042, "step": 11403 }, { "epoch": 1.610279582038972, "grad_norm": 3.604416964885435, "learning_rate": 9.296284849250038e-06, "loss": 0.5568, "step": 11404 }, { "epoch": 1.6104207850889578, "grad_norm": 3.8535030340212693, "learning_rate": 9.294764183820775e-06, "loss": 0.5374, "step": 11405 }, { "epoch": 1.6105619881389437, "grad_norm": 3.2605163307514546, "learning_rate": 9.293243534780887e-06, "loss": 0.6078, "step": 11406 }, { "epoch": 1.6107031911889296, "grad_norm": 3.34578317061908, "learning_rate": 9.291722902165717e-06, "loss": 0.5268, "step": 11407 }, { "epoch": 1.6108443942389155, "grad_norm": 3.0058356451552197, "learning_rate": 9.290202286010602e-06, "loss": 0.4501, "step": 11408 }, { "epoch": 1.6109855972889013, "grad_norm": 3.247636865015121, "learning_rate": 9.288681686350876e-06, "loss": 0.4609, "step": 11409 }, { "epoch": 1.6111268003388872, "grad_norm": 3.6188566002023905, "learning_rate": 9.28716110322188e-06, "loss": 0.6016, "step": 11410 }, { "epoch": 1.611268003388873, "grad_norm": 3.7784196370461354, "learning_rate": 9.285640536658955e-06, "loss": 0.6666, "step": 11411 }, { "epoch": 1.611409206438859, "grad_norm": 3.179085127501755, "learning_rate": 9.284119986697433e-06, "loss": 0.5825, "step": 11412 }, { "epoch": 1.6115504094888449, "grad_norm": 3.433480749698441, "learning_rate": 9.282599453372658e-06, "loss": 0.5404, "step": 11413 }, { "epoch": 1.6116916125388308, "grad_norm": 3.7289997230840375, "learning_rate": 9.281078936719958e-06, "loss": 0.616, "step": 11414 }, { "epoch": 1.6118328155888166, "grad_norm": 3.549414000986242, "learning_rate": 9.279558436774672e-06, "loss": 0.5333, "step": 11415 }, { "epoch": 1.6119740186388025, "grad_norm": 3.3018646040711634, "learning_rate": 9.278037953572138e-06, "loss": 0.6019, "step": 11416 }, { "epoch": 1.6121152216887884, "grad_norm": 3.6310317938466303, "learning_rate": 9.27651748714769e-06, "loss": 0.5761, "step": 11417 }, { "epoch": 1.6122564247387743, "grad_norm": 3.0430218341922384, "learning_rate": 9.274997037536663e-06, "loss": 0.4497, "step": 11418 }, { "epoch": 1.6123976277887602, "grad_norm": 3.6105981359621397, "learning_rate": 9.273476604774392e-06, "loss": 0.5825, "step": 11419 }, { "epoch": 1.612538830838746, "grad_norm": 3.3723956007794773, "learning_rate": 9.271956188896211e-06, "loss": 0.5061, "step": 11420 }, { "epoch": 1.612680033888732, "grad_norm": 3.712957273039877, "learning_rate": 9.270435789937456e-06, "loss": 0.5988, "step": 11421 }, { "epoch": 1.6128212369387178, "grad_norm": 3.264288332216479, "learning_rate": 9.268915407933457e-06, "loss": 0.5252, "step": 11422 }, { "epoch": 1.6129624399887037, "grad_norm": 3.5506279295417396, "learning_rate": 9.267395042919546e-06, "loss": 0.5547, "step": 11423 }, { "epoch": 1.6131036430386896, "grad_norm": 3.5301058525034796, "learning_rate": 9.265874694931059e-06, "loss": 0.4538, "step": 11424 }, { "epoch": 1.6132448460886755, "grad_norm": 3.830680095046199, "learning_rate": 9.264354364003327e-06, "loss": 0.5508, "step": 11425 }, { "epoch": 1.6133860491386613, "grad_norm": 3.3410712446951423, "learning_rate": 9.262834050171683e-06, "loss": 0.5843, "step": 11426 }, { "epoch": 1.6135272521886472, "grad_norm": 2.682275796130066, "learning_rate": 9.261313753471454e-06, "loss": 0.4682, "step": 11427 }, { "epoch": 1.6136684552386331, "grad_norm": 3.799234468620082, "learning_rate": 9.259793473937977e-06, "loss": 0.619, "step": 11428 }, { "epoch": 1.613809658288619, "grad_norm": 3.8764690052946817, "learning_rate": 9.25827321160658e-06, "loss": 0.4993, "step": 11429 }, { "epoch": 1.6139508613386049, "grad_norm": 3.085789962010113, "learning_rate": 9.256752966512595e-06, "loss": 0.4504, "step": 11430 }, { "epoch": 1.6140920643885908, "grad_norm": 3.0717316672305826, "learning_rate": 9.255232738691348e-06, "loss": 0.5065, "step": 11431 }, { "epoch": 1.6142332674385766, "grad_norm": 4.056763950383977, "learning_rate": 9.253712528178169e-06, "loss": 0.7655, "step": 11432 }, { "epoch": 1.6143744704885625, "grad_norm": 4.117490789867247, "learning_rate": 9.25219233500839e-06, "loss": 0.6416, "step": 11433 }, { "epoch": 1.6145156735385484, "grad_norm": 3.9538518593169014, "learning_rate": 9.25067215921734e-06, "loss": 0.5429, "step": 11434 }, { "epoch": 1.6146568765885343, "grad_norm": 3.6452557084965314, "learning_rate": 9.249152000840341e-06, "loss": 0.5865, "step": 11435 }, { "epoch": 1.6147980796385202, "grad_norm": 3.2645830097819757, "learning_rate": 9.24763185991273e-06, "loss": 0.5082, "step": 11436 }, { "epoch": 1.614939282688506, "grad_norm": 3.8684664232524058, "learning_rate": 9.246111736469826e-06, "loss": 0.5291, "step": 11437 }, { "epoch": 1.615080485738492, "grad_norm": 4.0221868977221895, "learning_rate": 9.244591630546964e-06, "loss": 0.5926, "step": 11438 }, { "epoch": 1.6152216887884778, "grad_norm": 3.6557608759755236, "learning_rate": 9.243071542179464e-06, "loss": 0.6402, "step": 11439 }, { "epoch": 1.6153628918384637, "grad_norm": 3.10503470597323, "learning_rate": 9.241551471402654e-06, "loss": 0.4938, "step": 11440 }, { "epoch": 1.6155040948884496, "grad_norm": 3.800180310819801, "learning_rate": 9.240031418251858e-06, "loss": 0.5428, "step": 11441 }, { "epoch": 1.6156452979384355, "grad_norm": 3.663123094946641, "learning_rate": 9.238511382762408e-06, "loss": 0.572, "step": 11442 }, { "epoch": 1.6157865009884214, "grad_norm": 4.389651237357117, "learning_rate": 9.236991364969623e-06, "loss": 0.6495, "step": 11443 }, { "epoch": 1.6159277040384072, "grad_norm": 2.7317224530034707, "learning_rate": 9.235471364908826e-06, "loss": 0.4348, "step": 11444 }, { "epoch": 1.6160689070883931, "grad_norm": 3.147665021137086, "learning_rate": 9.233951382615346e-06, "loss": 0.501, "step": 11445 }, { "epoch": 1.616210110138379, "grad_norm": 3.045467137559147, "learning_rate": 9.232431418124507e-06, "loss": 0.5739, "step": 11446 }, { "epoch": 1.616351313188365, "grad_norm": 3.3426136222229363, "learning_rate": 9.230911471471632e-06, "loss": 0.5815, "step": 11447 }, { "epoch": 1.6164925162383508, "grad_norm": 3.8046112094099285, "learning_rate": 9.22939154269204e-06, "loss": 0.6096, "step": 11448 }, { "epoch": 1.6166337192883367, "grad_norm": 3.669521717658185, "learning_rate": 9.227871631821053e-06, "loss": 0.6184, "step": 11449 }, { "epoch": 1.6167749223383225, "grad_norm": 3.8456507281238417, "learning_rate": 9.226351738893999e-06, "loss": 0.5203, "step": 11450 }, { "epoch": 1.6169161253883084, "grad_norm": 3.836827050704987, "learning_rate": 9.224831863946196e-06, "loss": 0.6337, "step": 11451 }, { "epoch": 1.6170573284382943, "grad_norm": 3.9307164889203117, "learning_rate": 9.223312007012965e-06, "loss": 0.6902, "step": 11452 }, { "epoch": 1.6171985314882802, "grad_norm": 3.0580807837150896, "learning_rate": 9.221792168129626e-06, "loss": 0.5204, "step": 11453 }, { "epoch": 1.617339734538266, "grad_norm": 3.9779124386815434, "learning_rate": 9.220272347331502e-06, "loss": 0.494, "step": 11454 }, { "epoch": 1.617480937588252, "grad_norm": 3.5493775760252224, "learning_rate": 9.218752544653916e-06, "loss": 0.578, "step": 11455 }, { "epoch": 1.6176221406382378, "grad_norm": 3.2265586483290236, "learning_rate": 9.217232760132181e-06, "loss": 0.5154, "step": 11456 }, { "epoch": 1.6177633436882237, "grad_norm": 3.9466579985462764, "learning_rate": 9.215712993801617e-06, "loss": 0.6769, "step": 11457 }, { "epoch": 1.6179045467382096, "grad_norm": 4.0867450809848105, "learning_rate": 9.214193245697544e-06, "loss": 0.6085, "step": 11458 }, { "epoch": 1.6180457497881955, "grad_norm": 3.957474546915718, "learning_rate": 9.212673515855281e-06, "loss": 0.7352, "step": 11459 }, { "epoch": 1.6181869528381814, "grad_norm": 4.165411482349635, "learning_rate": 9.211153804310146e-06, "loss": 0.7516, "step": 11460 }, { "epoch": 1.6183281558881673, "grad_norm": 5.6516395621115025, "learning_rate": 9.209634111097455e-06, "loss": 0.8028, "step": 11461 }, { "epoch": 1.6184693589381531, "grad_norm": 2.974021278236827, "learning_rate": 9.208114436252528e-06, "loss": 0.4429, "step": 11462 }, { "epoch": 1.618610561988139, "grad_norm": 3.682497681930752, "learning_rate": 9.206594779810677e-06, "loss": 0.6529, "step": 11463 }, { "epoch": 1.618751765038125, "grad_norm": 3.1232952207524796, "learning_rate": 9.205075141807226e-06, "loss": 0.5425, "step": 11464 }, { "epoch": 1.6188929680881108, "grad_norm": 3.343716844711635, "learning_rate": 9.203555522277483e-06, "loss": 0.5365, "step": 11465 }, { "epoch": 1.6190341711380967, "grad_norm": 3.3630860294803266, "learning_rate": 9.20203592125676e-06, "loss": 0.4743, "step": 11466 }, { "epoch": 1.6191753741880826, "grad_norm": 3.688116287881678, "learning_rate": 9.200516338780383e-06, "loss": 0.7152, "step": 11467 }, { "epoch": 1.6193165772380684, "grad_norm": 3.4295213211326048, "learning_rate": 9.19899677488366e-06, "loss": 0.6646, "step": 11468 }, { "epoch": 1.6194577802880543, "grad_norm": 3.3350479990407624, "learning_rate": 9.197477229601906e-06, "loss": 0.4779, "step": 11469 }, { "epoch": 1.6195989833380402, "grad_norm": 3.430215564111373, "learning_rate": 9.195957702970434e-06, "loss": 0.5263, "step": 11470 }, { "epoch": 1.619740186388026, "grad_norm": 3.4262831811850236, "learning_rate": 9.194438195024557e-06, "loss": 0.4742, "step": 11471 }, { "epoch": 1.619881389438012, "grad_norm": 3.8027703088907785, "learning_rate": 9.192918705799594e-06, "loss": 0.6977, "step": 11472 }, { "epoch": 1.6200225924879978, "grad_norm": 2.9707886375376646, "learning_rate": 9.191399235330847e-06, "loss": 0.4985, "step": 11473 }, { "epoch": 1.6201637955379837, "grad_norm": 3.5345818460417284, "learning_rate": 9.189879783653633e-06, "loss": 0.5932, "step": 11474 }, { "epoch": 1.6203049985879696, "grad_norm": 4.130455434434312, "learning_rate": 9.188360350803261e-06, "loss": 0.6321, "step": 11475 }, { "epoch": 1.6204462016379555, "grad_norm": 3.5187610272191314, "learning_rate": 9.186840936815047e-06, "loss": 0.5363, "step": 11476 }, { "epoch": 1.6205874046879414, "grad_norm": 3.3976777417583066, "learning_rate": 9.185321541724296e-06, "loss": 0.5981, "step": 11477 }, { "epoch": 1.6207286077379273, "grad_norm": 3.624689606861724, "learning_rate": 9.18380216556632e-06, "loss": 0.5188, "step": 11478 }, { "epoch": 1.6208698107879131, "grad_norm": 3.999733421540868, "learning_rate": 9.182282808376433e-06, "loss": 0.5602, "step": 11479 }, { "epoch": 1.621011013837899, "grad_norm": 3.1191943046581554, "learning_rate": 9.180763470189938e-06, "loss": 0.5492, "step": 11480 }, { "epoch": 1.621152216887885, "grad_norm": 3.710152350327996, "learning_rate": 9.17924415104215e-06, "loss": 0.5354, "step": 11481 }, { "epoch": 1.6212934199378708, "grad_norm": 4.134203799647902, "learning_rate": 9.17772485096837e-06, "loss": 0.7752, "step": 11482 }, { "epoch": 1.6214346229878567, "grad_norm": 2.8361081331420857, "learning_rate": 9.176205570003907e-06, "loss": 0.4266, "step": 11483 }, { "epoch": 1.6215758260378426, "grad_norm": 3.9558270783895146, "learning_rate": 9.174686308184075e-06, "loss": 0.574, "step": 11484 }, { "epoch": 1.6217170290878284, "grad_norm": 4.542474100839712, "learning_rate": 9.173167065544174e-06, "loss": 0.647, "step": 11485 }, { "epoch": 1.6218582321378143, "grad_norm": 5.0557416143056475, "learning_rate": 9.171647842119515e-06, "loss": 0.7258, "step": 11486 }, { "epoch": 1.6219994351878002, "grad_norm": 3.711286680256037, "learning_rate": 9.170128637945399e-06, "loss": 0.5109, "step": 11487 }, { "epoch": 1.622140638237786, "grad_norm": 3.540540716650895, "learning_rate": 9.16860945305714e-06, "loss": 0.5174, "step": 11488 }, { "epoch": 1.622281841287772, "grad_norm": 2.846590798212462, "learning_rate": 9.167090287490036e-06, "loss": 0.4981, "step": 11489 }, { "epoch": 1.6224230443377579, "grad_norm": 3.914974483468677, "learning_rate": 9.165571141279397e-06, "loss": 0.5963, "step": 11490 }, { "epoch": 1.6225642473877437, "grad_norm": 5.143665342149202, "learning_rate": 9.16405201446052e-06, "loss": 0.9135, "step": 11491 }, { "epoch": 1.6227054504377294, "grad_norm": 3.8344111981329867, "learning_rate": 9.162532907068713e-06, "loss": 0.5533, "step": 11492 }, { "epoch": 1.6228466534877153, "grad_norm": 3.583887879163046, "learning_rate": 9.161013819139281e-06, "loss": 0.6299, "step": 11493 }, { "epoch": 1.6229878565377012, "grad_norm": 3.111689455447244, "learning_rate": 9.159494750707527e-06, "loss": 0.4288, "step": 11494 }, { "epoch": 1.623129059587687, "grad_norm": 3.16934714658762, "learning_rate": 9.157975701808748e-06, "loss": 0.5186, "step": 11495 }, { "epoch": 1.623270262637673, "grad_norm": 3.4067801570415814, "learning_rate": 9.156456672478252e-06, "loss": 0.5885, "step": 11496 }, { "epoch": 1.6234114656876588, "grad_norm": 2.995649718460358, "learning_rate": 9.15493766275134e-06, "loss": 0.5654, "step": 11497 }, { "epoch": 1.6235526687376447, "grad_norm": 4.166573088486783, "learning_rate": 9.153418672663313e-06, "loss": 0.6494, "step": 11498 }, { "epoch": 1.6236938717876306, "grad_norm": 4.068773996161427, "learning_rate": 9.151899702249469e-06, "loss": 0.7747, "step": 11499 }, { "epoch": 1.6238350748376165, "grad_norm": 3.8135481642863645, "learning_rate": 9.15038075154511e-06, "loss": 0.7104, "step": 11500 }, { "epoch": 1.6239762778876023, "grad_norm": 3.88478767465394, "learning_rate": 9.148861820585532e-06, "loss": 0.6726, "step": 11501 }, { "epoch": 1.6241174809375882, "grad_norm": 4.068399318013497, "learning_rate": 9.14734290940604e-06, "loss": 0.5918, "step": 11502 }, { "epoch": 1.6242586839875741, "grad_norm": 2.893845222872589, "learning_rate": 9.145824018041933e-06, "loss": 0.4831, "step": 11503 }, { "epoch": 1.62439988703756, "grad_norm": 3.3631636664365847, "learning_rate": 9.144305146528502e-06, "loss": 0.5335, "step": 11504 }, { "epoch": 1.6245410900875459, "grad_norm": 3.471024391545202, "learning_rate": 9.142786294901053e-06, "loss": 0.6186, "step": 11505 }, { "epoch": 1.6246822931375318, "grad_norm": 3.318218049031006, "learning_rate": 9.141267463194883e-06, "loss": 0.5074, "step": 11506 }, { "epoch": 1.6248234961875176, "grad_norm": 3.6813297910775575, "learning_rate": 9.139748651445282e-06, "loss": 0.5535, "step": 11507 }, { "epoch": 1.6249646992375035, "grad_norm": 3.0385854338410576, "learning_rate": 9.138229859687553e-06, "loss": 0.5154, "step": 11508 }, { "epoch": 1.6251059022874894, "grad_norm": 3.8153256753532383, "learning_rate": 9.136711087956987e-06, "loss": 0.6295, "step": 11509 }, { "epoch": 1.6252471053374753, "grad_norm": 3.6525156994543693, "learning_rate": 9.135192336288885e-06, "loss": 0.6167, "step": 11510 }, { "epoch": 1.6253883083874612, "grad_norm": 3.5299607342751114, "learning_rate": 9.133673604718539e-06, "loss": 0.5329, "step": 11511 }, { "epoch": 1.625529511437447, "grad_norm": 3.9193799244784713, "learning_rate": 9.132154893281244e-06, "loss": 0.6109, "step": 11512 }, { "epoch": 1.625670714487433, "grad_norm": 3.1626437893624346, "learning_rate": 9.130636202012295e-06, "loss": 0.5648, "step": 11513 }, { "epoch": 1.6258119175374188, "grad_norm": 3.4153261556785726, "learning_rate": 9.129117530946986e-06, "loss": 0.6096, "step": 11514 }, { "epoch": 1.6259531205874047, "grad_norm": 2.991248413613067, "learning_rate": 9.127598880120609e-06, "loss": 0.4923, "step": 11515 }, { "epoch": 1.6260943236373906, "grad_norm": 3.892538536476327, "learning_rate": 9.126080249568457e-06, "loss": 0.7174, "step": 11516 }, { "epoch": 1.6262355266873765, "grad_norm": 4.4163355885953015, "learning_rate": 9.124561639325822e-06, "loss": 0.8466, "step": 11517 }, { "epoch": 1.6263767297373624, "grad_norm": 3.4759339470906423, "learning_rate": 9.123043049427996e-06, "loss": 0.5959, "step": 11518 }, { "epoch": 1.6265179327873482, "grad_norm": 3.7615902482097234, "learning_rate": 9.12152447991027e-06, "loss": 0.5919, "step": 11519 }, { "epoch": 1.6266591358373341, "grad_norm": 3.394861259063789, "learning_rate": 9.120005930807939e-06, "loss": 0.6215, "step": 11520 }, { "epoch": 1.62680033888732, "grad_norm": 3.6331528178504437, "learning_rate": 9.118487402156287e-06, "loss": 0.6082, "step": 11521 }, { "epoch": 1.626941541937306, "grad_norm": 3.4316714509501645, "learning_rate": 9.116968893990609e-06, "loss": 0.5372, "step": 11522 }, { "epoch": 1.6270827449872918, "grad_norm": 4.217321140647538, "learning_rate": 9.115450406346193e-06, "loss": 0.6689, "step": 11523 }, { "epoch": 1.6272239480372777, "grad_norm": 3.8668776711298958, "learning_rate": 9.113931939258327e-06, "loss": 0.6212, "step": 11524 }, { "epoch": 1.6273651510872635, "grad_norm": 3.5111067330113457, "learning_rate": 9.112413492762301e-06, "loss": 0.6579, "step": 11525 }, { "epoch": 1.6275063541372492, "grad_norm": 3.2136366491056325, "learning_rate": 9.110895066893398e-06, "loss": 0.5303, "step": 11526 }, { "epoch": 1.627647557187235, "grad_norm": 3.3408156993630524, "learning_rate": 9.109376661686913e-06, "loss": 0.5375, "step": 11527 }, { "epoch": 1.627788760237221, "grad_norm": 3.90658722124255, "learning_rate": 9.10785827717813e-06, "loss": 0.6226, "step": 11528 }, { "epoch": 1.6279299632872068, "grad_norm": 3.319663934444403, "learning_rate": 9.106339913402334e-06, "loss": 0.6134, "step": 11529 }, { "epoch": 1.6280711663371927, "grad_norm": 3.5857409642918836, "learning_rate": 9.104821570394811e-06, "loss": 0.6019, "step": 11530 }, { "epoch": 1.6282123693871786, "grad_norm": 3.8296707886010366, "learning_rate": 9.103303248190855e-06, "loss": 0.5249, "step": 11531 }, { "epoch": 1.6283535724371645, "grad_norm": 3.6073856317893176, "learning_rate": 9.101784946825739e-06, "loss": 0.6535, "step": 11532 }, { "epoch": 1.6284947754871504, "grad_norm": 3.292959027186167, "learning_rate": 9.100266666334753e-06, "loss": 0.5453, "step": 11533 }, { "epoch": 1.6286359785371363, "grad_norm": 3.18509472060752, "learning_rate": 9.098748406753181e-06, "loss": 0.4927, "step": 11534 }, { "epoch": 1.6287771815871221, "grad_norm": 3.386844312718868, "learning_rate": 9.097230168116306e-06, "loss": 0.5204, "step": 11535 }, { "epoch": 1.628918384637108, "grad_norm": 3.8018359683657916, "learning_rate": 9.095711950459412e-06, "loss": 0.5145, "step": 11536 }, { "epoch": 1.629059587687094, "grad_norm": 3.4178554819877536, "learning_rate": 9.094193753817784e-06, "loss": 0.5508, "step": 11537 }, { "epoch": 1.6292007907370798, "grad_norm": 3.8400718774567584, "learning_rate": 9.0926755782267e-06, "loss": 0.5979, "step": 11538 }, { "epoch": 1.6293419937870657, "grad_norm": 4.148113618299864, "learning_rate": 9.091157423721445e-06, "loss": 0.7121, "step": 11539 }, { "epoch": 1.6294831968370516, "grad_norm": 3.866072511509366, "learning_rate": 9.0896392903373e-06, "loss": 0.5941, "step": 11540 }, { "epoch": 1.6296243998870374, "grad_norm": 3.5857225572190377, "learning_rate": 9.088121178109544e-06, "loss": 0.5914, "step": 11541 }, { "epoch": 1.6297656029370233, "grad_norm": 3.4846823477917597, "learning_rate": 9.086603087073457e-06, "loss": 0.5053, "step": 11542 }, { "epoch": 1.6299068059870092, "grad_norm": 3.374143095991624, "learning_rate": 9.085085017264322e-06, "loss": 0.5092, "step": 11543 }, { "epoch": 1.630048009036995, "grad_norm": 4.889668855369143, "learning_rate": 9.083566968717412e-06, "loss": 0.5402, "step": 11544 }, { "epoch": 1.630189212086981, "grad_norm": 3.8638760272976844, "learning_rate": 9.082048941468012e-06, "loss": 0.6618, "step": 11545 }, { "epoch": 1.6303304151369669, "grad_norm": 2.662091944305651, "learning_rate": 9.080530935551398e-06, "loss": 0.44, "step": 11546 }, { "epoch": 1.6304716181869527, "grad_norm": 3.1556897719783517, "learning_rate": 9.079012951002847e-06, "loss": 0.4738, "step": 11547 }, { "epoch": 1.6306128212369386, "grad_norm": 3.7095184243235564, "learning_rate": 9.077494987857644e-06, "loss": 0.4831, "step": 11548 }, { "epoch": 1.6307540242869245, "grad_norm": 3.747975553703809, "learning_rate": 9.07597704615105e-06, "loss": 0.5575, "step": 11549 }, { "epoch": 1.6308952273369104, "grad_norm": 3.6433056037429115, "learning_rate": 9.074459125918356e-06, "loss": 0.5153, "step": 11550 }, { "epoch": 1.6310364303868963, "grad_norm": 3.784166155695553, "learning_rate": 9.07294122719483e-06, "loss": 0.5383, "step": 11551 }, { "epoch": 1.6311776334368822, "grad_norm": 3.0263482787969034, "learning_rate": 9.071423350015747e-06, "loss": 0.4786, "step": 11552 }, { "epoch": 1.631318836486868, "grad_norm": 3.6197375099960096, "learning_rate": 9.069905494416387e-06, "loss": 0.6055, "step": 11553 }, { "epoch": 1.631460039536854, "grad_norm": 3.7938306032590714, "learning_rate": 9.068387660432023e-06, "loss": 0.6526, "step": 11554 }, { "epoch": 1.6316012425868398, "grad_norm": 2.911314886297144, "learning_rate": 9.066869848097925e-06, "loss": 0.5598, "step": 11555 }, { "epoch": 1.6317424456368257, "grad_norm": 3.3981094269155734, "learning_rate": 9.06535205744937e-06, "loss": 0.5442, "step": 11556 }, { "epoch": 1.6318836486868116, "grad_norm": 3.0223971206067364, "learning_rate": 9.063834288521632e-06, "loss": 0.4559, "step": 11557 }, { "epoch": 1.6320248517367975, "grad_norm": 3.5633770183814453, "learning_rate": 9.062316541349978e-06, "loss": 0.6565, "step": 11558 }, { "epoch": 1.6321660547867833, "grad_norm": 3.396417179006909, "learning_rate": 9.060798815969682e-06, "loss": 0.6375, "step": 11559 }, { "epoch": 1.6323072578367692, "grad_norm": 4.163513774326099, "learning_rate": 9.059281112416017e-06, "loss": 0.6467, "step": 11560 }, { "epoch": 1.632448460886755, "grad_norm": 3.406094622860866, "learning_rate": 9.057763430724252e-06, "loss": 0.6088, "step": 11561 }, { "epoch": 1.632589663936741, "grad_norm": 3.9509449518622857, "learning_rate": 9.056245770929659e-06, "loss": 0.5203, "step": 11562 }, { "epoch": 1.6327308669867269, "grad_norm": 3.397514115868934, "learning_rate": 9.054728133067505e-06, "loss": 0.5057, "step": 11563 }, { "epoch": 1.6328720700367128, "grad_norm": 3.6975593244433655, "learning_rate": 9.053210517173061e-06, "loss": 0.6212, "step": 11564 }, { "epoch": 1.6330132730866986, "grad_norm": 4.674956106627507, "learning_rate": 9.051692923281601e-06, "loss": 0.6475, "step": 11565 }, { "epoch": 1.6331544761366845, "grad_norm": 3.4501371469836437, "learning_rate": 9.050175351428381e-06, "loss": 0.531, "step": 11566 }, { "epoch": 1.6332956791866704, "grad_norm": 3.443554838270805, "learning_rate": 9.048657801648679e-06, "loss": 0.5842, "step": 11567 }, { "epoch": 1.6334368822366563, "grad_norm": 3.5089365997365007, "learning_rate": 9.04714027397776e-06, "loss": 0.5598, "step": 11568 }, { "epoch": 1.6335780852866422, "grad_norm": 3.385298515528883, "learning_rate": 9.045622768450884e-06, "loss": 0.53, "step": 11569 }, { "epoch": 1.633719288336628, "grad_norm": 3.8451569369150347, "learning_rate": 9.044105285103327e-06, "loss": 0.5548, "step": 11570 }, { "epoch": 1.633860491386614, "grad_norm": 4.186020276868135, "learning_rate": 9.04258782397035e-06, "loss": 0.7828, "step": 11571 }, { "epoch": 1.6340016944365998, "grad_norm": 3.485194153066215, "learning_rate": 9.04107038508722e-06, "loss": 0.6646, "step": 11572 }, { "epoch": 1.6341428974865857, "grad_norm": 3.779725659166283, "learning_rate": 9.039552968489196e-06, "loss": 0.596, "step": 11573 }, { "epoch": 1.6342841005365716, "grad_norm": 4.337594000633033, "learning_rate": 9.038035574211553e-06, "loss": 0.7644, "step": 11574 }, { "epoch": 1.6344253035865575, "grad_norm": 3.5664909953956654, "learning_rate": 9.036518202289542e-06, "loss": 0.5933, "step": 11575 }, { "epoch": 1.6345665066365433, "grad_norm": 3.90140336096162, "learning_rate": 9.035000852758433e-06, "loss": 0.6046, "step": 11576 }, { "epoch": 1.6347077096865292, "grad_norm": 4.037443451269912, "learning_rate": 9.033483525653488e-06, "loss": 0.5509, "step": 11577 }, { "epoch": 1.6348489127365151, "grad_norm": 3.6012277490470765, "learning_rate": 9.031966221009966e-06, "loss": 0.6167, "step": 11578 }, { "epoch": 1.634990115786501, "grad_norm": 3.5972015434588958, "learning_rate": 9.030448938863134e-06, "loss": 0.5026, "step": 11579 }, { "epoch": 1.6351313188364869, "grad_norm": 3.4807323234758, "learning_rate": 9.028931679248249e-06, "loss": 0.5067, "step": 11580 }, { "epoch": 1.6352725218864728, "grad_norm": 4.48096648096107, "learning_rate": 9.027414442200571e-06, "loss": 0.6753, "step": 11581 }, { "epoch": 1.6354137249364586, "grad_norm": 3.4129221459465886, "learning_rate": 9.025897227755367e-06, "loss": 0.4925, "step": 11582 }, { "epoch": 1.6355549279864445, "grad_norm": 3.7662343387655923, "learning_rate": 9.024380035947883e-06, "loss": 0.6248, "step": 11583 }, { "epoch": 1.6356961310364304, "grad_norm": 4.342212575471232, "learning_rate": 9.022862866813392e-06, "loss": 0.7819, "step": 11584 }, { "epoch": 1.6358373340864163, "grad_norm": 3.735964799923185, "learning_rate": 9.021345720387142e-06, "loss": 0.5661, "step": 11585 }, { "epoch": 1.6359785371364022, "grad_norm": 3.373516409355235, "learning_rate": 9.019828596704394e-06, "loss": 0.5485, "step": 11586 }, { "epoch": 1.636119740186388, "grad_norm": 4.039193743491625, "learning_rate": 9.018311495800408e-06, "loss": 0.5988, "step": 11587 }, { "epoch": 1.636260943236374, "grad_norm": 3.4935358871002813, "learning_rate": 9.016794417710439e-06, "loss": 0.56, "step": 11588 }, { "epoch": 1.6364021462863598, "grad_norm": 3.3572630398661523, "learning_rate": 9.015277362469744e-06, "loss": 0.5261, "step": 11589 }, { "epoch": 1.6365433493363457, "grad_norm": 3.646575002307307, "learning_rate": 9.013760330113575e-06, "loss": 0.5867, "step": 11590 }, { "epoch": 1.6366845523863316, "grad_norm": 3.2243823301475487, "learning_rate": 9.012243320677196e-06, "loss": 0.4536, "step": 11591 }, { "epoch": 1.6368257554363175, "grad_norm": 3.4989008214476933, "learning_rate": 9.010726334195851e-06, "loss": 0.4807, "step": 11592 }, { "epoch": 1.6369669584863034, "grad_norm": 3.278247928794583, "learning_rate": 9.009209370704799e-06, "loss": 0.5297, "step": 11593 }, { "epoch": 1.6371081615362892, "grad_norm": 3.249729718127036, "learning_rate": 9.007692430239294e-06, "loss": 0.4263, "step": 11594 }, { "epoch": 1.6372493645862751, "grad_norm": 3.3352889872241014, "learning_rate": 9.006175512834587e-06, "loss": 0.5223, "step": 11595 }, { "epoch": 1.637390567636261, "grad_norm": 3.3257139773722484, "learning_rate": 9.004658618525932e-06, "loss": 0.5041, "step": 11596 }, { "epoch": 1.637531770686247, "grad_norm": 3.8197145481940344, "learning_rate": 9.003141747348583e-06, "loss": 0.5644, "step": 11597 }, { "epoch": 1.6376729737362328, "grad_norm": 3.8281063187059647, "learning_rate": 9.001624899337785e-06, "loss": 0.6946, "step": 11598 }, { "epoch": 1.6378141767862187, "grad_norm": 4.611649663642448, "learning_rate": 9.000108074528802e-06, "loss": 0.7563, "step": 11599 }, { "epoch": 1.6379553798362045, "grad_norm": 3.2407059015567685, "learning_rate": 8.998591272956866e-06, "loss": 0.5158, "step": 11600 }, { "epoch": 1.6380965828861904, "grad_norm": 3.744898450661428, "learning_rate": 8.997074494657242e-06, "loss": 0.6524, "step": 11601 }, { "epoch": 1.6382377859361763, "grad_norm": 3.492412176349796, "learning_rate": 8.995557739665172e-06, "loss": 0.6129, "step": 11602 }, { "epoch": 1.6383789889861622, "grad_norm": 3.8879497258602815, "learning_rate": 8.994041008015906e-06, "loss": 0.6497, "step": 11603 }, { "epoch": 1.638520192036148, "grad_norm": 3.6641362988110395, "learning_rate": 8.99252429974469e-06, "loss": 0.589, "step": 11604 }, { "epoch": 1.638661395086134, "grad_norm": 2.5586819885690337, "learning_rate": 8.991007614886778e-06, "loss": 0.3787, "step": 11605 }, { "epoch": 1.6388025981361198, "grad_norm": 3.2974941042258696, "learning_rate": 8.989490953477413e-06, "loss": 0.5852, "step": 11606 }, { "epoch": 1.6389438011861057, "grad_norm": 3.7120917522293926, "learning_rate": 8.987974315551838e-06, "loss": 0.5774, "step": 11607 }, { "epoch": 1.6390850042360916, "grad_norm": 3.8831475074416586, "learning_rate": 8.98645770114531e-06, "loss": 0.5806, "step": 11608 }, { "epoch": 1.6392262072860775, "grad_norm": 3.140453945275205, "learning_rate": 8.984941110293061e-06, "loss": 0.4618, "step": 11609 }, { "epoch": 1.6393674103360634, "grad_norm": 3.567525637544053, "learning_rate": 8.983424543030344e-06, "loss": 0.6199, "step": 11610 }, { "epoch": 1.6395086133860493, "grad_norm": 3.173021321515205, "learning_rate": 8.981907999392403e-06, "loss": 0.4828, "step": 11611 }, { "epoch": 1.6396498164360351, "grad_norm": 3.507700498808961, "learning_rate": 8.980391479414478e-06, "loss": 0.5557, "step": 11612 }, { "epoch": 1.639791019486021, "grad_norm": 3.8604184832843202, "learning_rate": 8.978874983131816e-06, "loss": 0.5883, "step": 11613 }, { "epoch": 1.639932222536007, "grad_norm": 4.217393082613377, "learning_rate": 8.977358510579658e-06, "loss": 0.6407, "step": 11614 }, { "epoch": 1.6400734255859928, "grad_norm": 2.904952332249058, "learning_rate": 8.975842061793247e-06, "loss": 0.4347, "step": 11615 }, { "epoch": 1.6402146286359787, "grad_norm": 3.7249708979730327, "learning_rate": 8.974325636807826e-06, "loss": 0.5295, "step": 11616 }, { "epoch": 1.6403558316859645, "grad_norm": 3.2386950885444206, "learning_rate": 8.972809235658631e-06, "loss": 0.6062, "step": 11617 }, { "epoch": 1.6404970347359504, "grad_norm": 3.6741575389414862, "learning_rate": 8.971292858380908e-06, "loss": 0.5964, "step": 11618 }, { "epoch": 1.6406382377859363, "grad_norm": 3.028847217426916, "learning_rate": 8.969776505009894e-06, "loss": 0.5197, "step": 11619 }, { "epoch": 1.6407794408359222, "grad_norm": 3.6453250109596493, "learning_rate": 8.96826017558083e-06, "loss": 0.6152, "step": 11620 }, { "epoch": 1.640920643885908, "grad_norm": 3.75253356038471, "learning_rate": 8.96674387012895e-06, "loss": 0.5456, "step": 11621 }, { "epoch": 1.641061846935894, "grad_norm": 3.0418476777132364, "learning_rate": 8.9652275886895e-06, "loss": 0.465, "step": 11622 }, { "epoch": 1.6412030499858798, "grad_norm": 3.3677574073742202, "learning_rate": 8.963711331297713e-06, "loss": 0.5191, "step": 11623 }, { "epoch": 1.6413442530358657, "grad_norm": 4.186626254873003, "learning_rate": 8.962195097988825e-06, "loss": 0.6796, "step": 11624 }, { "epoch": 1.6414854560858516, "grad_norm": 3.905694083038733, "learning_rate": 8.960678888798082e-06, "loss": 0.5876, "step": 11625 }, { "epoch": 1.6416266591358375, "grad_norm": 3.3703755527821873, "learning_rate": 8.959162703760706e-06, "loss": 0.5089, "step": 11626 }, { "epoch": 1.6417678621858234, "grad_norm": 3.381486405865705, "learning_rate": 8.95764654291194e-06, "loss": 0.5659, "step": 11627 }, { "epoch": 1.641909065235809, "grad_norm": 3.4977429865652176, "learning_rate": 8.95613040628702e-06, "loss": 0.4606, "step": 11628 }, { "epoch": 1.642050268285795, "grad_norm": 3.368360610302634, "learning_rate": 8.954614293921175e-06, "loss": 0.4917, "step": 11629 }, { "epoch": 1.6421914713357808, "grad_norm": 3.4681809707327536, "learning_rate": 8.953098205849647e-06, "loss": 0.5574, "step": 11630 }, { "epoch": 1.6423326743857667, "grad_norm": 3.5086713162334395, "learning_rate": 8.951582142107663e-06, "loss": 0.5032, "step": 11631 }, { "epoch": 1.6424738774357526, "grad_norm": 4.321532822876115, "learning_rate": 8.950066102730456e-06, "loss": 0.5961, "step": 11632 }, { "epoch": 1.6426150804857385, "grad_norm": 3.032603171434424, "learning_rate": 8.948550087753263e-06, "loss": 0.4251, "step": 11633 }, { "epoch": 1.6427562835357243, "grad_norm": 4.386802401397069, "learning_rate": 8.947034097211309e-06, "loss": 0.6617, "step": 11634 }, { "epoch": 1.6428974865857102, "grad_norm": 3.0251192919397756, "learning_rate": 8.945518131139826e-06, "loss": 0.4673, "step": 11635 }, { "epoch": 1.643038689635696, "grad_norm": 4.079550771367353, "learning_rate": 8.944002189574047e-06, "loss": 0.6717, "step": 11636 }, { "epoch": 1.643179892685682, "grad_norm": 3.626939632349534, "learning_rate": 8.942486272549203e-06, "loss": 0.5937, "step": 11637 }, { "epoch": 1.6433210957356679, "grad_norm": 3.413849097910186, "learning_rate": 8.94097038010052e-06, "loss": 0.582, "step": 11638 }, { "epoch": 1.6434622987856538, "grad_norm": 3.4935634423786794, "learning_rate": 8.939454512263228e-06, "loss": 0.5863, "step": 11639 }, { "epoch": 1.6436035018356396, "grad_norm": 3.563872413321058, "learning_rate": 8.937938669072557e-06, "loss": 0.5334, "step": 11640 }, { "epoch": 1.6437447048856255, "grad_norm": 3.492020843098611, "learning_rate": 8.936422850563728e-06, "loss": 0.5588, "step": 11641 }, { "epoch": 1.6438859079356114, "grad_norm": 3.7166154543145944, "learning_rate": 8.93490705677198e-06, "loss": 0.5949, "step": 11642 }, { "epoch": 1.6440271109855973, "grad_norm": 2.864905491591639, "learning_rate": 8.933391287732527e-06, "loss": 0.5244, "step": 11643 }, { "epoch": 1.6441683140355832, "grad_norm": 3.5001341231649548, "learning_rate": 8.931875543480601e-06, "loss": 0.4966, "step": 11644 }, { "epoch": 1.644309517085569, "grad_norm": 3.673204516114982, "learning_rate": 8.930359824051427e-06, "loss": 0.5514, "step": 11645 }, { "epoch": 1.644450720135555, "grad_norm": 3.212162899392053, "learning_rate": 8.928844129480228e-06, "loss": 0.4351, "step": 11646 }, { "epoch": 1.6445919231855408, "grad_norm": 3.8350700444536425, "learning_rate": 8.927328459802227e-06, "loss": 0.6544, "step": 11647 }, { "epoch": 1.6447331262355267, "grad_norm": 3.390031999498588, "learning_rate": 8.92581281505265e-06, "loss": 0.4987, "step": 11648 }, { "epoch": 1.6448743292855126, "grad_norm": 3.842945748515383, "learning_rate": 8.924297195266721e-06, "loss": 0.6313, "step": 11649 }, { "epoch": 1.6450155323354985, "grad_norm": 3.5834684887877595, "learning_rate": 8.922781600479663e-06, "loss": 0.5912, "step": 11650 }, { "epoch": 1.6451567353854843, "grad_norm": 3.68302714092901, "learning_rate": 8.92126603072669e-06, "loss": 0.5515, "step": 11651 }, { "epoch": 1.6452979384354702, "grad_norm": 3.2596205954047557, "learning_rate": 8.91975048604303e-06, "loss": 0.5811, "step": 11652 }, { "epoch": 1.6454391414854561, "grad_norm": 4.130513608607756, "learning_rate": 8.918234966463902e-06, "loss": 0.6278, "step": 11653 }, { "epoch": 1.645580344535442, "grad_norm": 3.190049769827903, "learning_rate": 8.916719472024528e-06, "loss": 0.5296, "step": 11654 }, { "epoch": 1.6457215475854279, "grad_norm": 3.3935515505738016, "learning_rate": 8.915204002760123e-06, "loss": 0.5551, "step": 11655 }, { "epoch": 1.6458627506354138, "grad_norm": 5.031111589292081, "learning_rate": 8.91368855870591e-06, "loss": 0.862, "step": 11656 }, { "epoch": 1.6460039536853996, "grad_norm": 4.309704140982303, "learning_rate": 8.912173139897107e-06, "loss": 0.6718, "step": 11657 }, { "epoch": 1.6461451567353855, "grad_norm": 3.4717234683437352, "learning_rate": 8.91065774636893e-06, "loss": 0.5295, "step": 11658 }, { "epoch": 1.6462863597853714, "grad_norm": 3.73692920359234, "learning_rate": 8.909142378156596e-06, "loss": 0.6556, "step": 11659 }, { "epoch": 1.6464275628353573, "grad_norm": 3.728525017818683, "learning_rate": 8.90762703529532e-06, "loss": 0.593, "step": 11660 }, { "epoch": 1.6465687658853432, "grad_norm": 3.9774084573431465, "learning_rate": 8.906111717820322e-06, "loss": 0.5665, "step": 11661 }, { "epoch": 1.6467099689353288, "grad_norm": 4.791998849361074, "learning_rate": 8.904596425766817e-06, "loss": 0.9872, "step": 11662 }, { "epoch": 1.6468511719853147, "grad_norm": 3.125707216582831, "learning_rate": 8.903081159170016e-06, "loss": 0.5142, "step": 11663 }, { "epoch": 1.6469923750353006, "grad_norm": 3.5455043418769008, "learning_rate": 8.901565918065134e-06, "loss": 0.5645, "step": 11664 }, { "epoch": 1.6471335780852865, "grad_norm": 3.8101465924274005, "learning_rate": 8.900050702487386e-06, "loss": 0.658, "step": 11665 }, { "epoch": 1.6472747811352724, "grad_norm": 3.543847367677812, "learning_rate": 8.898535512471986e-06, "loss": 0.6415, "step": 11666 }, { "epoch": 1.6474159841852583, "grad_norm": 3.354424342347385, "learning_rate": 8.897020348054147e-06, "loss": 0.5487, "step": 11667 }, { "epoch": 1.6475571872352441, "grad_norm": 3.35012041619421, "learning_rate": 8.895505209269078e-06, "loss": 0.5568, "step": 11668 }, { "epoch": 1.64769839028523, "grad_norm": 3.593323874560674, "learning_rate": 8.893990096151986e-06, "loss": 0.6238, "step": 11669 }, { "epoch": 1.647839593335216, "grad_norm": 3.766345056822593, "learning_rate": 8.89247500873809e-06, "loss": 0.6142, "step": 11670 }, { "epoch": 1.6479807963852018, "grad_norm": 3.7747338031600552, "learning_rate": 8.890959947062598e-06, "loss": 0.572, "step": 11671 }, { "epoch": 1.6481219994351877, "grad_norm": 3.1198525118866662, "learning_rate": 8.889444911160713e-06, "loss": 0.4696, "step": 11672 }, { "epoch": 1.6482632024851736, "grad_norm": 3.595561607069267, "learning_rate": 8.887929901067652e-06, "loss": 0.5346, "step": 11673 }, { "epoch": 1.6484044055351594, "grad_norm": 3.2421217537564884, "learning_rate": 8.88641491681862e-06, "loss": 0.5262, "step": 11674 }, { "epoch": 1.6485456085851453, "grad_norm": 3.868888269725596, "learning_rate": 8.884899958448828e-06, "loss": 0.5866, "step": 11675 }, { "epoch": 1.6486868116351312, "grad_norm": 4.077322405823822, "learning_rate": 8.883385025993474e-06, "loss": 0.6784, "step": 11676 }, { "epoch": 1.648828014685117, "grad_norm": 2.962325793892677, "learning_rate": 8.881870119487772e-06, "loss": 0.4685, "step": 11677 }, { "epoch": 1.648969217735103, "grad_norm": 4.32559887484316, "learning_rate": 8.880355238966923e-06, "loss": 0.6224, "step": 11678 }, { "epoch": 1.6491104207850888, "grad_norm": 3.7800636436025745, "learning_rate": 8.878840384466137e-06, "loss": 0.5824, "step": 11679 }, { "epoch": 1.6492516238350747, "grad_norm": 3.070711491410998, "learning_rate": 8.877325556020615e-06, "loss": 0.4662, "step": 11680 }, { "epoch": 1.6493928268850606, "grad_norm": 3.6052152716855383, "learning_rate": 8.87581075366556e-06, "loss": 0.504, "step": 11681 }, { "epoch": 1.6495340299350465, "grad_norm": 4.023284854431615, "learning_rate": 8.874295977436182e-06, "loss": 0.67, "step": 11682 }, { "epoch": 1.6496752329850324, "grad_norm": 3.756213994290018, "learning_rate": 8.872781227367679e-06, "loss": 0.6613, "step": 11683 }, { "epoch": 1.6498164360350183, "grad_norm": 3.0313103004437907, "learning_rate": 8.871266503495255e-06, "loss": 0.3971, "step": 11684 }, { "epoch": 1.6499576390850041, "grad_norm": 3.678960670940138, "learning_rate": 8.869751805854107e-06, "loss": 0.5223, "step": 11685 }, { "epoch": 1.65009884213499, "grad_norm": 3.9941849530176667, "learning_rate": 8.868237134479437e-06, "loss": 0.6456, "step": 11686 }, { "epoch": 1.650240045184976, "grad_norm": 3.295370068304338, "learning_rate": 8.86672248940645e-06, "loss": 0.4848, "step": 11687 }, { "epoch": 1.6503812482349618, "grad_norm": 3.7490172750075104, "learning_rate": 8.865207870670342e-06, "loss": 0.4955, "step": 11688 }, { "epoch": 1.6505224512849477, "grad_norm": 4.505626935469949, "learning_rate": 8.863693278306314e-06, "loss": 0.6965, "step": 11689 }, { "epoch": 1.6506636543349336, "grad_norm": 4.2173405537704, "learning_rate": 8.862178712349562e-06, "loss": 0.6328, "step": 11690 }, { "epoch": 1.6508048573849194, "grad_norm": 3.878776562027165, "learning_rate": 8.860664172835285e-06, "loss": 0.632, "step": 11691 }, { "epoch": 1.6509460604349053, "grad_norm": 3.4190722635904525, "learning_rate": 8.859149659798685e-06, "loss": 0.4884, "step": 11692 }, { "epoch": 1.6510872634848912, "grad_norm": 3.6837269323365742, "learning_rate": 8.857635173274952e-06, "loss": 0.5828, "step": 11693 }, { "epoch": 1.651228466534877, "grad_norm": 4.891267917574343, "learning_rate": 8.856120713299284e-06, "loss": 0.6136, "step": 11694 }, { "epoch": 1.651369669584863, "grad_norm": 3.5567123812305823, "learning_rate": 8.854606279906874e-06, "loss": 0.6207, "step": 11695 }, { "epoch": 1.6515108726348489, "grad_norm": 3.6809829824806513, "learning_rate": 8.853091873132921e-06, "loss": 0.5735, "step": 11696 }, { "epoch": 1.6516520756848347, "grad_norm": 2.8735230315058202, "learning_rate": 8.851577493012617e-06, "loss": 0.3971, "step": 11697 }, { "epoch": 1.6517932787348206, "grad_norm": 3.3477124892043664, "learning_rate": 8.850063139581156e-06, "loss": 0.546, "step": 11698 }, { "epoch": 1.6519344817848065, "grad_norm": 3.8339880435060785, "learning_rate": 8.848548812873731e-06, "loss": 0.7709, "step": 11699 }, { "epoch": 1.6520756848347924, "grad_norm": 3.6966808059271954, "learning_rate": 8.847034512925536e-06, "loss": 0.5491, "step": 11700 }, { "epoch": 1.6522168878847783, "grad_norm": 4.173640064468473, "learning_rate": 8.845520239771763e-06, "loss": 0.541, "step": 11701 }, { "epoch": 1.6523580909347642, "grad_norm": 3.8786066277582245, "learning_rate": 8.844005993447599e-06, "loss": 0.6749, "step": 11702 }, { "epoch": 1.65249929398475, "grad_norm": 3.5816801338100057, "learning_rate": 8.842491773988234e-06, "loss": 0.4931, "step": 11703 }, { "epoch": 1.652640497034736, "grad_norm": 2.9550820136021345, "learning_rate": 8.840977581428863e-06, "loss": 0.4373, "step": 11704 }, { "epoch": 1.6527817000847218, "grad_norm": 3.9351023844619992, "learning_rate": 8.839463415804672e-06, "loss": 0.6827, "step": 11705 }, { "epoch": 1.6529229031347077, "grad_norm": 3.080026457094292, "learning_rate": 8.837949277150849e-06, "loss": 0.4974, "step": 11706 }, { "epoch": 1.6530641061846936, "grad_norm": 3.650675087012112, "learning_rate": 8.836435165502582e-06, "loss": 0.6208, "step": 11707 }, { "epoch": 1.6532053092346795, "grad_norm": 3.9578076835115925, "learning_rate": 8.83492108089506e-06, "loss": 0.6134, "step": 11708 }, { "epoch": 1.6533465122846653, "grad_norm": 3.6011046259563826, "learning_rate": 8.833407023363471e-06, "loss": 0.5831, "step": 11709 }, { "epoch": 1.6534877153346512, "grad_norm": 3.902404361282688, "learning_rate": 8.831892992943e-06, "loss": 0.6151, "step": 11710 }, { "epoch": 1.653628918384637, "grad_norm": 3.9496105402365167, "learning_rate": 8.83037898966883e-06, "loss": 0.6642, "step": 11711 }, { "epoch": 1.653770121434623, "grad_norm": 2.9187954365707043, "learning_rate": 8.828865013576143e-06, "loss": 0.5007, "step": 11712 }, { "epoch": 1.6539113244846089, "grad_norm": 3.917949187995686, "learning_rate": 8.827351064700131e-06, "loss": 0.7263, "step": 11713 }, { "epoch": 1.6540525275345948, "grad_norm": 5.595735320794007, "learning_rate": 8.825837143075973e-06, "loss": 0.8281, "step": 11714 }, { "epoch": 1.6541937305845806, "grad_norm": 3.8497917584624224, "learning_rate": 8.82432324873885e-06, "loss": 0.6201, "step": 11715 }, { "epoch": 1.6543349336345665, "grad_norm": 3.508345806215123, "learning_rate": 8.822809381723952e-06, "loss": 0.5706, "step": 11716 }, { "epoch": 1.6544761366845524, "grad_norm": 3.5817064557877147, "learning_rate": 8.821295542066452e-06, "loss": 0.5832, "step": 11717 }, { "epoch": 1.6546173397345383, "grad_norm": 3.226557789293554, "learning_rate": 8.81978172980154e-06, "loss": 0.4655, "step": 11718 }, { "epoch": 1.6547585427845242, "grad_norm": 5.145284125784873, "learning_rate": 8.818267944964387e-06, "loss": 0.7388, "step": 11719 }, { "epoch": 1.65489974583451, "grad_norm": 3.4128155067648263, "learning_rate": 8.816754187590175e-06, "loss": 0.5788, "step": 11720 }, { "epoch": 1.655040948884496, "grad_norm": 3.7915122683491105, "learning_rate": 8.815240457714086e-06, "loss": 0.5236, "step": 11721 }, { "epoch": 1.6551821519344818, "grad_norm": 3.669524406192264, "learning_rate": 8.813726755371298e-06, "loss": 0.5782, "step": 11722 }, { "epoch": 1.6553233549844677, "grad_norm": 3.1938244798423856, "learning_rate": 8.812213080596988e-06, "loss": 0.6074, "step": 11723 }, { "epoch": 1.6554645580344536, "grad_norm": 3.135169879581096, "learning_rate": 8.81069943342633e-06, "loss": 0.4707, "step": 11724 }, { "epoch": 1.6556057610844395, "grad_norm": 2.9475954738749115, "learning_rate": 8.809185813894507e-06, "loss": 0.5006, "step": 11725 }, { "epoch": 1.6557469641344253, "grad_norm": 3.396585108213299, "learning_rate": 8.807672222036692e-06, "loss": 0.535, "step": 11726 }, { "epoch": 1.6558881671844112, "grad_norm": 3.695789634824404, "learning_rate": 8.806158657888058e-06, "loss": 0.5993, "step": 11727 }, { "epoch": 1.6560293702343971, "grad_norm": 4.480429315692619, "learning_rate": 8.804645121483781e-06, "loss": 0.6998, "step": 11728 }, { "epoch": 1.656170573284383, "grad_norm": 3.976790134445273, "learning_rate": 8.803131612859034e-06, "loss": 0.5743, "step": 11729 }, { "epoch": 1.6563117763343689, "grad_norm": 3.199168761040424, "learning_rate": 8.801618132048992e-06, "loss": 0.4855, "step": 11730 }, { "epoch": 1.6564529793843548, "grad_norm": 3.847428251853948, "learning_rate": 8.80010467908883e-06, "loss": 0.6054, "step": 11731 }, { "epoch": 1.6565941824343406, "grad_norm": 4.088439338516689, "learning_rate": 8.798591254013712e-06, "loss": 0.7225, "step": 11732 }, { "epoch": 1.6567353854843265, "grad_norm": 3.9550393005834454, "learning_rate": 8.797077856858817e-06, "loss": 0.5615, "step": 11733 }, { "epoch": 1.6568765885343124, "grad_norm": 3.7456608460058947, "learning_rate": 8.795564487659313e-06, "loss": 0.7042, "step": 11734 }, { "epoch": 1.6570177915842983, "grad_norm": 3.3174618500262616, "learning_rate": 8.794051146450374e-06, "loss": 0.6028, "step": 11735 }, { "epoch": 1.6571589946342842, "grad_norm": 3.262161968653483, "learning_rate": 8.792537833267161e-06, "loss": 0.5298, "step": 11736 }, { "epoch": 1.65730019768427, "grad_norm": 3.49527466674295, "learning_rate": 8.79102454814485e-06, "loss": 0.5388, "step": 11737 }, { "epoch": 1.657441400734256, "grad_norm": 3.223092387332322, "learning_rate": 8.789511291118601e-06, "loss": 0.5795, "step": 11738 }, { "epoch": 1.6575826037842418, "grad_norm": 3.6545271326870457, "learning_rate": 8.787998062223593e-06, "loss": 0.5544, "step": 11739 }, { "epoch": 1.6577238068342277, "grad_norm": 3.3542400054261834, "learning_rate": 8.786484861494984e-06, "loss": 0.6543, "step": 11740 }, { "epoch": 1.6578650098842136, "grad_norm": 3.796930617899137, "learning_rate": 8.78497168896794e-06, "loss": 0.6668, "step": 11741 }, { "epoch": 1.6580062129341995, "grad_norm": 3.715921028684624, "learning_rate": 8.783458544677633e-06, "loss": 0.5827, "step": 11742 }, { "epoch": 1.6581474159841854, "grad_norm": 3.297186870601694, "learning_rate": 8.781945428659225e-06, "loss": 0.622, "step": 11743 }, { "epoch": 1.6582886190341712, "grad_norm": 3.017646598608677, "learning_rate": 8.780432340947879e-06, "loss": 0.504, "step": 11744 }, { "epoch": 1.6584298220841571, "grad_norm": 3.261233009803769, "learning_rate": 8.778919281578758e-06, "loss": 0.5652, "step": 11745 }, { "epoch": 1.658571025134143, "grad_norm": 3.4242993367409182, "learning_rate": 8.777406250587021e-06, "loss": 0.5675, "step": 11746 }, { "epoch": 1.658712228184129, "grad_norm": 3.5736461229099943, "learning_rate": 8.77589324800784e-06, "loss": 0.5426, "step": 11747 }, { "epoch": 1.6588534312341148, "grad_norm": 3.608884599784015, "learning_rate": 8.77438027387637e-06, "loss": 0.6978, "step": 11748 }, { "epoch": 1.6589946342841007, "grad_norm": 3.551761593462726, "learning_rate": 8.772867328227773e-06, "loss": 0.5274, "step": 11749 }, { "epoch": 1.6591358373340865, "grad_norm": 2.7374876868902667, "learning_rate": 8.771354411097207e-06, "loss": 0.4503, "step": 11750 }, { "epoch": 1.6592770403840724, "grad_norm": 3.2416659758678805, "learning_rate": 8.769841522519835e-06, "loss": 0.5158, "step": 11751 }, { "epoch": 1.6594182434340583, "grad_norm": 3.8179168465472646, "learning_rate": 8.768328662530818e-06, "loss": 0.5645, "step": 11752 }, { "epoch": 1.6595594464840442, "grad_norm": 3.2572112471660035, "learning_rate": 8.76681583116531e-06, "loss": 0.4985, "step": 11753 }, { "epoch": 1.65970064953403, "grad_norm": 2.7813644352470885, "learning_rate": 8.765303028458468e-06, "loss": 0.4021, "step": 11754 }, { "epoch": 1.659841852584016, "grad_norm": 3.285496379366319, "learning_rate": 8.763790254445448e-06, "loss": 0.5305, "step": 11755 }, { "epoch": 1.6599830556340018, "grad_norm": 4.624296195793952, "learning_rate": 8.762277509161413e-06, "loss": 0.5827, "step": 11756 }, { "epoch": 1.6601242586839877, "grad_norm": 2.9375287558686876, "learning_rate": 8.760764792641512e-06, "loss": 0.5053, "step": 11757 }, { "epoch": 1.6602654617339736, "grad_norm": 3.0139769776682126, "learning_rate": 8.7592521049209e-06, "loss": 0.4673, "step": 11758 }, { "epoch": 1.6604066647839595, "grad_norm": 3.011707378070552, "learning_rate": 8.757739446034737e-06, "loss": 0.482, "step": 11759 }, { "epoch": 1.6605478678339454, "grad_norm": 3.7547121265718975, "learning_rate": 8.756226816018172e-06, "loss": 0.6058, "step": 11760 }, { "epoch": 1.6606890708839313, "grad_norm": 3.5372775443242768, "learning_rate": 8.75471421490636e-06, "loss": 0.5591, "step": 11761 }, { "epoch": 1.6608302739339171, "grad_norm": 3.153396252381114, "learning_rate": 8.75320164273445e-06, "loss": 0.4684, "step": 11762 }, { "epoch": 1.660971476983903, "grad_norm": 3.1864200232606636, "learning_rate": 8.751689099537592e-06, "loss": 0.4585, "step": 11763 }, { "epoch": 1.6611126800338887, "grad_norm": 3.1863992697492174, "learning_rate": 8.750176585350945e-06, "loss": 0.5074, "step": 11764 }, { "epoch": 1.6612538830838746, "grad_norm": 3.258180484799217, "learning_rate": 8.748664100209652e-06, "loss": 0.4792, "step": 11765 }, { "epoch": 1.6613950861338604, "grad_norm": 3.3518429115544275, "learning_rate": 8.747151644148867e-06, "loss": 0.4493, "step": 11766 }, { "epoch": 1.6615362891838463, "grad_norm": 3.1954167115768985, "learning_rate": 8.745639217203733e-06, "loss": 0.4653, "step": 11767 }, { "epoch": 1.6616774922338322, "grad_norm": 3.888409935697885, "learning_rate": 8.744126819409405e-06, "loss": 0.6661, "step": 11768 }, { "epoch": 1.661818695283818, "grad_norm": 3.927085264786611, "learning_rate": 8.74261445080103e-06, "loss": 0.5647, "step": 11769 }, { "epoch": 1.661959898333804, "grad_norm": 4.236553920520013, "learning_rate": 8.741102111413749e-06, "loss": 0.609, "step": 11770 }, { "epoch": 1.6621011013837899, "grad_norm": 3.9622932380178275, "learning_rate": 8.73958980128271e-06, "loss": 0.6006, "step": 11771 }, { "epoch": 1.6622423044337757, "grad_norm": 3.9180595869253225, "learning_rate": 8.738077520443061e-06, "loss": 0.6383, "step": 11772 }, { "epoch": 1.6623835074837616, "grad_norm": 3.644559501290641, "learning_rate": 8.736565268929943e-06, "loss": 0.556, "step": 11773 }, { "epoch": 1.6625247105337475, "grad_norm": 3.072015072593699, "learning_rate": 8.735053046778506e-06, "loss": 0.4923, "step": 11774 }, { "epoch": 1.6626659135837334, "grad_norm": 3.3318947569572863, "learning_rate": 8.733540854023888e-06, "loss": 0.4738, "step": 11775 }, { "epoch": 1.6628071166337193, "grad_norm": 2.651235984828113, "learning_rate": 8.732028690701235e-06, "loss": 0.4197, "step": 11776 }, { "epoch": 1.6629483196837052, "grad_norm": 3.2354418894991457, "learning_rate": 8.730516556845688e-06, "loss": 0.4439, "step": 11777 }, { "epoch": 1.663089522733691, "grad_norm": 3.2226979105164872, "learning_rate": 8.729004452492388e-06, "loss": 0.5676, "step": 11778 }, { "epoch": 1.663230725783677, "grad_norm": 3.5426747653551582, "learning_rate": 8.727492377676474e-06, "loss": 0.6309, "step": 11779 }, { "epoch": 1.6633719288336628, "grad_norm": 3.7405809400779253, "learning_rate": 8.725980332433089e-06, "loss": 0.7124, "step": 11780 }, { "epoch": 1.6635131318836487, "grad_norm": 3.463851573396787, "learning_rate": 8.724468316797368e-06, "loss": 0.5822, "step": 11781 }, { "epoch": 1.6636543349336346, "grad_norm": 3.0301599087570037, "learning_rate": 8.722956330804456e-06, "loss": 0.4579, "step": 11782 }, { "epoch": 1.6637955379836205, "grad_norm": 4.149373868745879, "learning_rate": 8.721444374489485e-06, "loss": 0.6328, "step": 11783 }, { "epoch": 1.6639367410336063, "grad_norm": 3.4125901348547596, "learning_rate": 8.719932447887594e-06, "loss": 0.5859, "step": 11784 }, { "epoch": 1.6640779440835922, "grad_norm": 3.752275530820653, "learning_rate": 8.718420551033922e-06, "loss": 0.5885, "step": 11785 }, { "epoch": 1.664219147133578, "grad_norm": 3.578626486806983, "learning_rate": 8.716908683963602e-06, "loss": 0.521, "step": 11786 }, { "epoch": 1.664360350183564, "grad_norm": 2.8279108463052802, "learning_rate": 8.715396846711773e-06, "loss": 0.4255, "step": 11787 }, { "epoch": 1.6645015532335499, "grad_norm": 3.8576496499375263, "learning_rate": 8.713885039313562e-06, "loss": 0.6049, "step": 11788 }, { "epoch": 1.6646427562835358, "grad_norm": 2.6867857445439745, "learning_rate": 8.712373261804109e-06, "loss": 0.3965, "step": 11789 }, { "epoch": 1.6647839593335216, "grad_norm": 3.810580367951582, "learning_rate": 8.710861514218545e-06, "loss": 0.6289, "step": 11790 }, { "epoch": 1.6649251623835075, "grad_norm": 3.281358819549739, "learning_rate": 8.709349796592004e-06, "loss": 0.5257, "step": 11791 }, { "epoch": 1.6650663654334934, "grad_norm": 3.508721648434874, "learning_rate": 8.707838108959617e-06, "loss": 0.5488, "step": 11792 }, { "epoch": 1.6652075684834793, "grad_norm": 3.351240840136458, "learning_rate": 8.70632645135651e-06, "loss": 0.5486, "step": 11793 }, { "epoch": 1.6653487715334652, "grad_norm": 3.9447916300630856, "learning_rate": 8.704814823817822e-06, "loss": 0.5449, "step": 11794 }, { "epoch": 1.665489974583451, "grad_norm": 2.722985529121054, "learning_rate": 8.703303226378678e-06, "loss": 0.3912, "step": 11795 }, { "epoch": 1.665631177633437, "grad_norm": 3.731282588483059, "learning_rate": 8.701791659074206e-06, "loss": 0.5677, "step": 11796 }, { "epoch": 1.6657723806834228, "grad_norm": 3.13868205764393, "learning_rate": 8.700280121939535e-06, "loss": 0.445, "step": 11797 }, { "epoch": 1.6659135837334085, "grad_norm": 3.3665634805518803, "learning_rate": 8.698768615009789e-06, "loss": 0.4251, "step": 11798 }, { "epoch": 1.6660547867833944, "grad_norm": 3.4888800001658624, "learning_rate": 8.697257138320104e-06, "loss": 0.5908, "step": 11799 }, { "epoch": 1.6661959898333802, "grad_norm": 3.549288175500463, "learning_rate": 8.695745691905599e-06, "loss": 0.679, "step": 11800 }, { "epoch": 1.6663371928833661, "grad_norm": 3.8249142152586346, "learning_rate": 8.694234275801397e-06, "loss": 0.563, "step": 11801 }, { "epoch": 1.666478395933352, "grad_norm": 3.1795698417392746, "learning_rate": 8.692722890042632e-06, "loss": 0.5097, "step": 11802 }, { "epoch": 1.666619598983338, "grad_norm": 4.380338714011936, "learning_rate": 8.69121153466442e-06, "loss": 0.7053, "step": 11803 }, { "epoch": 1.6667608020333238, "grad_norm": 3.332757915953628, "learning_rate": 8.689700209701887e-06, "loss": 0.5294, "step": 11804 }, { "epoch": 1.6669020050833097, "grad_norm": 4.206320113036879, "learning_rate": 8.688188915190156e-06, "loss": 0.5536, "step": 11805 }, { "epoch": 1.6670432081332955, "grad_norm": 3.901885080466035, "learning_rate": 8.686677651164345e-06, "loss": 0.6814, "step": 11806 }, { "epoch": 1.6671844111832814, "grad_norm": 4.1663138300122435, "learning_rate": 8.685166417659581e-06, "loss": 0.5174, "step": 11807 }, { "epoch": 1.6673256142332673, "grad_norm": 3.339407189127907, "learning_rate": 8.683655214710982e-06, "loss": 0.5206, "step": 11808 }, { "epoch": 1.6674668172832532, "grad_norm": 3.29607884414573, "learning_rate": 8.682144042353666e-06, "loss": 0.589, "step": 11809 }, { "epoch": 1.667608020333239, "grad_norm": 4.072754256106878, "learning_rate": 8.680632900622752e-06, "loss": 0.6087, "step": 11810 }, { "epoch": 1.667749223383225, "grad_norm": 3.3176699801185885, "learning_rate": 8.679121789553366e-06, "loss": 0.4965, "step": 11811 }, { "epoch": 1.6678904264332108, "grad_norm": 3.534854414817696, "learning_rate": 8.677610709180612e-06, "loss": 0.5173, "step": 11812 }, { "epoch": 1.6680316294831967, "grad_norm": 3.8125347488209584, "learning_rate": 8.676099659539618e-06, "loss": 0.5781, "step": 11813 }, { "epoch": 1.6681728325331826, "grad_norm": 3.534691932673833, "learning_rate": 8.674588640665495e-06, "loss": 0.5626, "step": 11814 }, { "epoch": 1.6683140355831685, "grad_norm": 4.018937437100556, "learning_rate": 8.673077652593357e-06, "loss": 0.4826, "step": 11815 }, { "epoch": 1.6684552386331544, "grad_norm": 4.214688270018037, "learning_rate": 8.671566695358324e-06, "loss": 0.6256, "step": 11816 }, { "epoch": 1.6685964416831403, "grad_norm": 4.120202320830355, "learning_rate": 8.670055768995508e-06, "loss": 0.7755, "step": 11817 }, { "epoch": 1.6687376447331261, "grad_norm": 3.9327198720882652, "learning_rate": 8.668544873540017e-06, "loss": 0.6325, "step": 11818 }, { "epoch": 1.668878847783112, "grad_norm": 4.0888828645099915, "learning_rate": 8.667034009026972e-06, "loss": 0.5921, "step": 11819 }, { "epoch": 1.669020050833098, "grad_norm": 3.7078082540154633, "learning_rate": 8.665523175491484e-06, "loss": 0.6071, "step": 11820 }, { "epoch": 1.6691612538830838, "grad_norm": 3.180805037556508, "learning_rate": 8.664012372968658e-06, "loss": 0.478, "step": 11821 }, { "epoch": 1.6693024569330697, "grad_norm": 3.407186692631098, "learning_rate": 8.662501601493607e-06, "loss": 0.4855, "step": 11822 }, { "epoch": 1.6694436599830556, "grad_norm": 3.756222899851191, "learning_rate": 8.66099086110144e-06, "loss": 0.5248, "step": 11823 }, { "epoch": 1.6695848630330414, "grad_norm": 3.493340715982749, "learning_rate": 8.659480151827267e-06, "loss": 0.6306, "step": 11824 }, { "epoch": 1.6697260660830273, "grad_norm": 3.0816676012854054, "learning_rate": 8.657969473706197e-06, "loss": 0.4261, "step": 11825 }, { "epoch": 1.6698672691330132, "grad_norm": 3.298957463594451, "learning_rate": 8.65645882677334e-06, "loss": 0.4269, "step": 11826 }, { "epoch": 1.670008472182999, "grad_norm": 4.536664585722148, "learning_rate": 8.654948211063794e-06, "loss": 0.6175, "step": 11827 }, { "epoch": 1.670149675232985, "grad_norm": 4.091091377225782, "learning_rate": 8.65343762661268e-06, "loss": 0.6747, "step": 11828 }, { "epoch": 1.6702908782829708, "grad_norm": 3.4472066568414683, "learning_rate": 8.651927073455085e-06, "loss": 0.5497, "step": 11829 }, { "epoch": 1.6704320813329567, "grad_norm": 3.7384452692408634, "learning_rate": 8.650416551626126e-06, "loss": 0.4999, "step": 11830 }, { "epoch": 1.6705732843829426, "grad_norm": 3.323611335239815, "learning_rate": 8.648906061160903e-06, "loss": 0.5476, "step": 11831 }, { "epoch": 1.6707144874329285, "grad_norm": 2.9648162417218695, "learning_rate": 8.647395602094517e-06, "loss": 0.4715, "step": 11832 }, { "epoch": 1.6708556904829144, "grad_norm": 3.787153880016145, "learning_rate": 8.645885174462077e-06, "loss": 0.5878, "step": 11833 }, { "epoch": 1.6709968935329003, "grad_norm": 4.443030949399692, "learning_rate": 8.64437477829868e-06, "loss": 0.7999, "step": 11834 }, { "epoch": 1.6711380965828861, "grad_norm": 3.409431606087524, "learning_rate": 8.642864413639425e-06, "loss": 0.5353, "step": 11835 }, { "epoch": 1.671279299632872, "grad_norm": 3.6564535830588185, "learning_rate": 8.641354080519422e-06, "loss": 0.6192, "step": 11836 }, { "epoch": 1.671420502682858, "grad_norm": 3.664445994642492, "learning_rate": 8.639843778973756e-06, "loss": 0.5791, "step": 11837 }, { "epoch": 1.6715617057328438, "grad_norm": 4.459591095245657, "learning_rate": 8.638333509037537e-06, "loss": 0.6629, "step": 11838 }, { "epoch": 1.6717029087828297, "grad_norm": 4.068535487914683, "learning_rate": 8.636823270745858e-06, "loss": 0.615, "step": 11839 }, { "epoch": 1.6718441118328156, "grad_norm": 4.497429931957836, "learning_rate": 8.635313064133817e-06, "loss": 0.6714, "step": 11840 }, { "epoch": 1.6719853148828014, "grad_norm": 3.1216720773124242, "learning_rate": 8.633802889236509e-06, "loss": 0.4743, "step": 11841 }, { "epoch": 1.6721265179327873, "grad_norm": 3.420122440356066, "learning_rate": 8.632292746089034e-06, "loss": 0.6103, "step": 11842 }, { "epoch": 1.6722677209827732, "grad_norm": 3.88428446179128, "learning_rate": 8.630782634726487e-06, "loss": 0.5517, "step": 11843 }, { "epoch": 1.672408924032759, "grad_norm": 3.1563730771256173, "learning_rate": 8.629272555183956e-06, "loss": 0.5258, "step": 11844 }, { "epoch": 1.672550127082745, "grad_norm": 4.51886496990165, "learning_rate": 8.627762507496546e-06, "loss": 0.7154, "step": 11845 }, { "epoch": 1.6726913301327309, "grad_norm": 3.592722847511932, "learning_rate": 8.626252491699335e-06, "loss": 0.5898, "step": 11846 }, { "epoch": 1.6728325331827167, "grad_norm": 3.249355488456688, "learning_rate": 8.624742507827427e-06, "loss": 0.558, "step": 11847 }, { "epoch": 1.6729737362327026, "grad_norm": 2.9130105166090616, "learning_rate": 8.623232555915907e-06, "loss": 0.5132, "step": 11848 }, { "epoch": 1.6731149392826885, "grad_norm": 3.9198247650453655, "learning_rate": 8.621722635999868e-06, "loss": 0.6091, "step": 11849 }, { "epoch": 1.6732561423326744, "grad_norm": 3.3647444032271108, "learning_rate": 8.6202127481144e-06, "loss": 0.5635, "step": 11850 }, { "epoch": 1.6733973453826603, "grad_norm": 4.807280972052962, "learning_rate": 8.618702892294593e-06, "loss": 0.7692, "step": 11851 }, { "epoch": 1.6735385484326462, "grad_norm": 3.064648830490491, "learning_rate": 8.617193068575534e-06, "loss": 0.4539, "step": 11852 }, { "epoch": 1.673679751482632, "grad_norm": 3.30783750301966, "learning_rate": 8.615683276992313e-06, "loss": 0.4834, "step": 11853 }, { "epoch": 1.673820954532618, "grad_norm": 4.138434182904973, "learning_rate": 8.61417351758001e-06, "loss": 0.7309, "step": 11854 }, { "epoch": 1.6739621575826038, "grad_norm": 2.9961660126614236, "learning_rate": 8.61266379037372e-06, "loss": 0.4829, "step": 11855 }, { "epoch": 1.6741033606325897, "grad_norm": 3.17518453226916, "learning_rate": 8.611154095408521e-06, "loss": 0.4367, "step": 11856 }, { "epoch": 1.6742445636825756, "grad_norm": 4.067531199968323, "learning_rate": 8.609644432719504e-06, "loss": 0.6748, "step": 11857 }, { "epoch": 1.6743857667325615, "grad_norm": 3.4702533010281855, "learning_rate": 8.608134802341745e-06, "loss": 0.5683, "step": 11858 }, { "epoch": 1.6745269697825473, "grad_norm": 3.507799220589695, "learning_rate": 8.606625204310337e-06, "loss": 0.4881, "step": 11859 }, { "epoch": 1.6746681728325332, "grad_norm": 3.4217478350021837, "learning_rate": 8.605115638660356e-06, "loss": 0.5644, "step": 11860 }, { "epoch": 1.674809375882519, "grad_norm": 3.1644089494950847, "learning_rate": 8.603606105426884e-06, "loss": 0.5144, "step": 11861 }, { "epoch": 1.674950578932505, "grad_norm": 4.145861669458106, "learning_rate": 8.602096604645009e-06, "loss": 0.6752, "step": 11862 }, { "epoch": 1.6750917819824909, "grad_norm": 3.508954585457945, "learning_rate": 8.600587136349799e-06, "loss": 0.4418, "step": 11863 }, { "epoch": 1.6752329850324768, "grad_norm": 3.695775431775974, "learning_rate": 8.599077700576342e-06, "loss": 0.537, "step": 11864 }, { "epoch": 1.6753741880824626, "grad_norm": 3.6021196733255447, "learning_rate": 8.597568297359713e-06, "loss": 0.5555, "step": 11865 }, { "epoch": 1.6755153911324485, "grad_norm": 4.435646139946198, "learning_rate": 8.59605892673499e-06, "loss": 0.6973, "step": 11866 }, { "epoch": 1.6756565941824344, "grad_norm": 3.487067399872672, "learning_rate": 8.594549588737253e-06, "loss": 0.5746, "step": 11867 }, { "epoch": 1.6757977972324203, "grad_norm": 4.349398823529884, "learning_rate": 8.593040283401576e-06, "loss": 0.6564, "step": 11868 }, { "epoch": 1.6759390002824062, "grad_norm": 3.3969434218627557, "learning_rate": 8.591531010763036e-06, "loss": 0.569, "step": 11869 }, { "epoch": 1.676080203332392, "grad_norm": 4.151062540440489, "learning_rate": 8.590021770856708e-06, "loss": 0.6894, "step": 11870 }, { "epoch": 1.676221406382378, "grad_norm": 3.5364462398823813, "learning_rate": 8.588512563717664e-06, "loss": 0.5183, "step": 11871 }, { "epoch": 1.6763626094323638, "grad_norm": 3.9188700852249254, "learning_rate": 8.587003389380977e-06, "loss": 0.6232, "step": 11872 }, { "epoch": 1.6765038124823497, "grad_norm": 3.690461266187758, "learning_rate": 8.585494247881722e-06, "loss": 0.439, "step": 11873 }, { "epoch": 1.6766450155323356, "grad_norm": 3.263811869505128, "learning_rate": 8.58398513925497e-06, "loss": 0.4537, "step": 11874 }, { "epoch": 1.6767862185823215, "grad_norm": 3.464898975227575, "learning_rate": 8.58247606353579e-06, "loss": 0.5901, "step": 11875 }, { "epoch": 1.6769274216323073, "grad_norm": 3.1390323153972277, "learning_rate": 8.580967020759257e-06, "loss": 0.508, "step": 11876 }, { "epoch": 1.6770686246822932, "grad_norm": 4.798097362770125, "learning_rate": 8.579458010960435e-06, "loss": 0.756, "step": 11877 }, { "epoch": 1.6772098277322791, "grad_norm": 3.4561803754450255, "learning_rate": 8.577949034174395e-06, "loss": 0.7076, "step": 11878 }, { "epoch": 1.677351030782265, "grad_norm": 4.054219532430299, "learning_rate": 8.576440090436213e-06, "loss": 0.5889, "step": 11879 }, { "epoch": 1.6774922338322509, "grad_norm": 3.293657309526127, "learning_rate": 8.57493117978094e-06, "loss": 0.6084, "step": 11880 }, { "epoch": 1.6776334368822368, "grad_norm": 3.5517445078406236, "learning_rate": 8.573422302243653e-06, "loss": 0.5517, "step": 11881 }, { "epoch": 1.6777746399322226, "grad_norm": 3.791752462856347, "learning_rate": 8.571913457859418e-06, "loss": 0.55, "step": 11882 }, { "epoch": 1.6779158429822085, "grad_norm": 3.4349666111582375, "learning_rate": 8.570404646663295e-06, "loss": 0.5008, "step": 11883 }, { "epoch": 1.6780570460321944, "grad_norm": 3.3265798616561, "learning_rate": 8.56889586869035e-06, "loss": 0.5913, "step": 11884 }, { "epoch": 1.6781982490821803, "grad_norm": 4.518021111624617, "learning_rate": 8.567387123975648e-06, "loss": 0.5671, "step": 11885 }, { "epoch": 1.6783394521321662, "grad_norm": 3.260256711687439, "learning_rate": 8.565878412554251e-06, "loss": 0.5686, "step": 11886 }, { "epoch": 1.678480655182152, "grad_norm": 3.2415569306181413, "learning_rate": 8.564369734461222e-06, "loss": 0.5175, "step": 11887 }, { "epoch": 1.678621858232138, "grad_norm": 3.2715087448908484, "learning_rate": 8.562861089731618e-06, "loss": 0.505, "step": 11888 }, { "epoch": 1.6787630612821238, "grad_norm": 2.8598828442356816, "learning_rate": 8.561352478400501e-06, "loss": 0.4608, "step": 11889 }, { "epoch": 1.6789042643321097, "grad_norm": 3.7979466670185604, "learning_rate": 8.559843900502934e-06, "loss": 0.6318, "step": 11890 }, { "epoch": 1.6790454673820956, "grad_norm": 3.5692995331344015, "learning_rate": 8.55833535607397e-06, "loss": 0.527, "step": 11891 }, { "epoch": 1.6791866704320815, "grad_norm": 3.13632668650295, "learning_rate": 8.556826845148669e-06, "loss": 0.4456, "step": 11892 }, { "epoch": 1.6793278734820674, "grad_norm": 3.1417910369169144, "learning_rate": 8.55531836776209e-06, "loss": 0.5042, "step": 11893 }, { "epoch": 1.6794690765320532, "grad_norm": 3.3386501533137434, "learning_rate": 8.55380992394929e-06, "loss": 0.5512, "step": 11894 }, { "epoch": 1.6796102795820391, "grad_norm": 4.026068427138042, "learning_rate": 8.552301513745322e-06, "loss": 0.665, "step": 11895 }, { "epoch": 1.679751482632025, "grad_norm": 3.380219839451239, "learning_rate": 8.550793137185243e-06, "loss": 0.5191, "step": 11896 }, { "epoch": 1.6798926856820109, "grad_norm": 4.887089884202536, "learning_rate": 8.549284794304102e-06, "loss": 0.724, "step": 11897 }, { "epoch": 1.6800338887319968, "grad_norm": 3.439596434299331, "learning_rate": 8.547776485136957e-06, "loss": 0.5404, "step": 11898 }, { "epoch": 1.6801750917819827, "grad_norm": 3.0716721957899957, "learning_rate": 8.546268209718862e-06, "loss": 0.5001, "step": 11899 }, { "epoch": 1.6803162948319683, "grad_norm": 2.9402522796596027, "learning_rate": 8.544759968084863e-06, "loss": 0.4701, "step": 11900 }, { "epoch": 1.6804574978819542, "grad_norm": 4.126089825878445, "learning_rate": 8.543251760270013e-06, "loss": 0.5241, "step": 11901 }, { "epoch": 1.68059870093194, "grad_norm": 3.4580561325043577, "learning_rate": 8.541743586309366e-06, "loss": 0.5369, "step": 11902 }, { "epoch": 1.680739903981926, "grad_norm": 3.3150190948575577, "learning_rate": 8.540235446237967e-06, "loss": 0.4515, "step": 11903 }, { "epoch": 1.6808811070319118, "grad_norm": 3.2151665450479, "learning_rate": 8.53872734009087e-06, "loss": 0.5344, "step": 11904 }, { "epoch": 1.6810223100818977, "grad_norm": 3.5302943013673147, "learning_rate": 8.537219267903115e-06, "loss": 0.5848, "step": 11905 }, { "epoch": 1.6811635131318836, "grad_norm": 3.8622418323248304, "learning_rate": 8.535711229709749e-06, "loss": 0.6894, "step": 11906 }, { "epoch": 1.6813047161818695, "grad_norm": 4.7294114379445755, "learning_rate": 8.534203225545824e-06, "loss": 0.7566, "step": 11907 }, { "epoch": 1.6814459192318554, "grad_norm": 4.079624387607387, "learning_rate": 8.532695255446384e-06, "loss": 0.6253, "step": 11908 }, { "epoch": 1.6815871222818413, "grad_norm": 3.415157974730411, "learning_rate": 8.53118731944647e-06, "loss": 0.516, "step": 11909 }, { "epoch": 1.6817283253318271, "grad_norm": 3.1271981421768706, "learning_rate": 8.52967941758113e-06, "loss": 0.5921, "step": 11910 }, { "epoch": 1.681869528381813, "grad_norm": 3.3309398080967445, "learning_rate": 8.528171549885409e-06, "loss": 0.4864, "step": 11911 }, { "epoch": 1.682010731431799, "grad_norm": 3.3849159350185642, "learning_rate": 8.52666371639434e-06, "loss": 0.4973, "step": 11912 }, { "epoch": 1.6821519344817848, "grad_norm": 3.8222053962175035, "learning_rate": 8.525155917142977e-06, "loss": 0.646, "step": 11913 }, { "epoch": 1.6822931375317707, "grad_norm": 3.8354437254392786, "learning_rate": 8.523648152166349e-06, "loss": 0.5603, "step": 11914 }, { "epoch": 1.6824343405817566, "grad_norm": 4.653551489276745, "learning_rate": 8.522140421499499e-06, "loss": 0.6588, "step": 11915 }, { "epoch": 1.6825755436317424, "grad_norm": 3.6917062912717924, "learning_rate": 8.520632725177468e-06, "loss": 0.601, "step": 11916 }, { "epoch": 1.6827167466817283, "grad_norm": 3.2943006528974537, "learning_rate": 8.519125063235293e-06, "loss": 0.4501, "step": 11917 }, { "epoch": 1.6828579497317142, "grad_norm": 2.9020728347640077, "learning_rate": 8.517617435708011e-06, "loss": 0.4228, "step": 11918 }, { "epoch": 1.6829991527817, "grad_norm": 4.2663202857206, "learning_rate": 8.516109842630664e-06, "loss": 0.7258, "step": 11919 }, { "epoch": 1.683140355831686, "grad_norm": 3.7813351125697343, "learning_rate": 8.51460228403828e-06, "loss": 0.6253, "step": 11920 }, { "epoch": 1.6832815588816719, "grad_norm": 3.0451526183866946, "learning_rate": 8.513094759965904e-06, "loss": 0.467, "step": 11921 }, { "epoch": 1.6834227619316577, "grad_norm": 3.4390450369385994, "learning_rate": 8.511587270448556e-06, "loss": 0.5002, "step": 11922 }, { "epoch": 1.6835639649816436, "grad_norm": 3.8699248988901553, "learning_rate": 8.510079815521278e-06, "loss": 0.516, "step": 11923 }, { "epoch": 1.6837051680316295, "grad_norm": 3.8654364724422123, "learning_rate": 8.508572395219104e-06, "loss": 0.6801, "step": 11924 }, { "epoch": 1.6838463710816154, "grad_norm": 3.6302794554577353, "learning_rate": 8.507065009577062e-06, "loss": 0.5823, "step": 11925 }, { "epoch": 1.6839875741316013, "grad_norm": 3.436179194359734, "learning_rate": 8.505557658630186e-06, "loss": 0.5481, "step": 11926 }, { "epoch": 1.6841287771815872, "grad_norm": 4.212051964642586, "learning_rate": 8.504050342413501e-06, "loss": 0.6707, "step": 11927 }, { "epoch": 1.684269980231573, "grad_norm": 3.8337224614408294, "learning_rate": 8.502543060962043e-06, "loss": 0.5991, "step": 11928 }, { "epoch": 1.684411183281559, "grad_norm": 3.3989261823118007, "learning_rate": 8.501035814310837e-06, "loss": 0.5847, "step": 11929 }, { "epoch": 1.6845523863315448, "grad_norm": 4.498316434136469, "learning_rate": 8.499528602494914e-06, "loss": 0.6681, "step": 11930 }, { "epoch": 1.6846935893815307, "grad_norm": 3.3969688170107584, "learning_rate": 8.498021425549297e-06, "loss": 0.6057, "step": 11931 }, { "epoch": 1.6848347924315166, "grad_norm": 3.0295022580033124, "learning_rate": 8.49651428350901e-06, "loss": 0.4096, "step": 11932 }, { "epoch": 1.6849759954815025, "grad_norm": 2.562232881069511, "learning_rate": 8.495007176409084e-06, "loss": 0.4406, "step": 11933 }, { "epoch": 1.6851171985314881, "grad_norm": 3.6538137182320933, "learning_rate": 8.493500104284539e-06, "loss": 0.6067, "step": 11934 }, { "epoch": 1.685258401581474, "grad_norm": 3.143094142837082, "learning_rate": 8.491993067170402e-06, "loss": 0.5265, "step": 11935 }, { "epoch": 1.6853996046314599, "grad_norm": 3.812238440942335, "learning_rate": 8.490486065101698e-06, "loss": 0.6259, "step": 11936 }, { "epoch": 1.6855408076814458, "grad_norm": 3.696254718380954, "learning_rate": 8.488979098113443e-06, "loss": 0.5517, "step": 11937 }, { "epoch": 1.6856820107314316, "grad_norm": 4.679432889335485, "learning_rate": 8.487472166240665e-06, "loss": 0.7741, "step": 11938 }, { "epoch": 1.6858232137814175, "grad_norm": 3.554304780013752, "learning_rate": 8.485965269518376e-06, "loss": 0.5494, "step": 11939 }, { "epoch": 1.6859644168314034, "grad_norm": 3.4813361364161315, "learning_rate": 8.484458407981601e-06, "loss": 0.5278, "step": 11940 }, { "epoch": 1.6861056198813893, "grad_norm": 3.591135728061469, "learning_rate": 8.482951581665359e-06, "loss": 0.5692, "step": 11941 }, { "epoch": 1.6862468229313752, "grad_norm": 4.142239881597264, "learning_rate": 8.481444790604668e-06, "loss": 0.6604, "step": 11942 }, { "epoch": 1.686388025981361, "grad_norm": 3.83618552375508, "learning_rate": 8.479938034834544e-06, "loss": 0.6222, "step": 11943 }, { "epoch": 1.686529229031347, "grad_norm": 3.1993426566388967, "learning_rate": 8.478431314390002e-06, "loss": 0.4609, "step": 11944 }, { "epoch": 1.6866704320813328, "grad_norm": 3.4014673568017053, "learning_rate": 8.47692462930606e-06, "loss": 0.6253, "step": 11945 }, { "epoch": 1.6868116351313187, "grad_norm": 3.495760303803192, "learning_rate": 8.475417979617732e-06, "loss": 0.5142, "step": 11946 }, { "epoch": 1.6869528381813046, "grad_norm": 3.153674721075382, "learning_rate": 8.473911365360034e-06, "loss": 0.5628, "step": 11947 }, { "epoch": 1.6870940412312905, "grad_norm": 3.3615557192352616, "learning_rate": 8.472404786567974e-06, "loss": 0.6918, "step": 11948 }, { "epoch": 1.6872352442812764, "grad_norm": 3.341137733312112, "learning_rate": 8.470898243276567e-06, "loss": 0.5935, "step": 11949 }, { "epoch": 1.6873764473312622, "grad_norm": 4.166899821663311, "learning_rate": 8.469391735520824e-06, "loss": 0.6654, "step": 11950 }, { "epoch": 1.6875176503812481, "grad_norm": 2.593401378475758, "learning_rate": 8.467885263335758e-06, "loss": 0.4129, "step": 11951 }, { "epoch": 1.687658853431234, "grad_norm": 3.0162962430221807, "learning_rate": 8.466378826756373e-06, "loss": 0.4495, "step": 11952 }, { "epoch": 1.68780005648122, "grad_norm": 4.630351245004714, "learning_rate": 8.464872425817685e-06, "loss": 0.6902, "step": 11953 }, { "epoch": 1.6879412595312058, "grad_norm": 3.5297394210807904, "learning_rate": 8.463366060554698e-06, "loss": 0.5952, "step": 11954 }, { "epoch": 1.6880824625811917, "grad_norm": 4.213731713748684, "learning_rate": 8.461859731002424e-06, "loss": 0.5896, "step": 11955 }, { "epoch": 1.6882236656311775, "grad_norm": 3.646529711848193, "learning_rate": 8.460353437195864e-06, "loss": 0.564, "step": 11956 }, { "epoch": 1.6883648686811634, "grad_norm": 4.012265138496627, "learning_rate": 8.45884717917002e-06, "loss": 0.5982, "step": 11957 }, { "epoch": 1.6885060717311493, "grad_norm": 3.3979530396281064, "learning_rate": 8.457340956959905e-06, "loss": 0.5098, "step": 11958 }, { "epoch": 1.6886472747811352, "grad_norm": 3.662600791436109, "learning_rate": 8.455834770600522e-06, "loss": 0.4891, "step": 11959 }, { "epoch": 1.688788477831121, "grad_norm": 3.953349824344211, "learning_rate": 8.454328620126871e-06, "loss": 0.5781, "step": 11960 }, { "epoch": 1.688929680881107, "grad_norm": 3.6772406665479784, "learning_rate": 8.452822505573952e-06, "loss": 0.5629, "step": 11961 }, { "epoch": 1.6890708839310928, "grad_norm": 3.1185899892635205, "learning_rate": 8.451316426976773e-06, "loss": 0.4564, "step": 11962 }, { "epoch": 1.6892120869810787, "grad_norm": 3.813657972758221, "learning_rate": 8.44981038437033e-06, "loss": 0.6108, "step": 11963 }, { "epoch": 1.6893532900310646, "grad_norm": 3.2089515395563075, "learning_rate": 8.448304377789628e-06, "loss": 0.376, "step": 11964 }, { "epoch": 1.6894944930810505, "grad_norm": 3.9801835298305046, "learning_rate": 8.44679840726966e-06, "loss": 0.5776, "step": 11965 }, { "epoch": 1.6896356961310364, "grad_norm": 3.8884405820722763, "learning_rate": 8.445292472845423e-06, "loss": 0.5844, "step": 11966 }, { "epoch": 1.6897768991810223, "grad_norm": 3.1653196236020205, "learning_rate": 8.44378657455192e-06, "loss": 0.4808, "step": 11967 }, { "epoch": 1.6899181022310081, "grad_norm": 4.127114579013589, "learning_rate": 8.442280712424146e-06, "loss": 0.6901, "step": 11968 }, { "epoch": 1.690059305280994, "grad_norm": 4.033862980037114, "learning_rate": 8.440774886497091e-06, "loss": 0.666, "step": 11969 }, { "epoch": 1.69020050833098, "grad_norm": 3.8528238498850946, "learning_rate": 8.439269096805758e-06, "loss": 0.5511, "step": 11970 }, { "epoch": 1.6903417113809658, "grad_norm": 2.8284339327128256, "learning_rate": 8.437763343385139e-06, "loss": 0.4253, "step": 11971 }, { "epoch": 1.6904829144309517, "grad_norm": 3.3682359222072615, "learning_rate": 8.436257626270225e-06, "loss": 0.4953, "step": 11972 }, { "epoch": 1.6906241174809375, "grad_norm": 3.71691343224829, "learning_rate": 8.434751945496006e-06, "loss": 0.6153, "step": 11973 }, { "epoch": 1.6907653205309234, "grad_norm": 3.7289965768185294, "learning_rate": 8.433246301097477e-06, "loss": 0.6627, "step": 11974 }, { "epoch": 1.6909065235809093, "grad_norm": 3.148609326314491, "learning_rate": 8.431740693109624e-06, "loss": 0.4549, "step": 11975 }, { "epoch": 1.6910477266308952, "grad_norm": 3.302838609807822, "learning_rate": 8.430235121567444e-06, "loss": 0.5476, "step": 11976 }, { "epoch": 1.691188929680881, "grad_norm": 4.193818876843916, "learning_rate": 8.42872958650592e-06, "loss": 0.549, "step": 11977 }, { "epoch": 1.691330132730867, "grad_norm": 3.7707387315257583, "learning_rate": 8.42722408796004e-06, "loss": 0.5613, "step": 11978 }, { "epoch": 1.6914713357808528, "grad_norm": 4.104939895534065, "learning_rate": 8.425718625964796e-06, "loss": 0.5696, "step": 11979 }, { "epoch": 1.6916125388308387, "grad_norm": 4.471213485672427, "learning_rate": 8.424213200555171e-06, "loss": 0.6467, "step": 11980 }, { "epoch": 1.6917537418808246, "grad_norm": 3.5709493862125705, "learning_rate": 8.422707811766153e-06, "loss": 0.5402, "step": 11981 }, { "epoch": 1.6918949449308105, "grad_norm": 4.186321471820759, "learning_rate": 8.42120245963272e-06, "loss": 0.5831, "step": 11982 }, { "epoch": 1.6920361479807964, "grad_norm": 4.426383209009122, "learning_rate": 8.419697144189861e-06, "loss": 0.5967, "step": 11983 }, { "epoch": 1.6921773510307823, "grad_norm": 3.6132591871042257, "learning_rate": 8.418191865472559e-06, "loss": 0.4595, "step": 11984 }, { "epoch": 1.6923185540807681, "grad_norm": 3.5302187678725034, "learning_rate": 8.416686623515794e-06, "loss": 0.5307, "step": 11985 }, { "epoch": 1.692459757130754, "grad_norm": 3.543861937630865, "learning_rate": 8.415181418354548e-06, "loss": 0.5766, "step": 11986 }, { "epoch": 1.69260096018074, "grad_norm": 3.190792974932769, "learning_rate": 8.4136762500238e-06, "loss": 0.4955, "step": 11987 }, { "epoch": 1.6927421632307258, "grad_norm": 3.628818886049226, "learning_rate": 8.412171118558534e-06, "loss": 0.5936, "step": 11988 }, { "epoch": 1.6928833662807117, "grad_norm": 4.0765163571882645, "learning_rate": 8.410666023993727e-06, "loss": 0.6075, "step": 11989 }, { "epoch": 1.6930245693306976, "grad_norm": 3.7392567212805994, "learning_rate": 8.409160966364351e-06, "loss": 0.5722, "step": 11990 }, { "epoch": 1.6931657723806834, "grad_norm": 3.1492757431061995, "learning_rate": 8.40765594570539e-06, "loss": 0.4482, "step": 11991 }, { "epoch": 1.6933069754306693, "grad_norm": 3.901422606042593, "learning_rate": 8.406150962051813e-06, "loss": 0.632, "step": 11992 }, { "epoch": 1.6934481784806552, "grad_norm": 3.6485869025523656, "learning_rate": 8.404646015438602e-06, "loss": 0.5655, "step": 11993 }, { "epoch": 1.693589381530641, "grad_norm": 3.337371130768836, "learning_rate": 8.40314110590073e-06, "loss": 0.5529, "step": 11994 }, { "epoch": 1.693730584580627, "grad_norm": 3.072518133162186, "learning_rate": 8.401636233473164e-06, "loss": 0.4901, "step": 11995 }, { "epoch": 1.6938717876306129, "grad_norm": 4.066319098280157, "learning_rate": 8.400131398190887e-06, "loss": 0.6101, "step": 11996 }, { "epoch": 1.6940129906805987, "grad_norm": 3.091061795611824, "learning_rate": 8.398626600088866e-06, "loss": 0.5372, "step": 11997 }, { "epoch": 1.6941541937305846, "grad_norm": 3.408413783819742, "learning_rate": 8.397121839202069e-06, "loss": 0.6069, "step": 11998 }, { "epoch": 1.6942953967805705, "grad_norm": 3.530090811552823, "learning_rate": 8.395617115565468e-06, "loss": 0.5876, "step": 11999 }, { "epoch": 1.6944365998305564, "grad_norm": 3.1920548127487316, "learning_rate": 8.394112429214032e-06, "loss": 0.4328, "step": 12000 }, { "epoch": 1.6945778028805423, "grad_norm": 4.896839745851315, "learning_rate": 8.39260778018273e-06, "loss": 0.9414, "step": 12001 }, { "epoch": 1.6947190059305282, "grad_norm": 2.625872035510413, "learning_rate": 8.391103168506529e-06, "loss": 0.4458, "step": 12002 }, { "epoch": 1.694860208980514, "grad_norm": 3.6048671729879818, "learning_rate": 8.389598594220395e-06, "loss": 0.5273, "step": 12003 }, { "epoch": 1.6950014120305, "grad_norm": 3.2422973570379474, "learning_rate": 8.388094057359295e-06, "loss": 0.4852, "step": 12004 }, { "epoch": 1.6951426150804858, "grad_norm": 3.5559299875739123, "learning_rate": 8.386589557958192e-06, "loss": 0.6569, "step": 12005 }, { "epoch": 1.6952838181304717, "grad_norm": 3.709460825178122, "learning_rate": 8.385085096052053e-06, "loss": 0.5762, "step": 12006 }, { "epoch": 1.6954250211804576, "grad_norm": 4.413747350314546, "learning_rate": 8.383580671675839e-06, "loss": 0.6347, "step": 12007 }, { "epoch": 1.6955662242304435, "grad_norm": 3.4335817173708554, "learning_rate": 8.38207628486451e-06, "loss": 0.5083, "step": 12008 }, { "epoch": 1.6957074272804293, "grad_norm": 4.666933867619174, "learning_rate": 8.380571935653029e-06, "loss": 0.7187, "step": 12009 }, { "epoch": 1.6958486303304152, "grad_norm": 3.705435840967929, "learning_rate": 8.379067624076358e-06, "loss": 0.6462, "step": 12010 }, { "epoch": 1.695989833380401, "grad_norm": 3.1508963033032775, "learning_rate": 8.377563350169456e-06, "loss": 0.4522, "step": 12011 }, { "epoch": 1.696131036430387, "grad_norm": 3.787420179070938, "learning_rate": 8.376059113967279e-06, "loss": 0.5155, "step": 12012 }, { "epoch": 1.6962722394803729, "grad_norm": 3.1270551577841146, "learning_rate": 8.374554915504787e-06, "loss": 0.4895, "step": 12013 }, { "epoch": 1.6964134425303588, "grad_norm": 3.0872476778712374, "learning_rate": 8.373050754816942e-06, "loss": 0.3875, "step": 12014 }, { "epoch": 1.6965546455803446, "grad_norm": 4.539227383276902, "learning_rate": 8.37154663193869e-06, "loss": 0.6069, "step": 12015 }, { "epoch": 1.6966958486303305, "grad_norm": 3.523181698555804, "learning_rate": 8.370042546904992e-06, "loss": 0.48, "step": 12016 }, { "epoch": 1.6968370516803164, "grad_norm": 4.015778502127796, "learning_rate": 8.368538499750803e-06, "loss": 0.6202, "step": 12017 }, { "epoch": 1.6969782547303023, "grad_norm": 3.9183380504274234, "learning_rate": 8.36703449051107e-06, "loss": 0.5462, "step": 12018 }, { "epoch": 1.6971194577802882, "grad_norm": 3.7154730430025285, "learning_rate": 8.365530519220753e-06, "loss": 0.5657, "step": 12019 }, { "epoch": 1.697260660830274, "grad_norm": 2.8318631002108927, "learning_rate": 8.364026585914802e-06, "loss": 0.4155, "step": 12020 }, { "epoch": 1.69740186388026, "grad_norm": 3.2430120254574146, "learning_rate": 8.362522690628165e-06, "loss": 0.4545, "step": 12021 }, { "epoch": 1.6975430669302458, "grad_norm": 3.4776238402179014, "learning_rate": 8.361018833395792e-06, "loss": 0.4995, "step": 12022 }, { "epoch": 1.6976842699802317, "grad_norm": 4.08553572457225, "learning_rate": 8.35951501425264e-06, "loss": 0.5972, "step": 12023 }, { "epoch": 1.6978254730302176, "grad_norm": 3.920123467933314, "learning_rate": 8.358011233233646e-06, "loss": 0.5979, "step": 12024 }, { "epoch": 1.6979666760802035, "grad_norm": 3.8094740367571287, "learning_rate": 8.356507490373761e-06, "loss": 0.6649, "step": 12025 }, { "epoch": 1.6981078791301893, "grad_norm": 3.6749445415920676, "learning_rate": 8.355003785707932e-06, "loss": 0.4896, "step": 12026 }, { "epoch": 1.6982490821801752, "grad_norm": 3.663610078213634, "learning_rate": 8.353500119271106e-06, "loss": 0.5436, "step": 12027 }, { "epoch": 1.6983902852301611, "grad_norm": 3.515624139345645, "learning_rate": 8.351996491098227e-06, "loss": 0.6488, "step": 12028 }, { "epoch": 1.698531488280147, "grad_norm": 3.3996488399975986, "learning_rate": 8.350492901224237e-06, "loss": 0.5146, "step": 12029 }, { "epoch": 1.6986726913301329, "grad_norm": 3.402353464726417, "learning_rate": 8.348989349684077e-06, "loss": 0.4307, "step": 12030 }, { "epoch": 1.6988138943801188, "grad_norm": 4.288558881589772, "learning_rate": 8.347485836512696e-06, "loss": 0.6446, "step": 12031 }, { "epoch": 1.6989550974301046, "grad_norm": 3.716701170843746, "learning_rate": 8.345982361745029e-06, "loss": 0.4738, "step": 12032 }, { "epoch": 1.6990963004800905, "grad_norm": 4.435573069445306, "learning_rate": 8.344478925416017e-06, "loss": 0.643, "step": 12033 }, { "epoch": 1.6992375035300764, "grad_norm": 4.15979042953053, "learning_rate": 8.342975527560601e-06, "loss": 0.5794, "step": 12034 }, { "epoch": 1.6993787065800623, "grad_norm": 5.12326878406178, "learning_rate": 8.341472168213714e-06, "loss": 0.729, "step": 12035 }, { "epoch": 1.699519909630048, "grad_norm": 3.3165732673117296, "learning_rate": 8.339968847410301e-06, "loss": 0.5068, "step": 12036 }, { "epoch": 1.6996611126800338, "grad_norm": 3.8424771936465563, "learning_rate": 8.338465565185295e-06, "loss": 0.5853, "step": 12037 }, { "epoch": 1.6998023157300197, "grad_norm": 3.9273386462087925, "learning_rate": 8.33696232157363e-06, "loss": 0.5612, "step": 12038 }, { "epoch": 1.6999435187800056, "grad_norm": 3.6001428234598616, "learning_rate": 8.335459116610243e-06, "loss": 0.631, "step": 12039 }, { "epoch": 1.7000847218299915, "grad_norm": 3.749783494577981, "learning_rate": 8.33395595033007e-06, "loss": 0.6889, "step": 12040 }, { "epoch": 1.7002259248799774, "grad_norm": 3.532879681610789, "learning_rate": 8.33245282276804e-06, "loss": 0.6552, "step": 12041 }, { "epoch": 1.7003671279299633, "grad_norm": 3.635617782995768, "learning_rate": 8.330949733959084e-06, "loss": 0.6501, "step": 12042 }, { "epoch": 1.7005083309799491, "grad_norm": 3.278750401326439, "learning_rate": 8.329446683938137e-06, "loss": 0.5318, "step": 12043 }, { "epoch": 1.700649534029935, "grad_norm": 4.171329853229081, "learning_rate": 8.327943672740126e-06, "loss": 0.6559, "step": 12044 }, { "epoch": 1.700790737079921, "grad_norm": 4.093493653682459, "learning_rate": 8.326440700399985e-06, "loss": 0.6663, "step": 12045 }, { "epoch": 1.7009319401299068, "grad_norm": 3.5065347595147016, "learning_rate": 8.324937766952638e-06, "loss": 0.513, "step": 12046 }, { "epoch": 1.7010731431798927, "grad_norm": 3.6517954017906176, "learning_rate": 8.323434872433011e-06, "loss": 0.6003, "step": 12047 }, { "epoch": 1.7012143462298785, "grad_norm": 4.225272733663781, "learning_rate": 8.32193201687604e-06, "loss": 0.6019, "step": 12048 }, { "epoch": 1.7013555492798644, "grad_norm": 3.1345018115954746, "learning_rate": 8.320429200316638e-06, "loss": 0.5076, "step": 12049 }, { "epoch": 1.7014967523298503, "grad_norm": 3.2586703089023796, "learning_rate": 8.31892642278974e-06, "loss": 0.4971, "step": 12050 }, { "epoch": 1.7016379553798362, "grad_norm": 3.1505818877166183, "learning_rate": 8.317423684330263e-06, "loss": 0.5951, "step": 12051 }, { "epoch": 1.701779158429822, "grad_norm": 3.5226273104754986, "learning_rate": 8.315920984973134e-06, "loss": 0.6036, "step": 12052 }, { "epoch": 1.701920361479808, "grad_norm": 4.409133754091182, "learning_rate": 8.314418324753274e-06, "loss": 0.6299, "step": 12053 }, { "epoch": 1.7020615645297938, "grad_norm": 3.7511346204498572, "learning_rate": 8.312915703705603e-06, "loss": 0.6291, "step": 12054 }, { "epoch": 1.7022027675797797, "grad_norm": 3.1977204581923235, "learning_rate": 8.311413121865044e-06, "loss": 0.4962, "step": 12055 }, { "epoch": 1.7023439706297656, "grad_norm": 3.6153937999220846, "learning_rate": 8.309910579266514e-06, "loss": 0.5574, "step": 12056 }, { "epoch": 1.7024851736797515, "grad_norm": 3.672048776242605, "learning_rate": 8.308408075944935e-06, "loss": 0.594, "step": 12057 }, { "epoch": 1.7026263767297374, "grad_norm": 4.491399068546746, "learning_rate": 8.306905611935221e-06, "loss": 0.5801, "step": 12058 }, { "epoch": 1.7027675797797233, "grad_norm": 3.3535772981552103, "learning_rate": 8.305403187272288e-06, "loss": 0.5142, "step": 12059 }, { "epoch": 1.7029087828297091, "grad_norm": 3.666192198898361, "learning_rate": 8.303900801991052e-06, "loss": 0.5106, "step": 12060 }, { "epoch": 1.703049985879695, "grad_norm": 2.8242289968525878, "learning_rate": 8.302398456126429e-06, "loss": 0.3964, "step": 12061 }, { "epoch": 1.703191188929681, "grad_norm": 4.164194688050874, "learning_rate": 8.300896149713334e-06, "loss": 0.6733, "step": 12062 }, { "epoch": 1.7033323919796668, "grad_norm": 2.6959153575720403, "learning_rate": 8.299393882786679e-06, "loss": 0.4173, "step": 12063 }, { "epoch": 1.7034735950296527, "grad_norm": 4.0850340507385585, "learning_rate": 8.297891655381375e-06, "loss": 0.6352, "step": 12064 }, { "epoch": 1.7036147980796386, "grad_norm": 3.376835139144042, "learning_rate": 8.296389467532338e-06, "loss": 0.5652, "step": 12065 }, { "epoch": 1.7037560011296244, "grad_norm": 3.7693602300000935, "learning_rate": 8.294887319274467e-06, "loss": 0.5357, "step": 12066 }, { "epoch": 1.7038972041796103, "grad_norm": 3.376901775109604, "learning_rate": 8.29338521064268e-06, "loss": 0.5715, "step": 12067 }, { "epoch": 1.7040384072295962, "grad_norm": 3.518801575256679, "learning_rate": 8.291883141671888e-06, "loss": 0.5103, "step": 12068 }, { "epoch": 1.704179610279582, "grad_norm": 3.2649482779591725, "learning_rate": 8.290381112396989e-06, "loss": 0.4803, "step": 12069 }, { "epoch": 1.704320813329568, "grad_norm": 3.7257827447448, "learning_rate": 8.288879122852897e-06, "loss": 0.6265, "step": 12070 }, { "epoch": 1.7044620163795536, "grad_norm": 3.8864217531436775, "learning_rate": 8.287377173074515e-06, "loss": 0.643, "step": 12071 }, { "epoch": 1.7046032194295395, "grad_norm": 2.6843198184670163, "learning_rate": 8.285875263096746e-06, "loss": 0.4418, "step": 12072 }, { "epoch": 1.7047444224795254, "grad_norm": 3.3659497245912977, "learning_rate": 8.284373392954496e-06, "loss": 0.5174, "step": 12073 }, { "epoch": 1.7048856255295113, "grad_norm": 4.965912136104793, "learning_rate": 8.282871562682673e-06, "loss": 0.6999, "step": 12074 }, { "epoch": 1.7050268285794972, "grad_norm": 3.016330326272305, "learning_rate": 8.28136977231617e-06, "loss": 0.4263, "step": 12075 }, { "epoch": 1.705168031629483, "grad_norm": 3.532192561985221, "learning_rate": 8.27986802188989e-06, "loss": 0.6173, "step": 12076 }, { "epoch": 1.705309234679469, "grad_norm": 4.4643229228843175, "learning_rate": 8.278366311438735e-06, "loss": 0.6838, "step": 12077 }, { "epoch": 1.7054504377294548, "grad_norm": 4.213501917480052, "learning_rate": 8.276864640997602e-06, "loss": 0.499, "step": 12078 }, { "epoch": 1.7055916407794407, "grad_norm": 3.1710108834049353, "learning_rate": 8.275363010601392e-06, "loss": 0.5628, "step": 12079 }, { "epoch": 1.7057328438294266, "grad_norm": 4.700198323306438, "learning_rate": 8.273861420285e-06, "loss": 0.6558, "step": 12080 }, { "epoch": 1.7058740468794125, "grad_norm": 3.608002308278103, "learning_rate": 8.272359870083321e-06, "loss": 0.5464, "step": 12081 }, { "epoch": 1.7060152499293983, "grad_norm": 3.458487830224624, "learning_rate": 8.27085836003126e-06, "loss": 0.5832, "step": 12082 }, { "epoch": 1.7061564529793842, "grad_norm": 4.19363310732357, "learning_rate": 8.269356890163698e-06, "loss": 0.5265, "step": 12083 }, { "epoch": 1.7062976560293701, "grad_norm": 3.827286089545369, "learning_rate": 8.267855460515536e-06, "loss": 0.6019, "step": 12084 }, { "epoch": 1.706438859079356, "grad_norm": 3.521000897084492, "learning_rate": 8.266354071121665e-06, "loss": 0.5756, "step": 12085 }, { "epoch": 1.7065800621293419, "grad_norm": 3.7811564502788824, "learning_rate": 8.264852722016974e-06, "loss": 0.6296, "step": 12086 }, { "epoch": 1.7067212651793278, "grad_norm": 3.3938452950977003, "learning_rate": 8.263351413236359e-06, "loss": 0.6497, "step": 12087 }, { "epoch": 1.7068624682293136, "grad_norm": 3.6560818787316047, "learning_rate": 8.261850144814707e-06, "loss": 0.5346, "step": 12088 }, { "epoch": 1.7070036712792995, "grad_norm": 3.677929389546445, "learning_rate": 8.260348916786907e-06, "loss": 0.5612, "step": 12089 }, { "epoch": 1.7071448743292854, "grad_norm": 4.051830010039964, "learning_rate": 8.258847729187845e-06, "loss": 0.6617, "step": 12090 }, { "epoch": 1.7072860773792713, "grad_norm": 3.432787356197747, "learning_rate": 8.257346582052414e-06, "loss": 0.5598, "step": 12091 }, { "epoch": 1.7074272804292572, "grad_norm": 3.4563040551317394, "learning_rate": 8.255845475415493e-06, "loss": 0.5517, "step": 12092 }, { "epoch": 1.707568483479243, "grad_norm": 3.5320357396688524, "learning_rate": 8.254344409311972e-06, "loss": 0.5488, "step": 12093 }, { "epoch": 1.707709686529229, "grad_norm": 3.147980133731133, "learning_rate": 8.252843383776731e-06, "loss": 0.541, "step": 12094 }, { "epoch": 1.7078508895792148, "grad_norm": 2.7469826274699467, "learning_rate": 8.251342398844654e-06, "loss": 0.4586, "step": 12095 }, { "epoch": 1.7079920926292007, "grad_norm": 3.0709922517883053, "learning_rate": 8.249841454550626e-06, "loss": 0.4501, "step": 12096 }, { "epoch": 1.7081332956791866, "grad_norm": 3.872673651572887, "learning_rate": 8.248340550929527e-06, "loss": 0.5906, "step": 12097 }, { "epoch": 1.7082744987291725, "grad_norm": 3.9838360671146806, "learning_rate": 8.246839688016235e-06, "loss": 0.7099, "step": 12098 }, { "epoch": 1.7084157017791584, "grad_norm": 4.2997128062141075, "learning_rate": 8.245338865845638e-06, "loss": 0.7455, "step": 12099 }, { "epoch": 1.7085569048291442, "grad_norm": 3.426254077382975, "learning_rate": 8.243838084452603e-06, "loss": 0.4575, "step": 12100 }, { "epoch": 1.7086981078791301, "grad_norm": 3.7480365668054887, "learning_rate": 8.242337343872012e-06, "loss": 0.556, "step": 12101 }, { "epoch": 1.708839310929116, "grad_norm": 3.839889287703012, "learning_rate": 8.240836644138743e-06, "loss": 0.6454, "step": 12102 }, { "epoch": 1.708980513979102, "grad_norm": 3.504475924167166, "learning_rate": 8.239335985287669e-06, "loss": 0.5728, "step": 12103 }, { "epoch": 1.7091217170290878, "grad_norm": 3.8067273471746277, "learning_rate": 8.237835367353668e-06, "loss": 0.5528, "step": 12104 }, { "epoch": 1.7092629200790737, "grad_norm": 3.960033390955305, "learning_rate": 8.236334790371612e-06, "loss": 0.7281, "step": 12105 }, { "epoch": 1.7094041231290595, "grad_norm": 4.221282115117411, "learning_rate": 8.234834254376375e-06, "loss": 0.586, "step": 12106 }, { "epoch": 1.7095453261790454, "grad_norm": 3.534784188061318, "learning_rate": 8.233333759402823e-06, "loss": 0.5233, "step": 12107 }, { "epoch": 1.7096865292290313, "grad_norm": 3.7869981817278062, "learning_rate": 8.231833305485841e-06, "loss": 0.6853, "step": 12108 }, { "epoch": 1.7098277322790172, "grad_norm": 3.2844084763260684, "learning_rate": 8.230332892660282e-06, "loss": 0.6304, "step": 12109 }, { "epoch": 1.709968935329003, "grad_norm": 5.002310325414373, "learning_rate": 8.228832520961023e-06, "loss": 0.6713, "step": 12110 }, { "epoch": 1.710110138378989, "grad_norm": 4.713410233599082, "learning_rate": 8.227332190422931e-06, "loss": 0.6872, "step": 12111 }, { "epoch": 1.7102513414289748, "grad_norm": 3.286573245850408, "learning_rate": 8.225831901080874e-06, "loss": 0.5107, "step": 12112 }, { "epoch": 1.7103925444789607, "grad_norm": 4.595493875791277, "learning_rate": 8.224331652969717e-06, "loss": 0.6759, "step": 12113 }, { "epoch": 1.7105337475289466, "grad_norm": 2.781788048847269, "learning_rate": 8.222831446124327e-06, "loss": 0.4307, "step": 12114 }, { "epoch": 1.7106749505789325, "grad_norm": 3.8196337329699155, "learning_rate": 8.221331280579564e-06, "loss": 0.5458, "step": 12115 }, { "epoch": 1.7108161536289184, "grad_norm": 3.1943440336277584, "learning_rate": 8.2198311563703e-06, "loss": 0.4822, "step": 12116 }, { "epoch": 1.7109573566789043, "grad_norm": 3.546531341180148, "learning_rate": 8.218331073531385e-06, "loss": 0.5193, "step": 12117 }, { "epoch": 1.7110985597288901, "grad_norm": 3.9259198236937567, "learning_rate": 8.216831032097689e-06, "loss": 0.7017, "step": 12118 }, { "epoch": 1.711239762778876, "grad_norm": 3.4020336498043635, "learning_rate": 8.215331032104069e-06, "loss": 0.5081, "step": 12119 }, { "epoch": 1.711380965828862, "grad_norm": 2.9410025017694172, "learning_rate": 8.213831073585385e-06, "loss": 0.462, "step": 12120 }, { "epoch": 1.7115221688788478, "grad_norm": 2.9289316138982557, "learning_rate": 8.212331156576494e-06, "loss": 0.4996, "step": 12121 }, { "epoch": 1.7116633719288337, "grad_norm": 3.9403214989180726, "learning_rate": 8.210831281112257e-06, "loss": 0.5537, "step": 12122 }, { "epoch": 1.7118045749788195, "grad_norm": 3.0106434552851646, "learning_rate": 8.209331447227527e-06, "loss": 0.6173, "step": 12123 }, { "epoch": 1.7119457780288054, "grad_norm": 3.6693322858761617, "learning_rate": 8.207831654957162e-06, "loss": 0.5414, "step": 12124 }, { "epoch": 1.7120869810787913, "grad_norm": 3.2299484171198496, "learning_rate": 8.206331904336018e-06, "loss": 0.5561, "step": 12125 }, { "epoch": 1.7122281841287772, "grad_norm": 3.8857129556178727, "learning_rate": 8.204832195398941e-06, "loss": 0.6388, "step": 12126 }, { "epoch": 1.712369387178763, "grad_norm": 3.806817808003596, "learning_rate": 8.20333252818079e-06, "loss": 0.5558, "step": 12127 }, { "epoch": 1.712510590228749, "grad_norm": 3.7137945755281407, "learning_rate": 8.201832902716416e-06, "loss": 0.4898, "step": 12128 }, { "epoch": 1.7126517932787348, "grad_norm": 4.109023340479056, "learning_rate": 8.200333319040667e-06, "loss": 0.5986, "step": 12129 }, { "epoch": 1.7127929963287207, "grad_norm": 4.054893392640815, "learning_rate": 8.198833777188396e-06, "loss": 0.6688, "step": 12130 }, { "epoch": 1.7129341993787066, "grad_norm": 3.9183425907148917, "learning_rate": 8.19733427719445e-06, "loss": 0.5966, "step": 12131 }, { "epoch": 1.7130754024286925, "grad_norm": 2.996937512998745, "learning_rate": 8.195834819093677e-06, "loss": 0.504, "step": 12132 }, { "epoch": 1.7132166054786784, "grad_norm": 3.526809740074254, "learning_rate": 8.194335402920926e-06, "loss": 0.552, "step": 12133 }, { "epoch": 1.7133578085286643, "grad_norm": 3.2563473316884326, "learning_rate": 8.192836028711036e-06, "loss": 0.4956, "step": 12134 }, { "epoch": 1.7134990115786501, "grad_norm": 3.801827567299866, "learning_rate": 8.19133669649886e-06, "loss": 0.555, "step": 12135 }, { "epoch": 1.713640214628636, "grad_norm": 3.635986046368952, "learning_rate": 8.189837406319233e-06, "loss": 0.4912, "step": 12136 }, { "epoch": 1.713781417678622, "grad_norm": 3.3138475747002674, "learning_rate": 8.18833815820701e-06, "loss": 0.5174, "step": 12137 }, { "epoch": 1.7139226207286078, "grad_norm": 4.249289845465241, "learning_rate": 8.186838952197019e-06, "loss": 0.5674, "step": 12138 }, { "epoch": 1.7140638237785937, "grad_norm": 3.282852163807536, "learning_rate": 8.18533978832411e-06, "loss": 0.4974, "step": 12139 }, { "epoch": 1.7142050268285796, "grad_norm": 3.870769621176407, "learning_rate": 8.183840666623123e-06, "loss": 0.6881, "step": 12140 }, { "epoch": 1.7143462298785654, "grad_norm": 3.750560031335267, "learning_rate": 8.18234158712889e-06, "loss": 0.5684, "step": 12141 }, { "epoch": 1.7144874329285513, "grad_norm": 4.053014601195905, "learning_rate": 8.18084254987626e-06, "loss": 0.5868, "step": 12142 }, { "epoch": 1.7146286359785372, "grad_norm": 3.551553934584082, "learning_rate": 8.179343554900058e-06, "loss": 0.5523, "step": 12143 }, { "epoch": 1.714769839028523, "grad_norm": 4.491036406895923, "learning_rate": 8.177844602235128e-06, "loss": 0.6575, "step": 12144 }, { "epoch": 1.714911042078509, "grad_norm": 3.034906255915081, "learning_rate": 8.176345691916301e-06, "loss": 0.5007, "step": 12145 }, { "epoch": 1.7150522451284949, "grad_norm": 3.936583464549599, "learning_rate": 8.174846823978412e-06, "loss": 0.6016, "step": 12146 }, { "epoch": 1.7151934481784807, "grad_norm": 3.5754729594935517, "learning_rate": 8.173347998456297e-06, "loss": 0.5849, "step": 12147 }, { "epoch": 1.7153346512284666, "grad_norm": 4.149353547153928, "learning_rate": 8.171849215384786e-06, "loss": 0.6609, "step": 12148 }, { "epoch": 1.7154758542784525, "grad_norm": 3.816472918127026, "learning_rate": 8.170350474798707e-06, "loss": 0.649, "step": 12149 }, { "epoch": 1.7156170573284384, "grad_norm": 3.4280514357370793, "learning_rate": 8.168851776732897e-06, "loss": 0.5663, "step": 12150 }, { "epoch": 1.7157582603784243, "grad_norm": 3.6173490166521423, "learning_rate": 8.167353121222179e-06, "loss": 0.5239, "step": 12151 }, { "epoch": 1.7158994634284102, "grad_norm": 4.253521523913439, "learning_rate": 8.16585450830138e-06, "loss": 0.5368, "step": 12152 }, { "epoch": 1.716040666478396, "grad_norm": 4.244863967749043, "learning_rate": 8.164355938005332e-06, "loss": 0.5973, "step": 12153 }, { "epoch": 1.716181869528382, "grad_norm": 3.8748081115052817, "learning_rate": 8.162857410368859e-06, "loss": 0.6155, "step": 12154 }, { "epoch": 1.7163230725783678, "grad_norm": 3.8257371082469103, "learning_rate": 8.161358925426786e-06, "loss": 0.5558, "step": 12155 }, { "epoch": 1.7164642756283537, "grad_norm": 3.6584318333380854, "learning_rate": 8.159860483213938e-06, "loss": 0.5269, "step": 12156 }, { "epoch": 1.7166054786783396, "grad_norm": 3.092629331673926, "learning_rate": 8.158362083765139e-06, "loss": 0.5034, "step": 12157 }, { "epoch": 1.7167466817283255, "grad_norm": 3.6774075867857627, "learning_rate": 8.15686372711521e-06, "loss": 0.6203, "step": 12158 }, { "epoch": 1.7168878847783113, "grad_norm": 4.227954583628856, "learning_rate": 8.155365413298972e-06, "loss": 0.484, "step": 12159 }, { "epoch": 1.7170290878282972, "grad_norm": 3.434216604324574, "learning_rate": 8.153867142351242e-06, "loss": 0.5147, "step": 12160 }, { "epoch": 1.717170290878283, "grad_norm": 4.067747650666089, "learning_rate": 8.152368914306846e-06, "loss": 0.5967, "step": 12161 }, { "epoch": 1.717311493928269, "grad_norm": 3.074072588046501, "learning_rate": 8.150870729200595e-06, "loss": 0.5234, "step": 12162 }, { "epoch": 1.7174526969782549, "grad_norm": 3.800746424640453, "learning_rate": 8.149372587067313e-06, "loss": 0.5936, "step": 12163 }, { "epoch": 1.7175939000282407, "grad_norm": 3.844190459457351, "learning_rate": 8.147874487941809e-06, "loss": 0.6711, "step": 12164 }, { "epoch": 1.7177351030782266, "grad_norm": 3.9560662085909204, "learning_rate": 8.146376431858904e-06, "loss": 0.5657, "step": 12165 }, { "epoch": 1.7178763061282125, "grad_norm": 3.0361422510309146, "learning_rate": 8.144878418853412e-06, "loss": 0.4404, "step": 12166 }, { "epoch": 1.7180175091781984, "grad_norm": 3.611330695746184, "learning_rate": 8.143380448960145e-06, "loss": 0.5732, "step": 12167 }, { "epoch": 1.7181587122281843, "grad_norm": 4.423159504551577, "learning_rate": 8.141882522213913e-06, "loss": 0.661, "step": 12168 }, { "epoch": 1.7182999152781702, "grad_norm": 4.237863380851219, "learning_rate": 8.140384638649526e-06, "loss": 0.628, "step": 12169 }, { "epoch": 1.718441118328156, "grad_norm": 3.407136614865714, "learning_rate": 8.1388867983018e-06, "loss": 0.509, "step": 12170 }, { "epoch": 1.718582321378142, "grad_norm": 2.6554550088819107, "learning_rate": 8.13738900120554e-06, "loss": 0.3896, "step": 12171 }, { "epoch": 1.7187235244281278, "grad_norm": 3.6975887047241605, "learning_rate": 8.135891247395554e-06, "loss": 0.5816, "step": 12172 }, { "epoch": 1.7188647274781135, "grad_norm": 4.986008878103458, "learning_rate": 8.13439353690665e-06, "loss": 0.6702, "step": 12173 }, { "epoch": 1.7190059305280994, "grad_norm": 2.885638809655849, "learning_rate": 8.132895869773638e-06, "loss": 0.4377, "step": 12174 }, { "epoch": 1.7191471335780852, "grad_norm": 3.6411908937846045, "learning_rate": 8.13139824603132e-06, "loss": 0.5092, "step": 12175 }, { "epoch": 1.7192883366280711, "grad_norm": 3.3225901546561256, "learning_rate": 8.129900665714498e-06, "loss": 0.5362, "step": 12176 }, { "epoch": 1.719429539678057, "grad_norm": 3.870434208267787, "learning_rate": 8.128403128857975e-06, "loss": 0.6169, "step": 12177 }, { "epoch": 1.719570742728043, "grad_norm": 3.10875241153497, "learning_rate": 8.126905635496557e-06, "loss": 0.4896, "step": 12178 }, { "epoch": 1.7197119457780288, "grad_norm": 3.891532963967446, "learning_rate": 8.125408185665042e-06, "loss": 0.5825, "step": 12179 }, { "epoch": 1.7198531488280147, "grad_norm": 4.121449673029451, "learning_rate": 8.123910779398233e-06, "loss": 0.6444, "step": 12180 }, { "epoch": 1.7199943518780005, "grad_norm": 3.178926346655412, "learning_rate": 8.122413416730924e-06, "loss": 0.4653, "step": 12181 }, { "epoch": 1.7201355549279864, "grad_norm": 2.85923260405976, "learning_rate": 8.120916097697918e-06, "loss": 0.4963, "step": 12182 }, { "epoch": 1.7202767579779723, "grad_norm": 3.574173476979432, "learning_rate": 8.119418822334012e-06, "loss": 0.5874, "step": 12183 }, { "epoch": 1.7204179610279582, "grad_norm": 2.7183218053107105, "learning_rate": 8.117921590674002e-06, "loss": 0.3856, "step": 12184 }, { "epoch": 1.720559164077944, "grad_norm": 4.421908563295283, "learning_rate": 8.116424402752679e-06, "loss": 0.609, "step": 12185 }, { "epoch": 1.72070036712793, "grad_norm": 3.7065062414041186, "learning_rate": 8.114927258604837e-06, "loss": 0.6704, "step": 12186 }, { "epoch": 1.7208415701779158, "grad_norm": 3.751844731557451, "learning_rate": 8.113430158265273e-06, "loss": 0.619, "step": 12187 }, { "epoch": 1.7209827732279017, "grad_norm": 3.9804235848732907, "learning_rate": 8.111933101768779e-06, "loss": 0.6074, "step": 12188 }, { "epoch": 1.7211239762778876, "grad_norm": 3.077736258206469, "learning_rate": 8.110436089150141e-06, "loss": 0.5151, "step": 12189 }, { "epoch": 1.7212651793278735, "grad_norm": 3.052250760966108, "learning_rate": 8.108939120444154e-06, "loss": 0.4722, "step": 12190 }, { "epoch": 1.7214063823778594, "grad_norm": 3.68651662490432, "learning_rate": 8.107442195685607e-06, "loss": 0.585, "step": 12191 }, { "epoch": 1.7215475854278453, "grad_norm": 3.571257340141153, "learning_rate": 8.105945314909287e-06, "loss": 0.5229, "step": 12192 }, { "epoch": 1.7216887884778311, "grad_norm": 4.08641754134329, "learning_rate": 8.104448478149978e-06, "loss": 0.5884, "step": 12193 }, { "epoch": 1.721829991527817, "grad_norm": 4.045181988121996, "learning_rate": 8.102951685442466e-06, "loss": 0.6772, "step": 12194 }, { "epoch": 1.721971194577803, "grad_norm": 3.849027603738308, "learning_rate": 8.101454936821538e-06, "loss": 0.5483, "step": 12195 }, { "epoch": 1.7221123976277888, "grad_norm": 3.2420325570681845, "learning_rate": 8.099958232321978e-06, "loss": 0.4969, "step": 12196 }, { "epoch": 1.7222536006777747, "grad_norm": 3.9568091341176594, "learning_rate": 8.098461571978568e-06, "loss": 0.6268, "step": 12197 }, { "epoch": 1.7223948037277605, "grad_norm": 3.724914901107439, "learning_rate": 8.09696495582609e-06, "loss": 0.6149, "step": 12198 }, { "epoch": 1.7225360067777464, "grad_norm": 3.7612845298481394, "learning_rate": 8.095468383899325e-06, "loss": 0.6019, "step": 12199 }, { "epoch": 1.7226772098277323, "grad_norm": 4.874308144556847, "learning_rate": 8.093971856233051e-06, "loss": 0.7785, "step": 12200 }, { "epoch": 1.7228184128777182, "grad_norm": 3.414374388096351, "learning_rate": 8.092475372862053e-06, "loss": 0.5765, "step": 12201 }, { "epoch": 1.722959615927704, "grad_norm": 3.507195475531474, "learning_rate": 8.0909789338211e-06, "loss": 0.6245, "step": 12202 }, { "epoch": 1.72310081897769, "grad_norm": 3.075922119038128, "learning_rate": 8.089482539144969e-06, "loss": 0.4874, "step": 12203 }, { "epoch": 1.7232420220276758, "grad_norm": 3.6150831567255186, "learning_rate": 8.087986188868441e-06, "loss": 0.6373, "step": 12204 }, { "epoch": 1.7233832250776617, "grad_norm": 5.305551573919046, "learning_rate": 8.086489883026289e-06, "loss": 0.6291, "step": 12205 }, { "epoch": 1.7235244281276476, "grad_norm": 3.434006304427154, "learning_rate": 8.084993621653283e-06, "loss": 0.5147, "step": 12206 }, { "epoch": 1.7236656311776333, "grad_norm": 3.887252616656358, "learning_rate": 8.083497404784201e-06, "loss": 0.588, "step": 12207 }, { "epoch": 1.7238068342276192, "grad_norm": 3.7938974416963327, "learning_rate": 8.08200123245381e-06, "loss": 0.5923, "step": 12208 }, { "epoch": 1.723948037277605, "grad_norm": 3.9916202014473225, "learning_rate": 8.080505104696888e-06, "loss": 0.5968, "step": 12209 }, { "epoch": 1.724089240327591, "grad_norm": 2.910236642107398, "learning_rate": 8.079009021548193e-06, "loss": 0.3988, "step": 12210 }, { "epoch": 1.7242304433775768, "grad_norm": 3.599685527091173, "learning_rate": 8.0775129830425e-06, "loss": 0.5523, "step": 12211 }, { "epoch": 1.7243716464275627, "grad_norm": 3.553002712567692, "learning_rate": 8.076016989214572e-06, "loss": 0.6312, "step": 12212 }, { "epoch": 1.7245128494775486, "grad_norm": 3.7422758875627724, "learning_rate": 8.07452104009918e-06, "loss": 0.6952, "step": 12213 }, { "epoch": 1.7246540525275345, "grad_norm": 3.593462025981845, "learning_rate": 8.07302513573109e-06, "loss": 0.5626, "step": 12214 }, { "epoch": 1.7247952555775203, "grad_norm": 3.7338387602964254, "learning_rate": 8.071529276145058e-06, "loss": 0.6589, "step": 12215 }, { "epoch": 1.7249364586275062, "grad_norm": 3.590416260315603, "learning_rate": 8.070033461375857e-06, "loss": 0.565, "step": 12216 }, { "epoch": 1.725077661677492, "grad_norm": 2.620999421055926, "learning_rate": 8.068537691458245e-06, "loss": 0.4334, "step": 12217 }, { "epoch": 1.725218864727478, "grad_norm": 3.625520189382327, "learning_rate": 8.067041966426984e-06, "loss": 0.5656, "step": 12218 }, { "epoch": 1.7253600677774639, "grad_norm": 3.783579599330728, "learning_rate": 8.065546286316831e-06, "loss": 0.6062, "step": 12219 }, { "epoch": 1.7255012708274498, "grad_norm": 4.306797541216445, "learning_rate": 8.064050651162546e-06, "loss": 0.5288, "step": 12220 }, { "epoch": 1.7256424738774356, "grad_norm": 3.388195268447762, "learning_rate": 8.06255506099889e-06, "loss": 0.4538, "step": 12221 }, { "epoch": 1.7257836769274215, "grad_norm": 4.201347192704246, "learning_rate": 8.061059515860616e-06, "loss": 0.7752, "step": 12222 }, { "epoch": 1.7259248799774074, "grad_norm": 3.9111571200621924, "learning_rate": 8.059564015782482e-06, "loss": 0.5626, "step": 12223 }, { "epoch": 1.7260660830273933, "grad_norm": 3.76524335082086, "learning_rate": 8.058068560799241e-06, "loss": 0.5256, "step": 12224 }, { "epoch": 1.7262072860773792, "grad_norm": 4.9649659984590535, "learning_rate": 8.05657315094565e-06, "loss": 0.6715, "step": 12225 }, { "epoch": 1.726348489127365, "grad_norm": 3.0642175156289473, "learning_rate": 8.05507778625646e-06, "loss": 0.4551, "step": 12226 }, { "epoch": 1.726489692177351, "grad_norm": 3.526888362696003, "learning_rate": 8.053582466766423e-06, "loss": 0.5123, "step": 12227 }, { "epoch": 1.7266308952273368, "grad_norm": 5.064137671008736, "learning_rate": 8.052087192510285e-06, "loss": 0.8105, "step": 12228 }, { "epoch": 1.7267720982773227, "grad_norm": 4.630414867952721, "learning_rate": 8.0505919635228e-06, "loss": 0.7968, "step": 12229 }, { "epoch": 1.7269133013273086, "grad_norm": 3.3879606359060763, "learning_rate": 8.04909677983872e-06, "loss": 0.4738, "step": 12230 }, { "epoch": 1.7270545043772945, "grad_norm": 3.9782570775956607, "learning_rate": 8.047601641492784e-06, "loss": 0.6933, "step": 12231 }, { "epoch": 1.7271957074272803, "grad_norm": 3.623446529655996, "learning_rate": 8.046106548519743e-06, "loss": 0.5441, "step": 12232 }, { "epoch": 1.7273369104772662, "grad_norm": 3.4763148097066545, "learning_rate": 8.044611500954344e-06, "loss": 0.5492, "step": 12233 }, { "epoch": 1.7274781135272521, "grad_norm": 3.7734385018433123, "learning_rate": 8.043116498831328e-06, "loss": 0.5965, "step": 12234 }, { "epoch": 1.727619316577238, "grad_norm": 5.27261544030686, "learning_rate": 8.041621542185442e-06, "loss": 0.8437, "step": 12235 }, { "epoch": 1.7277605196272239, "grad_norm": 3.39506498132273, "learning_rate": 8.040126631051425e-06, "loss": 0.5913, "step": 12236 }, { "epoch": 1.7279017226772098, "grad_norm": 3.0154087166645396, "learning_rate": 8.038631765464016e-06, "loss": 0.4831, "step": 12237 }, { "epoch": 1.7280429257271956, "grad_norm": 3.9602428805395347, "learning_rate": 8.037136945457959e-06, "loss": 0.8279, "step": 12238 }, { "epoch": 1.7281841287771815, "grad_norm": 3.09684090788484, "learning_rate": 8.035642171067992e-06, "loss": 0.5233, "step": 12239 }, { "epoch": 1.7283253318271674, "grad_norm": 3.805726032756525, "learning_rate": 8.034147442328852e-06, "loss": 0.6807, "step": 12240 }, { "epoch": 1.7284665348771533, "grad_norm": 3.3237179438899593, "learning_rate": 8.032652759275276e-06, "loss": 0.5801, "step": 12241 }, { "epoch": 1.7286077379271392, "grad_norm": 3.425530020948634, "learning_rate": 8.031158121942001e-06, "loss": 0.5666, "step": 12242 }, { "epoch": 1.728748940977125, "grad_norm": 3.873430501135082, "learning_rate": 8.029663530363763e-06, "loss": 0.6271, "step": 12243 }, { "epoch": 1.728890144027111, "grad_norm": 3.676871430132923, "learning_rate": 8.028168984575292e-06, "loss": 0.6457, "step": 12244 }, { "epoch": 1.7290313470770968, "grad_norm": 3.0676100948401634, "learning_rate": 8.026674484611321e-06, "loss": 0.4898, "step": 12245 }, { "epoch": 1.7291725501270827, "grad_norm": 3.5734219914097354, "learning_rate": 8.025180030506584e-06, "loss": 0.7054, "step": 12246 }, { "epoch": 1.7293137531770686, "grad_norm": 3.903938404793958, "learning_rate": 8.023685622295809e-06, "loss": 0.5416, "step": 12247 }, { "epoch": 1.7294549562270545, "grad_norm": 3.385259919057092, "learning_rate": 8.022191260013727e-06, "loss": 0.4163, "step": 12248 }, { "epoch": 1.7295961592770404, "grad_norm": 2.99959604340233, "learning_rate": 8.020696943695065e-06, "loss": 0.4735, "step": 12249 }, { "epoch": 1.7297373623270262, "grad_norm": 3.5629449555996784, "learning_rate": 8.019202673374554e-06, "loss": 0.5286, "step": 12250 }, { "epoch": 1.7298785653770121, "grad_norm": 3.020364585083156, "learning_rate": 8.017708449086916e-06, "loss": 0.4624, "step": 12251 }, { "epoch": 1.730019768426998, "grad_norm": 4.263244098164861, "learning_rate": 8.01621427086688e-06, "loss": 0.6008, "step": 12252 }, { "epoch": 1.7301609714769839, "grad_norm": 2.9861079764136367, "learning_rate": 8.014720138749166e-06, "loss": 0.4721, "step": 12253 }, { "epoch": 1.7303021745269698, "grad_norm": 3.037958425612766, "learning_rate": 8.013226052768498e-06, "loss": 0.4973, "step": 12254 }, { "epoch": 1.7304433775769557, "grad_norm": 3.177595732084706, "learning_rate": 8.011732012959596e-06, "loss": 0.4774, "step": 12255 }, { "epoch": 1.7305845806269415, "grad_norm": 4.1008592476658485, "learning_rate": 8.010238019357185e-06, "loss": 0.5575, "step": 12256 }, { "epoch": 1.7307257836769274, "grad_norm": 3.0706694362789597, "learning_rate": 8.008744071995987e-06, "loss": 0.4586, "step": 12257 }, { "epoch": 1.7308669867269133, "grad_norm": 3.3933264570107813, "learning_rate": 8.00725017091071e-06, "loss": 0.5053, "step": 12258 }, { "epoch": 1.7310081897768992, "grad_norm": 3.942766825970233, "learning_rate": 8.005756316136083e-06, "loss": 0.6197, "step": 12259 }, { "epoch": 1.731149392826885, "grad_norm": 2.6765244404896733, "learning_rate": 8.004262507706819e-06, "loss": 0.346, "step": 12260 }, { "epoch": 1.731290595876871, "grad_norm": 3.4399286566659795, "learning_rate": 8.002768745657632e-06, "loss": 0.4843, "step": 12261 }, { "epoch": 1.7314317989268568, "grad_norm": 3.1918969918617055, "learning_rate": 8.001275030023234e-06, "loss": 0.4269, "step": 12262 }, { "epoch": 1.7315730019768427, "grad_norm": 4.600125060250842, "learning_rate": 7.999781360838342e-06, "loss": 0.6863, "step": 12263 }, { "epoch": 1.7317142050268286, "grad_norm": 3.847966620955857, "learning_rate": 7.998287738137669e-06, "loss": 0.487, "step": 12264 }, { "epoch": 1.7318554080768145, "grad_norm": 3.836696575294387, "learning_rate": 7.996794161955921e-06, "loss": 0.4876, "step": 12265 }, { "epoch": 1.7319966111268004, "grad_norm": 4.0118424865781455, "learning_rate": 7.995300632327816e-06, "loss": 0.4952, "step": 12266 }, { "epoch": 1.7321378141767863, "grad_norm": 4.245363982826849, "learning_rate": 7.993807149288053e-06, "loss": 0.572, "step": 12267 }, { "epoch": 1.7322790172267721, "grad_norm": 3.4529183617002683, "learning_rate": 7.99231371287135e-06, "loss": 0.5066, "step": 12268 }, { "epoch": 1.732420220276758, "grad_norm": 3.5250300523353606, "learning_rate": 7.99082032311241e-06, "loss": 0.5158, "step": 12269 }, { "epoch": 1.732561423326744, "grad_norm": 3.6279822389578245, "learning_rate": 7.989326980045937e-06, "loss": 0.5948, "step": 12270 }, { "epoch": 1.7327026263767298, "grad_norm": 3.94906311511895, "learning_rate": 7.987833683706637e-06, "loss": 0.6903, "step": 12271 }, { "epoch": 1.7328438294267157, "grad_norm": 4.078626684325957, "learning_rate": 7.98634043412921e-06, "loss": 0.5793, "step": 12272 }, { "epoch": 1.7329850324767015, "grad_norm": 3.6157947825926033, "learning_rate": 7.984847231348363e-06, "loss": 0.5574, "step": 12273 }, { "epoch": 1.7331262355266874, "grad_norm": 3.2365441168173015, "learning_rate": 7.983354075398797e-06, "loss": 0.5132, "step": 12274 }, { "epoch": 1.7332674385766733, "grad_norm": 3.783482920836712, "learning_rate": 7.98186096631521e-06, "loss": 0.5376, "step": 12275 }, { "epoch": 1.7334086416266592, "grad_norm": 4.510950147147192, "learning_rate": 7.980367904132303e-06, "loss": 0.6456, "step": 12276 }, { "epoch": 1.733549844676645, "grad_norm": 3.877354506538229, "learning_rate": 7.978874888884777e-06, "loss": 0.6097, "step": 12277 }, { "epoch": 1.733691047726631, "grad_norm": 4.167590620925349, "learning_rate": 7.977381920607324e-06, "loss": 0.5913, "step": 12278 }, { "epoch": 1.7338322507766168, "grad_norm": 3.5721782283903636, "learning_rate": 7.97588899933464e-06, "loss": 0.5914, "step": 12279 }, { "epoch": 1.7339734538266027, "grad_norm": 4.076474059302508, "learning_rate": 7.97439612510142e-06, "loss": 0.6127, "step": 12280 }, { "epoch": 1.7341146568765886, "grad_norm": 4.399695408243882, "learning_rate": 7.972903297942361e-06, "loss": 0.6075, "step": 12281 }, { "epoch": 1.7342558599265745, "grad_norm": 3.4154928638674287, "learning_rate": 7.971410517892155e-06, "loss": 0.5369, "step": 12282 }, { "epoch": 1.7343970629765604, "grad_norm": 3.203158479025877, "learning_rate": 7.969917784985493e-06, "loss": 0.4545, "step": 12283 }, { "epoch": 1.7345382660265463, "grad_norm": 3.9318904239073995, "learning_rate": 7.968425099257062e-06, "loss": 0.7079, "step": 12284 }, { "epoch": 1.7346794690765321, "grad_norm": 3.8422767264503106, "learning_rate": 7.966932460741557e-06, "loss": 0.583, "step": 12285 }, { "epoch": 1.734820672126518, "grad_norm": 3.46944674871209, "learning_rate": 7.965439869473664e-06, "loss": 0.5556, "step": 12286 }, { "epoch": 1.734961875176504, "grad_norm": 3.3739363239272655, "learning_rate": 7.96394732548807e-06, "loss": 0.477, "step": 12287 }, { "epoch": 1.7351030782264898, "grad_norm": 3.8110579664551705, "learning_rate": 7.96245482881946e-06, "loss": 0.5566, "step": 12288 }, { "epoch": 1.7352442812764757, "grad_norm": 3.809238411332083, "learning_rate": 7.960962379502516e-06, "loss": 0.5784, "step": 12289 }, { "epoch": 1.7353854843264616, "grad_norm": 3.510142243588005, "learning_rate": 7.95946997757193e-06, "loss": 0.5417, "step": 12290 }, { "epoch": 1.7355266873764474, "grad_norm": 3.5286695784832562, "learning_rate": 7.957977623062379e-06, "loss": 0.5101, "step": 12291 }, { "epoch": 1.7356678904264333, "grad_norm": 3.2894451457279605, "learning_rate": 7.956485316008545e-06, "loss": 0.4857, "step": 12292 }, { "epoch": 1.7358090934764192, "grad_norm": 3.184568229054848, "learning_rate": 7.95499305644511e-06, "loss": 0.5463, "step": 12293 }, { "epoch": 1.735950296526405, "grad_norm": 3.6959208738156804, "learning_rate": 7.953500844406758e-06, "loss": 0.6844, "step": 12294 }, { "epoch": 1.736091499576391, "grad_norm": 3.373305534261933, "learning_rate": 7.95200867992816e-06, "loss": 0.4855, "step": 12295 }, { "epoch": 1.7362327026263769, "grad_norm": 3.21993280665393, "learning_rate": 7.950516563043994e-06, "loss": 0.5964, "step": 12296 }, { "epoch": 1.7363739056763627, "grad_norm": 3.516494198029777, "learning_rate": 7.949024493788938e-06, "loss": 0.4791, "step": 12297 }, { "epoch": 1.7365151087263486, "grad_norm": 2.866289877521798, "learning_rate": 7.947532472197668e-06, "loss": 0.3723, "step": 12298 }, { "epoch": 1.7366563117763345, "grad_norm": 3.5286222145144577, "learning_rate": 7.946040498304857e-06, "loss": 0.4949, "step": 12299 }, { "epoch": 1.7367975148263204, "grad_norm": 3.9486269367211313, "learning_rate": 7.944548572145178e-06, "loss": 0.6941, "step": 12300 }, { "epoch": 1.7369387178763063, "grad_norm": 3.7737242937431392, "learning_rate": 7.9430566937533e-06, "loss": 0.5685, "step": 12301 }, { "epoch": 1.7370799209262922, "grad_norm": 4.260221950896501, "learning_rate": 7.941564863163899e-06, "loss": 0.5547, "step": 12302 }, { "epoch": 1.737221123976278, "grad_norm": 3.452874788743592, "learning_rate": 7.940073080411643e-06, "loss": 0.5751, "step": 12303 }, { "epoch": 1.737362327026264, "grad_norm": 3.330685658869131, "learning_rate": 7.938581345531197e-06, "loss": 0.5395, "step": 12304 }, { "epoch": 1.7375035300762498, "grad_norm": 3.645365460990179, "learning_rate": 7.93708965855723e-06, "loss": 0.6058, "step": 12305 }, { "epoch": 1.7376447331262357, "grad_norm": 3.073263168403292, "learning_rate": 7.935598019524406e-06, "loss": 0.567, "step": 12306 }, { "epoch": 1.7377859361762216, "grad_norm": 3.8088501374111416, "learning_rate": 7.934106428467397e-06, "loss": 0.5371, "step": 12307 }, { "epoch": 1.7379271392262075, "grad_norm": 3.1999865097594413, "learning_rate": 7.932614885420859e-06, "loss": 0.5054, "step": 12308 }, { "epoch": 1.7380683422761931, "grad_norm": 4.15971483965218, "learning_rate": 7.931123390419458e-06, "loss": 0.5387, "step": 12309 }, { "epoch": 1.738209545326179, "grad_norm": 3.606674944923736, "learning_rate": 7.929631943497858e-06, "loss": 0.5649, "step": 12310 }, { "epoch": 1.7383507483761649, "grad_norm": 3.4011682593208175, "learning_rate": 7.928140544690719e-06, "loss": 0.6067, "step": 12311 }, { "epoch": 1.7384919514261508, "grad_norm": 4.738748730683039, "learning_rate": 7.926649194032699e-06, "loss": 0.6545, "step": 12312 }, { "epoch": 1.7386331544761366, "grad_norm": 4.004762251212013, "learning_rate": 7.925157891558455e-06, "loss": 0.5294, "step": 12313 }, { "epoch": 1.7387743575261225, "grad_norm": 3.6087246256532475, "learning_rate": 7.923666637302643e-06, "loss": 0.5755, "step": 12314 }, { "epoch": 1.7389155605761084, "grad_norm": 3.868696021497168, "learning_rate": 7.922175431299922e-06, "loss": 0.6617, "step": 12315 }, { "epoch": 1.7390567636260943, "grad_norm": 4.074657425117152, "learning_rate": 7.920684273584949e-06, "loss": 0.6987, "step": 12316 }, { "epoch": 1.7391979666760802, "grad_norm": 3.6695572768126796, "learning_rate": 7.919193164192374e-06, "loss": 0.6106, "step": 12317 }, { "epoch": 1.739339169726066, "grad_norm": 3.229760717080736, "learning_rate": 7.91770210315685e-06, "loss": 0.4719, "step": 12318 }, { "epoch": 1.739480372776052, "grad_norm": 3.588585125261572, "learning_rate": 7.916211090513037e-06, "loss": 0.572, "step": 12319 }, { "epoch": 1.7396215758260378, "grad_norm": 3.8831475524331966, "learning_rate": 7.914720126295572e-06, "loss": 0.6116, "step": 12320 }, { "epoch": 1.7397627788760237, "grad_norm": 4.00616999922467, "learning_rate": 7.913229210539113e-06, "loss": 0.5617, "step": 12321 }, { "epoch": 1.7399039819260096, "grad_norm": 3.6380298624212393, "learning_rate": 7.911738343278303e-06, "loss": 0.6849, "step": 12322 }, { "epoch": 1.7400451849759955, "grad_norm": 3.4289387645820333, "learning_rate": 7.910247524547793e-06, "loss": 0.5663, "step": 12323 }, { "epoch": 1.7401863880259814, "grad_norm": 3.4743933179940227, "learning_rate": 7.908756754382228e-06, "loss": 0.5575, "step": 12324 }, { "epoch": 1.7403275910759672, "grad_norm": 3.3365012277165125, "learning_rate": 7.907266032816254e-06, "loss": 0.565, "step": 12325 }, { "epoch": 1.7404687941259531, "grad_norm": 3.8360519938743716, "learning_rate": 7.905775359884514e-06, "loss": 0.6907, "step": 12326 }, { "epoch": 1.740609997175939, "grad_norm": 3.6741361140004134, "learning_rate": 7.904284735621648e-06, "loss": 0.5443, "step": 12327 }, { "epoch": 1.7407512002259249, "grad_norm": 3.5222500025388084, "learning_rate": 7.902794160062303e-06, "loss": 0.5869, "step": 12328 }, { "epoch": 1.7408924032759108, "grad_norm": 4.316278890500796, "learning_rate": 7.901303633241115e-06, "loss": 0.6863, "step": 12329 }, { "epoch": 1.7410336063258967, "grad_norm": 4.237230454234031, "learning_rate": 7.899813155192723e-06, "loss": 0.7047, "step": 12330 }, { "epoch": 1.7411748093758825, "grad_norm": 3.3224374764895765, "learning_rate": 7.898322725951768e-06, "loss": 0.5892, "step": 12331 }, { "epoch": 1.7413160124258684, "grad_norm": 3.8412073499102792, "learning_rate": 7.896832345552882e-06, "loss": 0.672, "step": 12332 }, { "epoch": 1.7414572154758543, "grad_norm": 3.3120997495042865, "learning_rate": 7.895342014030706e-06, "loss": 0.4388, "step": 12333 }, { "epoch": 1.7415984185258402, "grad_norm": 3.566704976149142, "learning_rate": 7.893851731419872e-06, "loss": 0.6121, "step": 12334 }, { "epoch": 1.741739621575826, "grad_norm": 3.8738259805703534, "learning_rate": 7.892361497755013e-06, "loss": 0.5813, "step": 12335 }, { "epoch": 1.741880824625812, "grad_norm": 3.1056172410959446, "learning_rate": 7.890871313070768e-06, "loss": 0.494, "step": 12336 }, { "epoch": 1.7420220276757978, "grad_norm": 3.947035380131418, "learning_rate": 7.889381177401758e-06, "loss": 0.5936, "step": 12337 }, { "epoch": 1.7421632307257837, "grad_norm": 4.301704831686292, "learning_rate": 7.88789109078262e-06, "loss": 0.6153, "step": 12338 }, { "epoch": 1.7423044337757696, "grad_norm": 3.179182520795745, "learning_rate": 7.886401053247982e-06, "loss": 0.5162, "step": 12339 }, { "epoch": 1.7424456368257555, "grad_norm": 2.949479357994074, "learning_rate": 7.884911064832466e-06, "loss": 0.4034, "step": 12340 }, { "epoch": 1.7425868398757414, "grad_norm": 3.656064398150352, "learning_rate": 7.883421125570705e-06, "loss": 0.5672, "step": 12341 }, { "epoch": 1.7427280429257272, "grad_norm": 3.0039954588497304, "learning_rate": 7.881931235497324e-06, "loss": 0.4362, "step": 12342 }, { "epoch": 1.742869245975713, "grad_norm": 3.57816052300096, "learning_rate": 7.880441394646947e-06, "loss": 0.6384, "step": 12343 }, { "epoch": 1.7430104490256988, "grad_norm": 3.598478295490384, "learning_rate": 7.878951603054195e-06, "loss": 0.6154, "step": 12344 }, { "epoch": 1.7431516520756847, "grad_norm": 3.0525034275813905, "learning_rate": 7.877461860753697e-06, "loss": 0.541, "step": 12345 }, { "epoch": 1.7432928551256706, "grad_norm": 3.2114003288540327, "learning_rate": 7.875972167780063e-06, "loss": 0.4746, "step": 12346 }, { "epoch": 1.7434340581756564, "grad_norm": 3.4013373130005826, "learning_rate": 7.87448252416792e-06, "loss": 0.6067, "step": 12347 }, { "epoch": 1.7435752612256423, "grad_norm": 3.351396816533248, "learning_rate": 7.872992929951886e-06, "loss": 0.5184, "step": 12348 }, { "epoch": 1.7437164642756282, "grad_norm": 3.560096971177, "learning_rate": 7.871503385166575e-06, "loss": 0.4978, "step": 12349 }, { "epoch": 1.743857667325614, "grad_norm": 2.920457301641549, "learning_rate": 7.870013889846608e-06, "loss": 0.4201, "step": 12350 }, { "epoch": 1.7439988703756, "grad_norm": 3.6718460401748185, "learning_rate": 7.868524444026599e-06, "loss": 0.5081, "step": 12351 }, { "epoch": 1.7441400734255859, "grad_norm": 3.7099225855011904, "learning_rate": 7.867035047741159e-06, "loss": 0.4773, "step": 12352 }, { "epoch": 1.7442812764755717, "grad_norm": 4.019298564382873, "learning_rate": 7.865545701024909e-06, "loss": 0.6674, "step": 12353 }, { "epoch": 1.7444224795255576, "grad_norm": 3.9919315067912393, "learning_rate": 7.86405640391245e-06, "loss": 0.6395, "step": 12354 }, { "epoch": 1.7445636825755435, "grad_norm": 3.864375827067851, "learning_rate": 7.8625671564384e-06, "loss": 0.5734, "step": 12355 }, { "epoch": 1.7447048856255294, "grad_norm": 4.496668351196489, "learning_rate": 7.861077958637365e-06, "loss": 0.7376, "step": 12356 }, { "epoch": 1.7448460886755153, "grad_norm": 3.7312405891850133, "learning_rate": 7.859588810543954e-06, "loss": 0.6316, "step": 12357 }, { "epoch": 1.7449872917255012, "grad_norm": 4.256561724193676, "learning_rate": 7.858099712192774e-06, "loss": 0.6544, "step": 12358 }, { "epoch": 1.745128494775487, "grad_norm": 2.9174758539217875, "learning_rate": 7.856610663618433e-06, "loss": 0.4834, "step": 12359 }, { "epoch": 1.745269697825473, "grad_norm": 3.2666706788577127, "learning_rate": 7.855121664855535e-06, "loss": 0.6432, "step": 12360 }, { "epoch": 1.7454109008754588, "grad_norm": 3.6525343795097487, "learning_rate": 7.853632715938681e-06, "loss": 0.5931, "step": 12361 }, { "epoch": 1.7455521039254447, "grad_norm": 2.985662491137568, "learning_rate": 7.852143816902483e-06, "loss": 0.3859, "step": 12362 }, { "epoch": 1.7456933069754306, "grad_norm": 3.2167960396533264, "learning_rate": 7.850654967781528e-06, "loss": 0.3888, "step": 12363 }, { "epoch": 1.7458345100254165, "grad_norm": 3.161667660721012, "learning_rate": 7.849166168610424e-06, "loss": 0.4959, "step": 12364 }, { "epoch": 1.7459757130754023, "grad_norm": 4.706689960674808, "learning_rate": 7.847677419423771e-06, "loss": 0.6461, "step": 12365 }, { "epoch": 1.7461169161253882, "grad_norm": 3.822588998727122, "learning_rate": 7.846188720256162e-06, "loss": 0.5328, "step": 12366 }, { "epoch": 1.746258119175374, "grad_norm": 3.73248783854724, "learning_rate": 7.844700071142199e-06, "loss": 0.4458, "step": 12367 }, { "epoch": 1.74639932222536, "grad_norm": 3.599584735453198, "learning_rate": 7.843211472116476e-06, "loss": 0.5464, "step": 12368 }, { "epoch": 1.7465405252753459, "grad_norm": 4.020897616261642, "learning_rate": 7.841722923213588e-06, "loss": 0.7149, "step": 12369 }, { "epoch": 1.7466817283253318, "grad_norm": 3.5873575416194647, "learning_rate": 7.84023442446813e-06, "loss": 0.519, "step": 12370 }, { "epoch": 1.7468229313753176, "grad_norm": 3.6164550347190683, "learning_rate": 7.838745975914685e-06, "loss": 0.5589, "step": 12371 }, { "epoch": 1.7469641344253035, "grad_norm": 3.260271711054987, "learning_rate": 7.837257577587853e-06, "loss": 0.4767, "step": 12372 }, { "epoch": 1.7471053374752894, "grad_norm": 3.3743143240128006, "learning_rate": 7.83576922952222e-06, "loss": 0.5715, "step": 12373 }, { "epoch": 1.7472465405252753, "grad_norm": 4.713378204797731, "learning_rate": 7.834280931752375e-06, "loss": 0.9426, "step": 12374 }, { "epoch": 1.7473877435752612, "grad_norm": 4.309944586408834, "learning_rate": 7.832792684312906e-06, "loss": 0.5755, "step": 12375 }, { "epoch": 1.747528946625247, "grad_norm": 4.197338238469789, "learning_rate": 7.8313044872384e-06, "loss": 0.4115, "step": 12376 }, { "epoch": 1.747670149675233, "grad_norm": 4.027456567216389, "learning_rate": 7.829816340563442e-06, "loss": 0.5859, "step": 12377 }, { "epoch": 1.7478113527252188, "grad_norm": 3.2214342329578365, "learning_rate": 7.828328244322612e-06, "loss": 0.5247, "step": 12378 }, { "epoch": 1.7479525557752047, "grad_norm": 4.315903548684153, "learning_rate": 7.826840198550502e-06, "loss": 0.7698, "step": 12379 }, { "epoch": 1.7480937588251906, "grad_norm": 3.7478556416410194, "learning_rate": 7.825352203281682e-06, "loss": 0.6152, "step": 12380 }, { "epoch": 1.7482349618751765, "grad_norm": 3.3887610738317826, "learning_rate": 7.823864258550737e-06, "loss": 0.5404, "step": 12381 }, { "epoch": 1.7483761649251623, "grad_norm": 3.2784850387918087, "learning_rate": 7.822376364392248e-06, "loss": 0.4762, "step": 12382 }, { "epoch": 1.7485173679751482, "grad_norm": 4.983852373183085, "learning_rate": 7.820888520840792e-06, "loss": 0.7832, "step": 12383 }, { "epoch": 1.7486585710251341, "grad_norm": 3.8465093684948175, "learning_rate": 7.819400727930947e-06, "loss": 0.5706, "step": 12384 }, { "epoch": 1.74879977407512, "grad_norm": 3.1149277383497225, "learning_rate": 7.817912985697285e-06, "loss": 0.4687, "step": 12385 }, { "epoch": 1.7489409771251059, "grad_norm": 3.233405587984255, "learning_rate": 7.816425294174385e-06, "loss": 0.5024, "step": 12386 }, { "epoch": 1.7490821801750918, "grad_norm": 2.974922456001024, "learning_rate": 7.81493765339682e-06, "loss": 0.4442, "step": 12387 }, { "epoch": 1.7492233832250776, "grad_norm": 2.9463989654925804, "learning_rate": 7.813450063399158e-06, "loss": 0.4865, "step": 12388 }, { "epoch": 1.7493645862750635, "grad_norm": 3.272557570518201, "learning_rate": 7.811962524215969e-06, "loss": 0.5506, "step": 12389 }, { "epoch": 1.7495057893250494, "grad_norm": 2.851310692924613, "learning_rate": 7.81047503588183e-06, "loss": 0.4693, "step": 12390 }, { "epoch": 1.7496469923750353, "grad_norm": 3.857979085930518, "learning_rate": 7.808987598431303e-06, "loss": 0.576, "step": 12391 }, { "epoch": 1.7497881954250212, "grad_norm": 3.477501051022583, "learning_rate": 7.807500211898959e-06, "loss": 0.4537, "step": 12392 }, { "epoch": 1.749929398475007, "grad_norm": 3.841066769323869, "learning_rate": 7.806012876319363e-06, "loss": 0.6522, "step": 12393 }, { "epoch": 1.750070601524993, "grad_norm": 4.4022426387169595, "learning_rate": 7.804525591727081e-06, "loss": 0.597, "step": 12394 }, { "epoch": 1.7502118045749788, "grad_norm": 3.1189317036371524, "learning_rate": 7.803038358156674e-06, "loss": 0.4532, "step": 12395 }, { "epoch": 1.7503530076249647, "grad_norm": 3.0483550107122, "learning_rate": 7.801551175642715e-06, "loss": 0.4644, "step": 12396 }, { "epoch": 1.7504942106749506, "grad_norm": 3.740861349048714, "learning_rate": 7.800064044219748e-06, "loss": 0.5674, "step": 12397 }, { "epoch": 1.7506354137249365, "grad_norm": 3.183118901989265, "learning_rate": 7.798576963922347e-06, "loss": 0.453, "step": 12398 }, { "epoch": 1.7507766167749224, "grad_norm": 3.5555690871886783, "learning_rate": 7.797089934785064e-06, "loss": 0.5352, "step": 12399 }, { "epoch": 1.7509178198249082, "grad_norm": 3.214463952819515, "learning_rate": 7.79560295684246e-06, "loss": 0.4298, "step": 12400 }, { "epoch": 1.7510590228748941, "grad_norm": 3.6092818374244424, "learning_rate": 7.794116030129092e-06, "loss": 0.498, "step": 12401 }, { "epoch": 1.75120022592488, "grad_norm": 3.481068837760789, "learning_rate": 7.792629154679514e-06, "loss": 0.5789, "step": 12402 }, { "epoch": 1.7513414289748659, "grad_norm": 3.320025410895974, "learning_rate": 7.791142330528282e-06, "loss": 0.4826, "step": 12403 }, { "epoch": 1.7514826320248518, "grad_norm": 4.287026252117888, "learning_rate": 7.789655557709951e-06, "loss": 0.64, "step": 12404 }, { "epoch": 1.7516238350748377, "grad_norm": 3.5418558294109657, "learning_rate": 7.788168836259068e-06, "loss": 0.512, "step": 12405 }, { "epoch": 1.7517650381248235, "grad_norm": 3.7013688031577687, "learning_rate": 7.786682166210184e-06, "loss": 0.5077, "step": 12406 }, { "epoch": 1.7519062411748094, "grad_norm": 4.844171770141107, "learning_rate": 7.785195547597852e-06, "loss": 0.757, "step": 12407 }, { "epoch": 1.7520474442247953, "grad_norm": 3.9827763643644905, "learning_rate": 7.78370898045662e-06, "loss": 0.6008, "step": 12408 }, { "epoch": 1.7521886472747812, "grad_norm": 3.576850432309614, "learning_rate": 7.782222464821029e-06, "loss": 0.4929, "step": 12409 }, { "epoch": 1.752329850324767, "grad_norm": 3.013653370376524, "learning_rate": 7.780736000725636e-06, "loss": 0.4692, "step": 12410 }, { "epoch": 1.752471053374753, "grad_norm": 3.8339708949039113, "learning_rate": 7.779249588204978e-06, "loss": 0.5544, "step": 12411 }, { "epoch": 1.7526122564247388, "grad_norm": 3.7851784337776384, "learning_rate": 7.7777632272936e-06, "loss": 0.5443, "step": 12412 }, { "epoch": 1.7527534594747247, "grad_norm": 4.184582800669715, "learning_rate": 7.77627691802605e-06, "loss": 0.6184, "step": 12413 }, { "epoch": 1.7528946625247106, "grad_norm": 4.336026424260855, "learning_rate": 7.774790660436857e-06, "loss": 0.6384, "step": 12414 }, { "epoch": 1.7530358655746965, "grad_norm": 4.114409127227056, "learning_rate": 7.773304454560572e-06, "loss": 0.5843, "step": 12415 }, { "epoch": 1.7531770686246824, "grad_norm": 4.826660550456006, "learning_rate": 7.771818300431729e-06, "loss": 0.5815, "step": 12416 }, { "epoch": 1.7533182716746682, "grad_norm": 3.3396370008846237, "learning_rate": 7.770332198084865e-06, "loss": 0.4707, "step": 12417 }, { "epoch": 1.7534594747246541, "grad_norm": 3.0058608180821498, "learning_rate": 7.768846147554517e-06, "loss": 0.4896, "step": 12418 }, { "epoch": 1.75360067777464, "grad_norm": 3.794863819078056, "learning_rate": 7.767360148875224e-06, "loss": 0.6703, "step": 12419 }, { "epoch": 1.753741880824626, "grad_norm": 3.749540162153938, "learning_rate": 7.765874202081516e-06, "loss": 0.6648, "step": 12420 }, { "epoch": 1.7538830838746118, "grad_norm": 4.024943912211173, "learning_rate": 7.764388307207929e-06, "loss": 0.5399, "step": 12421 }, { "epoch": 1.7540242869245977, "grad_norm": 3.8522656584107984, "learning_rate": 7.76290246428899e-06, "loss": 0.6598, "step": 12422 }, { "epoch": 1.7541654899745835, "grad_norm": 4.302520506927567, "learning_rate": 7.761416673359228e-06, "loss": 0.646, "step": 12423 }, { "epoch": 1.7543066930245694, "grad_norm": 3.2521125943511775, "learning_rate": 7.75993093445318e-06, "loss": 0.4286, "step": 12424 }, { "epoch": 1.7544478960745553, "grad_norm": 3.727592637949972, "learning_rate": 7.758445247605369e-06, "loss": 0.619, "step": 12425 }, { "epoch": 1.7545890991245412, "grad_norm": 3.9465865827172655, "learning_rate": 7.75695961285032e-06, "loss": 0.694, "step": 12426 }, { "epoch": 1.754730302174527, "grad_norm": 3.5876326317937304, "learning_rate": 7.755474030222564e-06, "loss": 0.5902, "step": 12427 }, { "epoch": 1.754871505224513, "grad_norm": 4.006412893091304, "learning_rate": 7.753988499756621e-06, "loss": 0.578, "step": 12428 }, { "epoch": 1.7550127082744988, "grad_norm": 3.26647581513503, "learning_rate": 7.752503021487016e-06, "loss": 0.5125, "step": 12429 }, { "epoch": 1.7551539113244847, "grad_norm": 4.827549325208341, "learning_rate": 7.75101759544827e-06, "loss": 0.6756, "step": 12430 }, { "epoch": 1.7552951143744706, "grad_norm": 3.1948763108323797, "learning_rate": 7.749532221674904e-06, "loss": 0.4933, "step": 12431 }, { "epoch": 1.7554363174244565, "grad_norm": 3.938548724410424, "learning_rate": 7.748046900201438e-06, "loss": 0.6477, "step": 12432 }, { "epoch": 1.7555775204744424, "grad_norm": 3.78848357955434, "learning_rate": 7.746561631062386e-06, "loss": 0.6098, "step": 12433 }, { "epoch": 1.7557187235244283, "grad_norm": 3.7595967713038756, "learning_rate": 7.745076414292272e-06, "loss": 0.6302, "step": 12434 }, { "epoch": 1.7558599265744141, "grad_norm": 2.9910045865051034, "learning_rate": 7.743591249925604e-06, "loss": 0.4828, "step": 12435 }, { "epoch": 1.7560011296244, "grad_norm": 4.205834549567604, "learning_rate": 7.742106137996905e-06, "loss": 0.6327, "step": 12436 }, { "epoch": 1.756142332674386, "grad_norm": 3.363653112547713, "learning_rate": 7.740621078540684e-06, "loss": 0.4779, "step": 12437 }, { "epoch": 1.7562835357243718, "grad_norm": 3.7202910017397106, "learning_rate": 7.739136071591455e-06, "loss": 0.4958, "step": 12438 }, { "epoch": 1.7564247387743577, "grad_norm": 3.184843695095462, "learning_rate": 7.737651117183725e-06, "loss": 0.4968, "step": 12439 }, { "epoch": 1.7565659418243436, "grad_norm": 3.6441293252089277, "learning_rate": 7.736166215352004e-06, "loss": 0.6185, "step": 12440 }, { "epoch": 1.7567071448743294, "grad_norm": 3.7396841070723146, "learning_rate": 7.734681366130805e-06, "loss": 0.5923, "step": 12441 }, { "epoch": 1.7568483479243153, "grad_norm": 3.5747049117362897, "learning_rate": 7.733196569554632e-06, "loss": 0.5441, "step": 12442 }, { "epoch": 1.7569895509743012, "grad_norm": 3.616448822988445, "learning_rate": 7.73171182565799e-06, "loss": 0.5794, "step": 12443 }, { "epoch": 1.757130754024287, "grad_norm": 4.158406021438606, "learning_rate": 7.730227134475388e-06, "loss": 0.5254, "step": 12444 }, { "epoch": 1.7572719570742728, "grad_norm": 3.7260402433423048, "learning_rate": 7.728742496041326e-06, "loss": 0.5396, "step": 12445 }, { "epoch": 1.7574131601242586, "grad_norm": 4.131829241296021, "learning_rate": 7.727257910390309e-06, "loss": 0.5744, "step": 12446 }, { "epoch": 1.7575543631742445, "grad_norm": 3.161312591002527, "learning_rate": 7.725773377556838e-06, "loss": 0.5048, "step": 12447 }, { "epoch": 1.7576955662242304, "grad_norm": 3.9437560228453687, "learning_rate": 7.724288897575409e-06, "loss": 0.5525, "step": 12448 }, { "epoch": 1.7578367692742163, "grad_norm": 3.0303829478932482, "learning_rate": 7.722804470480523e-06, "loss": 0.4279, "step": 12449 }, { "epoch": 1.7579779723242022, "grad_norm": 3.234638917294806, "learning_rate": 7.721320096306679e-06, "loss": 0.5012, "step": 12450 }, { "epoch": 1.758119175374188, "grad_norm": 3.273380982501268, "learning_rate": 7.71983577508837e-06, "loss": 0.4477, "step": 12451 }, { "epoch": 1.758260378424174, "grad_norm": 2.9510704001089776, "learning_rate": 7.718351506860092e-06, "loss": 0.4267, "step": 12452 }, { "epoch": 1.7584015814741598, "grad_norm": 3.7286723360227847, "learning_rate": 7.716867291656341e-06, "loss": 0.5682, "step": 12453 }, { "epoch": 1.7585427845241457, "grad_norm": 3.6130512085527635, "learning_rate": 7.71538312951161e-06, "loss": 0.5478, "step": 12454 }, { "epoch": 1.7586839875741316, "grad_norm": 3.754924627022415, "learning_rate": 7.713899020460389e-06, "loss": 0.6228, "step": 12455 }, { "epoch": 1.7588251906241175, "grad_norm": 4.006916714402932, "learning_rate": 7.712414964537167e-06, "loss": 0.5613, "step": 12456 }, { "epoch": 1.7589663936741033, "grad_norm": 3.593531592735026, "learning_rate": 7.710930961776428e-06, "loss": 0.5732, "step": 12457 }, { "epoch": 1.7591075967240892, "grad_norm": 3.454797259548896, "learning_rate": 7.70944701221267e-06, "loss": 0.477, "step": 12458 }, { "epoch": 1.7592487997740751, "grad_norm": 3.40887114553142, "learning_rate": 7.707963115880372e-06, "loss": 0.5769, "step": 12459 }, { "epoch": 1.759390002824061, "grad_norm": 3.540560189162941, "learning_rate": 7.706479272814024e-06, "loss": 0.6118, "step": 12460 }, { "epoch": 1.7595312058740469, "grad_norm": 4.086662815632813, "learning_rate": 7.704995483048104e-06, "loss": 0.623, "step": 12461 }, { "epoch": 1.7596724089240328, "grad_norm": 3.5864495388759794, "learning_rate": 7.703511746617098e-06, "loss": 0.7263, "step": 12462 }, { "epoch": 1.7598136119740186, "grad_norm": 3.1412525923311447, "learning_rate": 7.70202806355549e-06, "loss": 0.404, "step": 12463 }, { "epoch": 1.7599548150240045, "grad_norm": 4.1705673588870935, "learning_rate": 7.700544433897757e-06, "loss": 0.6693, "step": 12464 }, { "epoch": 1.7600960180739904, "grad_norm": 4.25484100255558, "learning_rate": 7.699060857678379e-06, "loss": 0.7164, "step": 12465 }, { "epoch": 1.7602372211239763, "grad_norm": 3.818364759082351, "learning_rate": 7.697577334931828e-06, "loss": 0.6078, "step": 12466 }, { "epoch": 1.7603784241739622, "grad_norm": 3.479626639121365, "learning_rate": 7.69609386569259e-06, "loss": 0.5779, "step": 12467 }, { "epoch": 1.760519627223948, "grad_norm": 3.0343303614358996, "learning_rate": 7.694610449995133e-06, "loss": 0.4539, "step": 12468 }, { "epoch": 1.760660830273934, "grad_norm": 3.4071469460333357, "learning_rate": 7.693127087873934e-06, "loss": 0.5078, "step": 12469 }, { "epoch": 1.7608020333239198, "grad_norm": 3.695379526051402, "learning_rate": 7.691643779363465e-06, "loss": 0.55, "step": 12470 }, { "epoch": 1.7609432363739057, "grad_norm": 3.2366026101224827, "learning_rate": 7.6901605244982e-06, "loss": 0.4449, "step": 12471 }, { "epoch": 1.7610844394238916, "grad_norm": 3.3483212117843726, "learning_rate": 7.688677323312608e-06, "loss": 0.5006, "step": 12472 }, { "epoch": 1.7612256424738775, "grad_norm": 3.138517254846151, "learning_rate": 7.687194175841153e-06, "loss": 0.4817, "step": 12473 }, { "epoch": 1.7613668455238634, "grad_norm": 3.873700100256128, "learning_rate": 7.685711082118306e-06, "loss": 0.6783, "step": 12474 }, { "epoch": 1.7615080485738492, "grad_norm": 4.0994443523852855, "learning_rate": 7.684228042178536e-06, "loss": 0.5511, "step": 12475 }, { "epoch": 1.7616492516238351, "grad_norm": 2.9669050754286834, "learning_rate": 7.682745056056307e-06, "loss": 0.4713, "step": 12476 }, { "epoch": 1.761790454673821, "grad_norm": 3.276796391040959, "learning_rate": 7.681262123786083e-06, "loss": 0.5239, "step": 12477 }, { "epoch": 1.7619316577238069, "grad_norm": 4.588210055670645, "learning_rate": 7.679779245402321e-06, "loss": 0.6622, "step": 12478 }, { "epoch": 1.7620728607737925, "grad_norm": 4.860233168534756, "learning_rate": 7.67829642093949e-06, "loss": 0.7755, "step": 12479 }, { "epoch": 1.7622140638237784, "grad_norm": 2.918906494675368, "learning_rate": 7.67681365043205e-06, "loss": 0.4447, "step": 12480 }, { "epoch": 1.7623552668737643, "grad_norm": 3.000341926525375, "learning_rate": 7.675330933914456e-06, "loss": 0.4517, "step": 12481 }, { "epoch": 1.7624964699237502, "grad_norm": 3.556254236340234, "learning_rate": 7.673848271421166e-06, "loss": 0.4879, "step": 12482 }, { "epoch": 1.762637672973736, "grad_norm": 3.4892057606616347, "learning_rate": 7.672365662986636e-06, "loss": 0.4889, "step": 12483 }, { "epoch": 1.762778876023722, "grad_norm": 3.352287111773432, "learning_rate": 7.670883108645326e-06, "loss": 0.5165, "step": 12484 }, { "epoch": 1.7629200790737078, "grad_norm": 3.9138987733594335, "learning_rate": 7.669400608431686e-06, "loss": 0.6818, "step": 12485 }, { "epoch": 1.7630612821236937, "grad_norm": 3.721553392858244, "learning_rate": 7.667918162380166e-06, "loss": 0.4745, "step": 12486 }, { "epoch": 1.7632024851736796, "grad_norm": 3.433281503553015, "learning_rate": 7.666435770525224e-06, "loss": 0.5461, "step": 12487 }, { "epoch": 1.7633436882236655, "grad_norm": 4.657290225582665, "learning_rate": 7.664953432901306e-06, "loss": 0.7559, "step": 12488 }, { "epoch": 1.7634848912736514, "grad_norm": 4.438244327797083, "learning_rate": 7.663471149542865e-06, "loss": 0.6731, "step": 12489 }, { "epoch": 1.7636260943236373, "grad_norm": 3.2846179751617237, "learning_rate": 7.661988920484344e-06, "loss": 0.5256, "step": 12490 }, { "epoch": 1.7637672973736231, "grad_norm": 3.743108081404323, "learning_rate": 7.66050674576019e-06, "loss": 0.595, "step": 12491 }, { "epoch": 1.763908500423609, "grad_norm": 3.9667407457509607, "learning_rate": 7.659024625404845e-06, "loss": 0.619, "step": 12492 }, { "epoch": 1.764049703473595, "grad_norm": 3.3926561400859105, "learning_rate": 7.65754255945276e-06, "loss": 0.6196, "step": 12493 }, { "epoch": 1.7641909065235808, "grad_norm": 3.797393127518146, "learning_rate": 7.656060547938375e-06, "loss": 0.6706, "step": 12494 }, { "epoch": 1.7643321095735667, "grad_norm": 3.1753276106854296, "learning_rate": 7.654578590896128e-06, "loss": 0.4997, "step": 12495 }, { "epoch": 1.7644733126235526, "grad_norm": 3.6983842960646776, "learning_rate": 7.653096688360465e-06, "loss": 0.5968, "step": 12496 }, { "epoch": 1.7646145156735384, "grad_norm": 3.6418490432114052, "learning_rate": 7.651614840365822e-06, "loss": 0.6449, "step": 12497 }, { "epoch": 1.7647557187235243, "grad_norm": 4.200852459256813, "learning_rate": 7.650133046946633e-06, "loss": 0.756, "step": 12498 }, { "epoch": 1.7648969217735102, "grad_norm": 3.463497769193341, "learning_rate": 7.64865130813734e-06, "loss": 0.5321, "step": 12499 }, { "epoch": 1.765038124823496, "grad_norm": 5.15969524879268, "learning_rate": 7.64716962397237e-06, "loss": 0.9924, "step": 12500 }, { "epoch": 1.765179327873482, "grad_norm": 3.6127014030402576, "learning_rate": 7.645687994486165e-06, "loss": 0.623, "step": 12501 }, { "epoch": 1.7653205309234679, "grad_norm": 3.677239976045142, "learning_rate": 7.644206419713155e-06, "loss": 0.5312, "step": 12502 }, { "epoch": 1.7654617339734537, "grad_norm": 3.983191264827294, "learning_rate": 7.64272489968777e-06, "loss": 0.6314, "step": 12503 }, { "epoch": 1.7656029370234396, "grad_norm": 3.0828536063773755, "learning_rate": 7.641243434444439e-06, "loss": 0.5338, "step": 12504 }, { "epoch": 1.7657441400734255, "grad_norm": 3.4281820809601933, "learning_rate": 7.639762024017592e-06, "loss": 0.5823, "step": 12505 }, { "epoch": 1.7658853431234114, "grad_norm": 3.0254029177888198, "learning_rate": 7.63828066844166e-06, "loss": 0.437, "step": 12506 }, { "epoch": 1.7660265461733973, "grad_norm": 3.4728798278346225, "learning_rate": 7.636799367751062e-06, "loss": 0.4968, "step": 12507 }, { "epoch": 1.7661677492233832, "grad_norm": 3.7957023052909213, "learning_rate": 7.635318121980228e-06, "loss": 0.6929, "step": 12508 }, { "epoch": 1.766308952273369, "grad_norm": 3.446410238644995, "learning_rate": 7.633836931163575e-06, "loss": 0.5711, "step": 12509 }, { "epoch": 1.766450155323355, "grad_norm": 4.313189330047758, "learning_rate": 7.632355795335533e-06, "loss": 0.6865, "step": 12510 }, { "epoch": 1.7665913583733408, "grad_norm": 3.6680780157159347, "learning_rate": 7.63087471453052e-06, "loss": 0.5883, "step": 12511 }, { "epoch": 1.7667325614233267, "grad_norm": 4.3449302535295615, "learning_rate": 7.629393688782954e-06, "loss": 0.559, "step": 12512 }, { "epoch": 1.7668737644733126, "grad_norm": 3.6368062360221143, "learning_rate": 7.627912718127257e-06, "loss": 0.5448, "step": 12513 }, { "epoch": 1.7670149675232985, "grad_norm": 3.4341223475847227, "learning_rate": 7.6264318025978455e-06, "loss": 0.5146, "step": 12514 }, { "epoch": 1.7671561705732843, "grad_norm": 2.6058425808405503, "learning_rate": 7.6249509422291325e-06, "loss": 0.3774, "step": 12515 }, { "epoch": 1.7672973736232702, "grad_norm": 3.5900926502805297, "learning_rate": 7.623470137055533e-06, "loss": 0.5576, "step": 12516 }, { "epoch": 1.767438576673256, "grad_norm": 4.077587604592495, "learning_rate": 7.621989387111459e-06, "loss": 0.6274, "step": 12517 }, { "epoch": 1.767579779723242, "grad_norm": 3.1653100221081285, "learning_rate": 7.620508692431327e-06, "loss": 0.5186, "step": 12518 }, { "epoch": 1.7677209827732279, "grad_norm": 3.6051823481068803, "learning_rate": 7.619028053049545e-06, "loss": 0.5296, "step": 12519 }, { "epoch": 1.7678621858232137, "grad_norm": 4.492520168642713, "learning_rate": 7.617547469000524e-06, "loss": 0.8, "step": 12520 }, { "epoch": 1.7680033888731996, "grad_norm": 3.82134154485563, "learning_rate": 7.616066940318667e-06, "loss": 0.4863, "step": 12521 }, { "epoch": 1.7681445919231855, "grad_norm": 4.08631481197251, "learning_rate": 7.6145864670383884e-06, "loss": 0.6602, "step": 12522 }, { "epoch": 1.7682857949731714, "grad_norm": 3.7572633724594025, "learning_rate": 7.613106049194092e-06, "loss": 0.6828, "step": 12523 }, { "epoch": 1.7684269980231573, "grad_norm": 3.0669350542553135, "learning_rate": 7.611625686820177e-06, "loss": 0.4887, "step": 12524 }, { "epoch": 1.7685682010731432, "grad_norm": 3.0469922818845876, "learning_rate": 7.610145379951051e-06, "loss": 0.5176, "step": 12525 }, { "epoch": 1.768709404123129, "grad_norm": 3.4268071974597567, "learning_rate": 7.608665128621111e-06, "loss": 0.5113, "step": 12526 }, { "epoch": 1.768850607173115, "grad_norm": 3.7498798767074604, "learning_rate": 7.607184932864764e-06, "loss": 0.614, "step": 12527 }, { "epoch": 1.7689918102231008, "grad_norm": 3.457951959657924, "learning_rate": 7.6057047927164055e-06, "loss": 0.5069, "step": 12528 }, { "epoch": 1.7691330132730867, "grad_norm": 3.844647584444253, "learning_rate": 7.60422470821043e-06, "loss": 0.6102, "step": 12529 }, { "epoch": 1.7692742163230726, "grad_norm": 3.6876561053647823, "learning_rate": 7.602744679381241e-06, "loss": 0.5933, "step": 12530 }, { "epoch": 1.7694154193730585, "grad_norm": 3.2184194650490956, "learning_rate": 7.6012647062632325e-06, "loss": 0.4679, "step": 12531 }, { "epoch": 1.7695566224230443, "grad_norm": 3.3119275553877157, "learning_rate": 7.599784788890794e-06, "loss": 0.4713, "step": 12532 }, { "epoch": 1.7696978254730302, "grad_norm": 3.352943485152535, "learning_rate": 7.598304927298321e-06, "loss": 0.4538, "step": 12533 }, { "epoch": 1.7698390285230161, "grad_norm": 3.651492646490969, "learning_rate": 7.596825121520202e-06, "loss": 0.6036, "step": 12534 }, { "epoch": 1.769980231573002, "grad_norm": 4.008703276035849, "learning_rate": 7.5953453715908295e-06, "loss": 0.5779, "step": 12535 }, { "epoch": 1.7701214346229879, "grad_norm": 3.070428313959527, "learning_rate": 7.593865677544594e-06, "loss": 0.526, "step": 12536 }, { "epoch": 1.7702626376729738, "grad_norm": 3.825177336104953, "learning_rate": 7.592386039415881e-06, "loss": 0.5823, "step": 12537 }, { "epoch": 1.7704038407229596, "grad_norm": 4.518224386240222, "learning_rate": 7.590906457239073e-06, "loss": 0.7404, "step": 12538 }, { "epoch": 1.7705450437729455, "grad_norm": 3.0833367074018025, "learning_rate": 7.589426931048562e-06, "loss": 0.518, "step": 12539 }, { "epoch": 1.7706862468229314, "grad_norm": 2.773794350920757, "learning_rate": 7.587947460878731e-06, "loss": 0.3966, "step": 12540 }, { "epoch": 1.7708274498729173, "grad_norm": 3.270102781492716, "learning_rate": 7.586468046763956e-06, "loss": 0.5815, "step": 12541 }, { "epoch": 1.7709686529229032, "grad_norm": 4.244018171905019, "learning_rate": 7.584988688738622e-06, "loss": 0.7261, "step": 12542 }, { "epoch": 1.771109855972889, "grad_norm": 3.176724270447471, "learning_rate": 7.583509386837104e-06, "loss": 0.4999, "step": 12543 }, { "epoch": 1.771251059022875, "grad_norm": 3.257812949457098, "learning_rate": 7.582030141093788e-06, "loss": 0.6247, "step": 12544 }, { "epoch": 1.7713922620728608, "grad_norm": 3.4429436694725903, "learning_rate": 7.580550951543048e-06, "loss": 0.5239, "step": 12545 }, { "epoch": 1.7715334651228467, "grad_norm": 3.7518113615551023, "learning_rate": 7.5790718182192545e-06, "loss": 0.5843, "step": 12546 }, { "epoch": 1.7716746681728326, "grad_norm": 3.1761870502269614, "learning_rate": 7.577592741156791e-06, "loss": 0.5849, "step": 12547 }, { "epoch": 1.7718158712228185, "grad_norm": 3.2543912862349704, "learning_rate": 7.576113720390027e-06, "loss": 0.5699, "step": 12548 }, { "epoch": 1.7719570742728044, "grad_norm": 4.378120058932897, "learning_rate": 7.574634755953331e-06, "loss": 0.6104, "step": 12549 }, { "epoch": 1.7720982773227902, "grad_norm": 3.3733759258172946, "learning_rate": 7.573155847881076e-06, "loss": 0.5081, "step": 12550 }, { "epoch": 1.7722394803727761, "grad_norm": 2.8668752163132267, "learning_rate": 7.5716769962076305e-06, "loss": 0.5344, "step": 12551 }, { "epoch": 1.772380683422762, "grad_norm": 3.061969354807529, "learning_rate": 7.570198200967363e-06, "loss": 0.5341, "step": 12552 }, { "epoch": 1.7725218864727479, "grad_norm": 3.326654387973597, "learning_rate": 7.568719462194639e-06, "loss": 0.5171, "step": 12553 }, { "epoch": 1.7726630895227338, "grad_norm": 4.263205024566076, "learning_rate": 7.567240779923827e-06, "loss": 0.7098, "step": 12554 }, { "epoch": 1.7728042925727197, "grad_norm": 3.629190852869291, "learning_rate": 7.565762154189284e-06, "loss": 0.5145, "step": 12555 }, { "epoch": 1.7729454956227055, "grad_norm": 3.3060427977322187, "learning_rate": 7.564283585025383e-06, "loss": 0.5156, "step": 12556 }, { "epoch": 1.7730866986726914, "grad_norm": 3.3801415206100924, "learning_rate": 7.562805072466479e-06, "loss": 0.513, "step": 12557 }, { "epoch": 1.7732279017226773, "grad_norm": 3.62197143566428, "learning_rate": 7.561326616546932e-06, "loss": 0.5418, "step": 12558 }, { "epoch": 1.7733691047726632, "grad_norm": 3.441188461105048, "learning_rate": 7.5598482173011e-06, "loss": 0.5428, "step": 12559 }, { "epoch": 1.773510307822649, "grad_norm": 3.253321556039228, "learning_rate": 7.5583698747633394e-06, "loss": 0.545, "step": 12560 }, { "epoch": 1.773651510872635, "grad_norm": 4.134119948160922, "learning_rate": 7.556891588968011e-06, "loss": 0.666, "step": 12561 }, { "epoch": 1.7737927139226208, "grad_norm": 4.09011967203926, "learning_rate": 7.555413359949468e-06, "loss": 0.6204, "step": 12562 }, { "epoch": 1.7739339169726067, "grad_norm": 4.581483731829366, "learning_rate": 7.553935187742061e-06, "loss": 0.7081, "step": 12563 }, { "epoch": 1.7740751200225926, "grad_norm": 3.4292329268034796, "learning_rate": 7.552457072380143e-06, "loss": 0.599, "step": 12564 }, { "epoch": 1.7742163230725785, "grad_norm": 3.960074085062633, "learning_rate": 7.55097901389807e-06, "loss": 0.6195, "step": 12565 }, { "epoch": 1.7743575261225644, "grad_norm": 5.301532332494007, "learning_rate": 7.549501012330184e-06, "loss": 0.8063, "step": 12566 }, { "epoch": 1.7744987291725502, "grad_norm": 3.1016080455082005, "learning_rate": 7.548023067710837e-06, "loss": 0.477, "step": 12567 }, { "epoch": 1.7746399322225361, "grad_norm": 3.87243441724298, "learning_rate": 7.546545180074374e-06, "loss": 0.5837, "step": 12568 }, { "epoch": 1.774781135272522, "grad_norm": 3.7543941466471473, "learning_rate": 7.54506734945514e-06, "loss": 0.5718, "step": 12569 }, { "epoch": 1.774922338322508, "grad_norm": 3.6294971934761167, "learning_rate": 7.543589575887482e-06, "loss": 0.5938, "step": 12570 }, { "epoch": 1.7750635413724938, "grad_norm": 3.1139574918940096, "learning_rate": 7.542111859405743e-06, "loss": 0.472, "step": 12571 }, { "epoch": 1.7752047444224797, "grad_norm": 3.8647637320750046, "learning_rate": 7.54063420004426e-06, "loss": 0.6484, "step": 12572 }, { "epoch": 1.7753459474724655, "grad_norm": 3.6346673522727677, "learning_rate": 7.539156597837378e-06, "loss": 0.5631, "step": 12573 }, { "epoch": 1.7754871505224514, "grad_norm": 3.4561902016910726, "learning_rate": 7.5376790528194354e-06, "loss": 0.5434, "step": 12574 }, { "epoch": 1.7756283535724373, "grad_norm": 3.5734905648712134, "learning_rate": 7.536201565024768e-06, "loss": 0.4642, "step": 12575 }, { "epoch": 1.7757695566224232, "grad_norm": 3.6518198853939237, "learning_rate": 7.534724134487709e-06, "loss": 0.6207, "step": 12576 }, { "epoch": 1.775910759672409, "grad_norm": 2.8179548782523125, "learning_rate": 7.533246761242598e-06, "loss": 0.4829, "step": 12577 }, { "epoch": 1.776051962722395, "grad_norm": 3.162921981434475, "learning_rate": 7.531769445323767e-06, "loss": 0.4685, "step": 12578 }, { "epoch": 1.7761931657723808, "grad_norm": 3.5070201542811636, "learning_rate": 7.530292186765548e-06, "loss": 0.5568, "step": 12579 }, { "epoch": 1.7763343688223667, "grad_norm": 3.312220876501213, "learning_rate": 7.528814985602273e-06, "loss": 0.5343, "step": 12580 }, { "epoch": 1.7764755718723524, "grad_norm": 3.3396743926493824, "learning_rate": 7.5273378418682675e-06, "loss": 0.5692, "step": 12581 }, { "epoch": 1.7766167749223383, "grad_norm": 3.6904956595668637, "learning_rate": 7.52586075559787e-06, "loss": 0.5595, "step": 12582 }, { "epoch": 1.7767579779723242, "grad_norm": 3.2002263143480847, "learning_rate": 7.524383726825393e-06, "loss": 0.4677, "step": 12583 }, { "epoch": 1.77689918102231, "grad_norm": 3.1659314314007783, "learning_rate": 7.522906755585171e-06, "loss": 0.4969, "step": 12584 }, { "epoch": 1.777040384072296, "grad_norm": 3.4771976551360755, "learning_rate": 7.5214298419115276e-06, "loss": 0.602, "step": 12585 }, { "epoch": 1.7771815871222818, "grad_norm": 2.675592134959582, "learning_rate": 7.5199529858387795e-06, "loss": 0.4096, "step": 12586 }, { "epoch": 1.7773227901722677, "grad_norm": 3.2689852867449125, "learning_rate": 7.518476187401258e-06, "loss": 0.5612, "step": 12587 }, { "epoch": 1.7774639932222536, "grad_norm": 4.562471569764921, "learning_rate": 7.516999446633277e-06, "loss": 0.7384, "step": 12588 }, { "epoch": 1.7776051962722395, "grad_norm": 4.496506750814818, "learning_rate": 7.5155227635691544e-06, "loss": 0.7187, "step": 12589 }, { "epoch": 1.7777463993222253, "grad_norm": 3.4723875571012095, "learning_rate": 7.514046138243211e-06, "loss": 0.5757, "step": 12590 }, { "epoch": 1.7778876023722112, "grad_norm": 3.8986731880578267, "learning_rate": 7.512569570689765e-06, "loss": 0.5776, "step": 12591 }, { "epoch": 1.778028805422197, "grad_norm": 3.048740512667674, "learning_rate": 7.511093060943125e-06, "loss": 0.4961, "step": 12592 }, { "epoch": 1.778170008472183, "grad_norm": 3.650814113577218, "learning_rate": 7.509616609037608e-06, "loss": 0.6637, "step": 12593 }, { "epoch": 1.7783112115221689, "grad_norm": 4.078932597071414, "learning_rate": 7.508140215007526e-06, "loss": 0.5934, "step": 12594 }, { "epoch": 1.7784524145721547, "grad_norm": 3.9426654685506213, "learning_rate": 7.506663878887186e-06, "loss": 0.5488, "step": 12595 }, { "epoch": 1.7785936176221406, "grad_norm": 3.9262854116583576, "learning_rate": 7.505187600710903e-06, "loss": 0.6591, "step": 12596 }, { "epoch": 1.7787348206721265, "grad_norm": 3.712545125532184, "learning_rate": 7.5037113805129835e-06, "loss": 0.6308, "step": 12597 }, { "epoch": 1.7788760237221124, "grad_norm": 3.7387462030315044, "learning_rate": 7.50223521832773e-06, "loss": 0.5182, "step": 12598 }, { "epoch": 1.7790172267720983, "grad_norm": 3.5699442642172228, "learning_rate": 7.50075911418946e-06, "loss": 0.5374, "step": 12599 }, { "epoch": 1.7791584298220842, "grad_norm": 3.9092569592706425, "learning_rate": 7.49928306813246e-06, "loss": 0.7467, "step": 12600 }, { "epoch": 1.77929963287207, "grad_norm": 3.118935331467771, "learning_rate": 7.497807080191046e-06, "loss": 0.4793, "step": 12601 }, { "epoch": 1.779440835922056, "grad_norm": 3.3574525816442895, "learning_rate": 7.496331150399512e-06, "loss": 0.605, "step": 12602 }, { "epoch": 1.7795820389720418, "grad_norm": 3.61421947780563, "learning_rate": 7.494855278792161e-06, "loss": 0.6011, "step": 12603 }, { "epoch": 1.7797232420220277, "grad_norm": 3.629797217769061, "learning_rate": 7.493379465403292e-06, "loss": 0.4923, "step": 12604 }, { "epoch": 1.7798644450720136, "grad_norm": 2.889899810849255, "learning_rate": 7.491903710267203e-06, "loss": 0.523, "step": 12605 }, { "epoch": 1.7800056481219995, "grad_norm": 3.906373564724152, "learning_rate": 7.490428013418187e-06, "loss": 0.774, "step": 12606 }, { "epoch": 1.7801468511719853, "grad_norm": 2.805753335559399, "learning_rate": 7.48895237489054e-06, "loss": 0.4316, "step": 12607 }, { "epoch": 1.7802880542219712, "grad_norm": 3.7765057671541937, "learning_rate": 7.4874767947185586e-06, "loss": 0.641, "step": 12608 }, { "epoch": 1.780429257271957, "grad_norm": 4.61490289679377, "learning_rate": 7.486001272936529e-06, "loss": 0.7612, "step": 12609 }, { "epoch": 1.780570460321943, "grad_norm": 4.148799089621253, "learning_rate": 7.4845258095787454e-06, "loss": 0.5987, "step": 12610 }, { "epoch": 1.7807116633719289, "grad_norm": 3.7275191942711157, "learning_rate": 7.4830504046794946e-06, "loss": 0.602, "step": 12611 }, { "epoch": 1.7808528664219148, "grad_norm": 3.1133517208204755, "learning_rate": 7.481575058273063e-06, "loss": 0.4384, "step": 12612 }, { "epoch": 1.7809940694719006, "grad_norm": 8.440440034910013, "learning_rate": 7.480099770393743e-06, "loss": 0.4437, "step": 12613 }, { "epoch": 1.7811352725218865, "grad_norm": 3.2018307085635516, "learning_rate": 7.478624541075814e-06, "loss": 0.4697, "step": 12614 }, { "epoch": 1.7812764755718722, "grad_norm": 3.1604731052340167, "learning_rate": 7.477149370353561e-06, "loss": 0.5455, "step": 12615 }, { "epoch": 1.781417678621858, "grad_norm": 3.509888587215544, "learning_rate": 7.475674258261274e-06, "loss": 0.588, "step": 12616 }, { "epoch": 1.781558881671844, "grad_norm": 3.5057161896353968, "learning_rate": 7.474199204833219e-06, "loss": 0.5795, "step": 12617 }, { "epoch": 1.7817000847218298, "grad_norm": 3.5040352268113257, "learning_rate": 7.472724210103687e-06, "loss": 0.6301, "step": 12618 }, { "epoch": 1.7818412877718157, "grad_norm": 5.19547656890915, "learning_rate": 7.4712492741069535e-06, "loss": 0.7418, "step": 12619 }, { "epoch": 1.7819824908218016, "grad_norm": 4.128503519134039, "learning_rate": 7.4697743968772906e-06, "loss": 0.5854, "step": 12620 }, { "epoch": 1.7821236938717875, "grad_norm": 3.9129094746573507, "learning_rate": 7.468299578448981e-06, "loss": 0.5506, "step": 12621 }, { "epoch": 1.7822648969217734, "grad_norm": 3.4439051374703817, "learning_rate": 7.466824818856296e-06, "loss": 0.6094, "step": 12622 }, { "epoch": 1.7824060999717593, "grad_norm": 3.6578575341536426, "learning_rate": 7.465350118133509e-06, "loss": 0.6297, "step": 12623 }, { "epoch": 1.7825473030217451, "grad_norm": 3.530279689275235, "learning_rate": 7.463875476314888e-06, "loss": 0.4868, "step": 12624 }, { "epoch": 1.782688506071731, "grad_norm": 3.6803974768764363, "learning_rate": 7.462400893434711e-06, "loss": 0.4803, "step": 12625 }, { "epoch": 1.782829709121717, "grad_norm": 3.505152466274522, "learning_rate": 7.460926369527236e-06, "loss": 0.5904, "step": 12626 }, { "epoch": 1.7829709121717028, "grad_norm": 3.9216795158815057, "learning_rate": 7.459451904626738e-06, "loss": 0.5733, "step": 12627 }, { "epoch": 1.7831121152216887, "grad_norm": 3.3964606147122987, "learning_rate": 7.45797749876748e-06, "loss": 0.5131, "step": 12628 }, { "epoch": 1.7832533182716745, "grad_norm": 3.4724944940623743, "learning_rate": 7.456503151983725e-06, "loss": 0.6069, "step": 12629 }, { "epoch": 1.7833945213216604, "grad_norm": 3.1994757420439894, "learning_rate": 7.455028864309742e-06, "loss": 0.4793, "step": 12630 }, { "epoch": 1.7835357243716463, "grad_norm": 3.3571940134403038, "learning_rate": 7.453554635779786e-06, "loss": 0.4749, "step": 12631 }, { "epoch": 1.7836769274216322, "grad_norm": 3.6312057788111876, "learning_rate": 7.4520804664281195e-06, "loss": 0.5593, "step": 12632 }, { "epoch": 1.783818130471618, "grad_norm": 3.768872218264469, "learning_rate": 7.450606356289009e-06, "loss": 0.5434, "step": 12633 }, { "epoch": 1.783959333521604, "grad_norm": 3.259814140292135, "learning_rate": 7.449132305396701e-06, "loss": 0.5693, "step": 12634 }, { "epoch": 1.7841005365715898, "grad_norm": 3.597756759226259, "learning_rate": 7.447658313785458e-06, "loss": 0.6635, "step": 12635 }, { "epoch": 1.7842417396215757, "grad_norm": 5.127070173959923, "learning_rate": 7.446184381489533e-06, "loss": 0.6483, "step": 12636 }, { "epoch": 1.7843829426715616, "grad_norm": 3.687316716326993, "learning_rate": 7.444710508543181e-06, "loss": 0.5838, "step": 12637 }, { "epoch": 1.7845241457215475, "grad_norm": 3.1021132460273226, "learning_rate": 7.443236694980649e-06, "loss": 0.4489, "step": 12638 }, { "epoch": 1.7846653487715334, "grad_norm": 3.509011034754844, "learning_rate": 7.441762940836197e-06, "loss": 0.6171, "step": 12639 }, { "epoch": 1.7848065518215193, "grad_norm": 3.726795909998218, "learning_rate": 7.440289246144067e-06, "loss": 0.6288, "step": 12640 }, { "epoch": 1.7849477548715051, "grad_norm": 3.737239882044377, "learning_rate": 7.438815610938512e-06, "loss": 0.5934, "step": 12641 }, { "epoch": 1.785088957921491, "grad_norm": 3.573548072965206, "learning_rate": 7.437342035253775e-06, "loss": 0.4912, "step": 12642 }, { "epoch": 1.785230160971477, "grad_norm": 4.218418493286466, "learning_rate": 7.4358685191241e-06, "loss": 0.6675, "step": 12643 }, { "epoch": 1.7853713640214628, "grad_norm": 2.93671541893229, "learning_rate": 7.434395062583735e-06, "loss": 0.4634, "step": 12644 }, { "epoch": 1.7855125670714487, "grad_norm": 4.28654180282839, "learning_rate": 7.432921665666921e-06, "loss": 0.7337, "step": 12645 }, { "epoch": 1.7856537701214346, "grad_norm": 3.4784233271388985, "learning_rate": 7.431448328407896e-06, "loss": 0.5185, "step": 12646 }, { "epoch": 1.7857949731714204, "grad_norm": 2.977648676040175, "learning_rate": 7.4299750508409054e-06, "loss": 0.4106, "step": 12647 }, { "epoch": 1.7859361762214063, "grad_norm": 4.515369302150069, "learning_rate": 7.428501833000186e-06, "loss": 0.5768, "step": 12648 }, { "epoch": 1.7860773792713922, "grad_norm": 3.1530223545070837, "learning_rate": 7.427028674919969e-06, "loss": 0.4981, "step": 12649 }, { "epoch": 1.786218582321378, "grad_norm": 4.108880410984938, "learning_rate": 7.4255555766345025e-06, "loss": 0.6309, "step": 12650 }, { "epoch": 1.786359785371364, "grad_norm": 3.409095633110417, "learning_rate": 7.4240825381780046e-06, "loss": 0.5093, "step": 12651 }, { "epoch": 1.7865009884213499, "grad_norm": 3.3522932557898897, "learning_rate": 7.422609559584719e-06, "loss": 0.5409, "step": 12652 }, { "epoch": 1.7866421914713357, "grad_norm": 2.5047917482370767, "learning_rate": 7.421136640888874e-06, "loss": 0.3895, "step": 12653 }, { "epoch": 1.7867833945213216, "grad_norm": 3.631414884750939, "learning_rate": 7.4196637821247e-06, "loss": 0.5799, "step": 12654 }, { "epoch": 1.7869245975713075, "grad_norm": 4.098617431893127, "learning_rate": 7.418190983326423e-06, "loss": 0.6633, "step": 12655 }, { "epoch": 1.7870658006212934, "grad_norm": 3.2313347050858123, "learning_rate": 7.416718244528275e-06, "loss": 0.5511, "step": 12656 }, { "epoch": 1.7872070036712793, "grad_norm": 3.748926376229165, "learning_rate": 7.415245565764479e-06, "loss": 0.5683, "step": 12657 }, { "epoch": 1.7873482067212652, "grad_norm": 2.989385034941308, "learning_rate": 7.413772947069262e-06, "loss": 0.3899, "step": 12658 }, { "epoch": 1.787489409771251, "grad_norm": 4.193753736571592, "learning_rate": 7.412300388476843e-06, "loss": 0.6039, "step": 12659 }, { "epoch": 1.787630612821237, "grad_norm": 3.3443997501048375, "learning_rate": 7.410827890021444e-06, "loss": 0.5453, "step": 12660 }, { "epoch": 1.7877718158712228, "grad_norm": 2.6002350329770976, "learning_rate": 7.409355451737287e-06, "loss": 0.3999, "step": 12661 }, { "epoch": 1.7879130189212087, "grad_norm": 3.3996801284863154, "learning_rate": 7.407883073658591e-06, "loss": 0.4863, "step": 12662 }, { "epoch": 1.7880542219711946, "grad_norm": 3.378611439705507, "learning_rate": 7.4064107558195705e-06, "loss": 0.4487, "step": 12663 }, { "epoch": 1.7881954250211805, "grad_norm": 3.3004741338294328, "learning_rate": 7.404938498254448e-06, "loss": 0.5053, "step": 12664 }, { "epoch": 1.7883366280711663, "grad_norm": 4.391406098640896, "learning_rate": 7.403466300997433e-06, "loss": 0.7061, "step": 12665 }, { "epoch": 1.7884778311211522, "grad_norm": 2.7147667939285056, "learning_rate": 7.401994164082738e-06, "loss": 0.3793, "step": 12666 }, { "epoch": 1.788619034171138, "grad_norm": 2.7796029442087735, "learning_rate": 7.400522087544582e-06, "loss": 0.471, "step": 12667 }, { "epoch": 1.788760237221124, "grad_norm": 3.900575154184068, "learning_rate": 7.399050071417163e-06, "loss": 0.5418, "step": 12668 }, { "epoch": 1.7889014402711099, "grad_norm": 3.997929457463544, "learning_rate": 7.397578115734701e-06, "loss": 0.6009, "step": 12669 }, { "epoch": 1.7890426433210957, "grad_norm": 3.865579488491704, "learning_rate": 7.396106220531398e-06, "loss": 0.6832, "step": 12670 }, { "epoch": 1.7891838463710816, "grad_norm": 3.698314498265299, "learning_rate": 7.394634385841464e-06, "loss": 0.4693, "step": 12671 }, { "epoch": 1.7893250494210675, "grad_norm": 4.112312581379585, "learning_rate": 7.3931626116990984e-06, "loss": 0.6782, "step": 12672 }, { "epoch": 1.7894662524710534, "grad_norm": 4.206031691032457, "learning_rate": 7.391690898138511e-06, "loss": 0.6342, "step": 12673 }, { "epoch": 1.7896074555210393, "grad_norm": 2.9341974074534907, "learning_rate": 7.3902192451939e-06, "loss": 0.4732, "step": 12674 }, { "epoch": 1.7897486585710252, "grad_norm": 3.5547094873322242, "learning_rate": 7.388747652899467e-06, "loss": 0.5304, "step": 12675 }, { "epoch": 1.789889861621011, "grad_norm": 3.580350531630066, "learning_rate": 7.3872761212894105e-06, "loss": 0.5147, "step": 12676 }, { "epoch": 1.790031064670997, "grad_norm": 3.630549936150676, "learning_rate": 7.385804650397926e-06, "loss": 0.5666, "step": 12677 }, { "epoch": 1.7901722677209828, "grad_norm": 3.0463342607227797, "learning_rate": 7.384333240259216e-06, "loss": 0.4943, "step": 12678 }, { "epoch": 1.7903134707709687, "grad_norm": 3.785761150213752, "learning_rate": 7.38286189090747e-06, "loss": 0.6318, "step": 12679 }, { "epoch": 1.7904546738209546, "grad_norm": 2.938631974523916, "learning_rate": 7.381390602376882e-06, "loss": 0.408, "step": 12680 }, { "epoch": 1.7905958768709405, "grad_norm": 3.6190864416059267, "learning_rate": 7.379919374701647e-06, "loss": 0.5577, "step": 12681 }, { "epoch": 1.7907370799209263, "grad_norm": 4.365291318520244, "learning_rate": 7.378448207915956e-06, "loss": 0.6039, "step": 12682 }, { "epoch": 1.7908782829709122, "grad_norm": 4.022758921784208, "learning_rate": 7.376977102053995e-06, "loss": 0.6264, "step": 12683 }, { "epoch": 1.791019486020898, "grad_norm": 2.9499139044916847, "learning_rate": 7.3755060571499555e-06, "loss": 0.4409, "step": 12684 }, { "epoch": 1.791160689070884, "grad_norm": 3.4629253714726933, "learning_rate": 7.374035073238019e-06, "loss": 0.4921, "step": 12685 }, { "epoch": 1.7913018921208699, "grad_norm": 3.64892059653003, "learning_rate": 7.372564150352373e-06, "loss": 0.5999, "step": 12686 }, { "epoch": 1.7914430951708558, "grad_norm": 4.007123129596786, "learning_rate": 7.3710932885272025e-06, "loss": 0.6305, "step": 12687 }, { "epoch": 1.7915842982208416, "grad_norm": 4.006839754062654, "learning_rate": 7.369622487796689e-06, "loss": 0.7175, "step": 12688 }, { "epoch": 1.7917255012708275, "grad_norm": 3.1225747930875065, "learning_rate": 7.3681517481950115e-06, "loss": 0.4113, "step": 12689 }, { "epoch": 1.7918667043208134, "grad_norm": 3.6881009908552653, "learning_rate": 7.366681069756352e-06, "loss": 0.5417, "step": 12690 }, { "epoch": 1.7920079073707993, "grad_norm": 3.434834778237671, "learning_rate": 7.365210452514887e-06, "loss": 0.5915, "step": 12691 }, { "epoch": 1.7921491104207852, "grad_norm": 3.2771102630452615, "learning_rate": 7.363739896504795e-06, "loss": 0.5171, "step": 12692 }, { "epoch": 1.792290313470771, "grad_norm": 3.3386181979533656, "learning_rate": 7.362269401760249e-06, "loss": 0.5369, "step": 12693 }, { "epoch": 1.792431516520757, "grad_norm": 4.0791853812449945, "learning_rate": 7.360798968315419e-06, "loss": 0.6142, "step": 12694 }, { "epoch": 1.7925727195707428, "grad_norm": 2.901189090695694, "learning_rate": 7.359328596204485e-06, "loss": 0.442, "step": 12695 }, { "epoch": 1.7927139226207287, "grad_norm": 3.8608285474008586, "learning_rate": 7.357858285461612e-06, "loss": 0.6361, "step": 12696 }, { "epoch": 1.7928551256707146, "grad_norm": 3.125412164644823, "learning_rate": 7.356388036120971e-06, "loss": 0.5036, "step": 12697 }, { "epoch": 1.7929963287207005, "grad_norm": 3.918134380335271, "learning_rate": 7.35491784821673e-06, "loss": 0.7047, "step": 12698 }, { "epoch": 1.7931375317706864, "grad_norm": 3.573750068360653, "learning_rate": 7.353447721783058e-06, "loss": 0.5853, "step": 12699 }, { "epoch": 1.7932787348206722, "grad_norm": 3.575881732531242, "learning_rate": 7.351977656854118e-06, "loss": 0.5195, "step": 12700 }, { "epoch": 1.7934199378706581, "grad_norm": 3.13267908164547, "learning_rate": 7.350507653464075e-06, "loss": 0.4429, "step": 12701 }, { "epoch": 1.793561140920644, "grad_norm": 3.898376231622971, "learning_rate": 7.349037711647089e-06, "loss": 0.6227, "step": 12702 }, { "epoch": 1.7937023439706299, "grad_norm": 3.236928022573513, "learning_rate": 7.347567831437318e-06, "loss": 0.4249, "step": 12703 }, { "epoch": 1.7938435470206158, "grad_norm": 3.7765891546651127, "learning_rate": 7.346098012868928e-06, "loss": 0.6913, "step": 12704 }, { "epoch": 1.7939847500706017, "grad_norm": 3.7215752914211824, "learning_rate": 7.344628255976074e-06, "loss": 0.4745, "step": 12705 }, { "epoch": 1.7941259531205875, "grad_norm": 3.3203337774186856, "learning_rate": 7.34315856079291e-06, "loss": 0.51, "step": 12706 }, { "epoch": 1.7942671561705734, "grad_norm": 2.86106333199991, "learning_rate": 7.341688927353596e-06, "loss": 0.446, "step": 12707 }, { "epoch": 1.7944083592205593, "grad_norm": 3.4183606113008844, "learning_rate": 7.340219355692284e-06, "loss": 0.5195, "step": 12708 }, { "epoch": 1.7945495622705452, "grad_norm": 3.7909207638006537, "learning_rate": 7.338749845843127e-06, "loss": 0.6058, "step": 12709 }, { "epoch": 1.794690765320531, "grad_norm": 3.2915240197073654, "learning_rate": 7.337280397840272e-06, "loss": 0.5921, "step": 12710 }, { "epoch": 1.794831968370517, "grad_norm": 3.413674397771417, "learning_rate": 7.335811011717868e-06, "loss": 0.4837, "step": 12711 }, { "epoch": 1.7949731714205028, "grad_norm": 2.890164757083276, "learning_rate": 7.334341687510069e-06, "loss": 0.485, "step": 12712 }, { "epoch": 1.7951143744704887, "grad_norm": 3.733623891650265, "learning_rate": 7.332872425251017e-06, "loss": 0.6677, "step": 12713 }, { "epoch": 1.7952555775204746, "grad_norm": 3.340627749826936, "learning_rate": 7.331403224974859e-06, "loss": 0.521, "step": 12714 }, { "epoch": 1.7953967805704605, "grad_norm": 3.2876621426675388, "learning_rate": 7.329934086715735e-06, "loss": 0.4876, "step": 12715 }, { "epoch": 1.7955379836204464, "grad_norm": 3.9149719699556877, "learning_rate": 7.328465010507791e-06, "loss": 0.5232, "step": 12716 }, { "epoch": 1.795679186670432, "grad_norm": 4.648901911737773, "learning_rate": 7.326995996385167e-06, "loss": 0.7846, "step": 12717 }, { "epoch": 1.795820389720418, "grad_norm": 3.9886244469537693, "learning_rate": 7.325527044382004e-06, "loss": 0.6559, "step": 12718 }, { "epoch": 1.7959615927704038, "grad_norm": 3.405930755157806, "learning_rate": 7.324058154532436e-06, "loss": 0.5244, "step": 12719 }, { "epoch": 1.7961027958203897, "grad_norm": 2.918366906182609, "learning_rate": 7.322589326870597e-06, "loss": 0.433, "step": 12720 }, { "epoch": 1.7962439988703756, "grad_norm": 4.031454695044306, "learning_rate": 7.3211205614306295e-06, "loss": 0.6516, "step": 12721 }, { "epoch": 1.7963852019203614, "grad_norm": 3.6081581404825736, "learning_rate": 7.319651858246661e-06, "loss": 0.554, "step": 12722 }, { "epoch": 1.7965264049703473, "grad_norm": 3.7419144127774366, "learning_rate": 7.318183217352826e-06, "loss": 0.5902, "step": 12723 }, { "epoch": 1.7966676080203332, "grad_norm": 4.742621704869029, "learning_rate": 7.316714638783257e-06, "loss": 0.6064, "step": 12724 }, { "epoch": 1.796808811070319, "grad_norm": 4.82273907888184, "learning_rate": 7.315246122572079e-06, "loss": 0.8922, "step": 12725 }, { "epoch": 1.796950014120305, "grad_norm": 3.1414286421046795, "learning_rate": 7.313777668753426e-06, "loss": 0.5858, "step": 12726 }, { "epoch": 1.7970912171702909, "grad_norm": 2.9337064126836556, "learning_rate": 7.312309277361415e-06, "loss": 0.4388, "step": 12727 }, { "epoch": 1.7972324202202767, "grad_norm": 4.370925828394352, "learning_rate": 7.310840948430178e-06, "loss": 0.5071, "step": 12728 }, { "epoch": 1.7973736232702626, "grad_norm": 3.8099790651171417, "learning_rate": 7.309372681993834e-06, "loss": 0.5017, "step": 12729 }, { "epoch": 1.7975148263202485, "grad_norm": 3.4049544222213997, "learning_rate": 7.307904478086507e-06, "loss": 0.4996, "step": 12730 }, { "epoch": 1.7976560293702344, "grad_norm": 3.1102297264107914, "learning_rate": 7.30643633674232e-06, "loss": 0.4769, "step": 12731 }, { "epoch": 1.7977972324202203, "grad_norm": 4.133678593446216, "learning_rate": 7.304968257995384e-06, "loss": 0.6377, "step": 12732 }, { "epoch": 1.7979384354702062, "grad_norm": 3.590422903533797, "learning_rate": 7.303500241879826e-06, "loss": 0.6099, "step": 12733 }, { "epoch": 1.798079638520192, "grad_norm": 3.8299392834917074, "learning_rate": 7.3020322884297565e-06, "loss": 0.5889, "step": 12734 }, { "epoch": 1.798220841570178, "grad_norm": 4.0224886958985415, "learning_rate": 7.300564397679295e-06, "loss": 0.6358, "step": 12735 }, { "epoch": 1.7983620446201638, "grad_norm": 3.3980633739495976, "learning_rate": 7.299096569662549e-06, "loss": 0.4936, "step": 12736 }, { "epoch": 1.7985032476701497, "grad_norm": 3.694730452982102, "learning_rate": 7.29762880441363e-06, "loss": 0.6171, "step": 12737 }, { "epoch": 1.7986444507201356, "grad_norm": 4.054157452143602, "learning_rate": 7.296161101966653e-06, "loss": 0.6675, "step": 12738 }, { "epoch": 1.7987856537701215, "grad_norm": 3.137732513471252, "learning_rate": 7.294693462355722e-06, "loss": 0.4635, "step": 12739 }, { "epoch": 1.7989268568201073, "grad_norm": 4.106962465112127, "learning_rate": 7.293225885614948e-06, "loss": 0.618, "step": 12740 }, { "epoch": 1.7990680598700932, "grad_norm": 3.351001136657417, "learning_rate": 7.291758371778434e-06, "loss": 0.5186, "step": 12741 }, { "epoch": 1.799209262920079, "grad_norm": 3.095535584675286, "learning_rate": 7.290290920880287e-06, "loss": 0.4581, "step": 12742 }, { "epoch": 1.799350465970065, "grad_norm": 4.8790316638921745, "learning_rate": 7.288823532954612e-06, "loss": 0.5173, "step": 12743 }, { "epoch": 1.7994916690200509, "grad_norm": 3.4235412006221924, "learning_rate": 7.2873562080355045e-06, "loss": 0.5957, "step": 12744 }, { "epoch": 1.7996328720700367, "grad_norm": 3.61441807486885, "learning_rate": 7.285888946157067e-06, "loss": 0.6229, "step": 12745 }, { "epoch": 1.7997740751200226, "grad_norm": 3.874448609758326, "learning_rate": 7.284421747353396e-06, "loss": 0.6714, "step": 12746 }, { "epoch": 1.7999152781700085, "grad_norm": 3.1205731565476236, "learning_rate": 7.282954611658593e-06, "loss": 0.4913, "step": 12747 }, { "epoch": 1.8000564812199944, "grad_norm": 3.6368251397556226, "learning_rate": 7.281487539106752e-06, "loss": 0.3863, "step": 12748 }, { "epoch": 1.8001976842699803, "grad_norm": 4.237562202468708, "learning_rate": 7.2800205297319635e-06, "loss": 0.5885, "step": 12749 }, { "epoch": 1.8003388873199662, "grad_norm": 3.9165445804160797, "learning_rate": 7.278553583568326e-06, "loss": 0.7061, "step": 12750 }, { "epoch": 1.800480090369952, "grad_norm": 3.9708163610009493, "learning_rate": 7.277086700649929e-06, "loss": 0.5641, "step": 12751 }, { "epoch": 1.8006212934199377, "grad_norm": 3.708905010268396, "learning_rate": 7.275619881010861e-06, "loss": 0.4969, "step": 12752 }, { "epoch": 1.8007624964699236, "grad_norm": 2.8383631044211186, "learning_rate": 7.27415312468521e-06, "loss": 0.4671, "step": 12753 }, { "epoch": 1.8009036995199095, "grad_norm": 3.268540859762566, "learning_rate": 7.27268643170706e-06, "loss": 0.4352, "step": 12754 }, { "epoch": 1.8010449025698954, "grad_norm": 3.8921672134913607, "learning_rate": 7.271219802110503e-06, "loss": 0.6199, "step": 12755 }, { "epoch": 1.8011861056198812, "grad_norm": 3.335159707890712, "learning_rate": 7.269753235929617e-06, "loss": 0.5978, "step": 12756 }, { "epoch": 1.8013273086698671, "grad_norm": 3.6669913236960343, "learning_rate": 7.268286733198488e-06, "loss": 0.5357, "step": 12757 }, { "epoch": 1.801468511719853, "grad_norm": 3.105498746146766, "learning_rate": 7.2668202939511946e-06, "loss": 0.4947, "step": 12758 }, { "epoch": 1.8016097147698389, "grad_norm": 3.744543631961202, "learning_rate": 7.265353918221818e-06, "loss": 0.5395, "step": 12759 }, { "epoch": 1.8017509178198248, "grad_norm": 3.4286925834638184, "learning_rate": 7.263887606044437e-06, "loss": 0.494, "step": 12760 }, { "epoch": 1.8018921208698107, "grad_norm": 4.277088564299397, "learning_rate": 7.262421357453126e-06, "loss": 0.6537, "step": 12761 }, { "epoch": 1.8020333239197965, "grad_norm": 3.773746061883331, "learning_rate": 7.260955172481959e-06, "loss": 0.5608, "step": 12762 }, { "epoch": 1.8021745269697824, "grad_norm": 4.028856070380123, "learning_rate": 7.2594890511650095e-06, "loss": 0.5824, "step": 12763 }, { "epoch": 1.8023157300197683, "grad_norm": 3.9433985389919854, "learning_rate": 7.258022993536352e-06, "loss": 0.5229, "step": 12764 }, { "epoch": 1.8024569330697542, "grad_norm": 4.015588004241889, "learning_rate": 7.2565569996300575e-06, "loss": 0.7205, "step": 12765 }, { "epoch": 1.80259813611974, "grad_norm": 3.987801265229566, "learning_rate": 7.2550910694801905e-06, "loss": 0.5594, "step": 12766 }, { "epoch": 1.802739339169726, "grad_norm": 4.186581520816828, "learning_rate": 7.253625203120823e-06, "loss": 0.572, "step": 12767 }, { "epoch": 1.8028805422197118, "grad_norm": 3.4068339951964326, "learning_rate": 7.2521594005860205e-06, "loss": 0.5226, "step": 12768 }, { "epoch": 1.8030217452696977, "grad_norm": 3.4934247803673513, "learning_rate": 7.2506936619098486e-06, "loss": 0.6156, "step": 12769 }, { "epoch": 1.8031629483196836, "grad_norm": 3.6421779793365947, "learning_rate": 7.249227987126368e-06, "loss": 0.4641, "step": 12770 }, { "epoch": 1.8033041513696695, "grad_norm": 4.659846481647254, "learning_rate": 7.247762376269638e-06, "loss": 0.6729, "step": 12771 }, { "epoch": 1.8034453544196554, "grad_norm": 3.6451761548937327, "learning_rate": 7.246296829373723e-06, "loss": 0.5617, "step": 12772 }, { "epoch": 1.8035865574696412, "grad_norm": 4.270266678295027, "learning_rate": 7.2448313464726805e-06, "loss": 0.6425, "step": 12773 }, { "epoch": 1.8037277605196271, "grad_norm": 3.745646676164853, "learning_rate": 7.24336592760057e-06, "loss": 0.5797, "step": 12774 }, { "epoch": 1.803868963569613, "grad_norm": 3.3680875064801157, "learning_rate": 7.2419005727914405e-06, "loss": 0.4409, "step": 12775 }, { "epoch": 1.804010166619599, "grad_norm": 4.343770900463015, "learning_rate": 7.240435282079352e-06, "loss": 0.5915, "step": 12776 }, { "epoch": 1.8041513696695848, "grad_norm": 3.82625734261806, "learning_rate": 7.23897005549836e-06, "loss": 0.6282, "step": 12777 }, { "epoch": 1.8042925727195707, "grad_norm": 3.311083247848096, "learning_rate": 7.237504893082507e-06, "loss": 0.5909, "step": 12778 }, { "epoch": 1.8044337757695565, "grad_norm": 3.6813289229231843, "learning_rate": 7.23603979486585e-06, "loss": 0.6215, "step": 12779 }, { "epoch": 1.8045749788195424, "grad_norm": 4.074242640755421, "learning_rate": 7.234574760882431e-06, "loss": 0.6597, "step": 12780 }, { "epoch": 1.8047161818695283, "grad_norm": 3.153750295800099, "learning_rate": 7.233109791166302e-06, "loss": 0.4488, "step": 12781 }, { "epoch": 1.8048573849195142, "grad_norm": 3.67938824784539, "learning_rate": 7.2316448857515076e-06, "loss": 0.4815, "step": 12782 }, { "epoch": 1.8049985879695, "grad_norm": 3.3171524152038545, "learning_rate": 7.230180044672088e-06, "loss": 0.4601, "step": 12783 }, { "epoch": 1.805139791019486, "grad_norm": 3.4718927648950526, "learning_rate": 7.22871526796209e-06, "loss": 0.4943, "step": 12784 }, { "epoch": 1.8052809940694718, "grad_norm": 3.283474010684326, "learning_rate": 7.2272505556555515e-06, "loss": 0.4923, "step": 12785 }, { "epoch": 1.8054221971194577, "grad_norm": 3.2633641780529987, "learning_rate": 7.225785907786516e-06, "loss": 0.4553, "step": 12786 }, { "epoch": 1.8055634001694436, "grad_norm": 3.537788569823869, "learning_rate": 7.2243213243890146e-06, "loss": 0.4854, "step": 12787 }, { "epoch": 1.8057046032194295, "grad_norm": 4.100864459972184, "learning_rate": 7.222856805497087e-06, "loss": 0.6192, "step": 12788 }, { "epoch": 1.8058458062694154, "grad_norm": 2.3592471401073594, "learning_rate": 7.221392351144767e-06, "loss": 0.3515, "step": 12789 }, { "epoch": 1.8059870093194013, "grad_norm": 3.0962669278812935, "learning_rate": 7.219927961366091e-06, "loss": 0.5731, "step": 12790 }, { "epoch": 1.8061282123693871, "grad_norm": 3.506394869642217, "learning_rate": 7.218463636195088e-06, "loss": 0.5345, "step": 12791 }, { "epoch": 1.806269415419373, "grad_norm": 3.3156456595344657, "learning_rate": 7.216999375665785e-06, "loss": 0.509, "step": 12792 }, { "epoch": 1.806410618469359, "grad_norm": 3.6154694903700886, "learning_rate": 7.215535179812219e-06, "loss": 0.5022, "step": 12793 }, { "epoch": 1.8065518215193448, "grad_norm": 4.283454322570586, "learning_rate": 7.214071048668414e-06, "loss": 0.7146, "step": 12794 }, { "epoch": 1.8066930245693307, "grad_norm": 3.219941638795533, "learning_rate": 7.212606982268393e-06, "loss": 0.4696, "step": 12795 }, { "epoch": 1.8068342276193166, "grad_norm": 3.496864381648772, "learning_rate": 7.211142980646182e-06, "loss": 0.5463, "step": 12796 }, { "epoch": 1.8069754306693024, "grad_norm": 3.449938999060595, "learning_rate": 7.209679043835801e-06, "loss": 0.5483, "step": 12797 }, { "epoch": 1.8071166337192883, "grad_norm": 3.5003768889564197, "learning_rate": 7.208215171871277e-06, "loss": 0.5336, "step": 12798 }, { "epoch": 1.8072578367692742, "grad_norm": 3.2777859876847995, "learning_rate": 7.206751364786626e-06, "loss": 0.4789, "step": 12799 }, { "epoch": 1.80739903981926, "grad_norm": 3.285162195889472, "learning_rate": 7.205287622615866e-06, "loss": 0.6362, "step": 12800 }, { "epoch": 1.807540242869246, "grad_norm": 3.044489379421284, "learning_rate": 7.203823945393015e-06, "loss": 0.4519, "step": 12801 }, { "epoch": 1.8076814459192319, "grad_norm": 3.625506880589648, "learning_rate": 7.20236033315209e-06, "loss": 0.5507, "step": 12802 }, { "epoch": 1.8078226489692177, "grad_norm": 3.620976522708403, "learning_rate": 7.200896785927102e-06, "loss": 0.4848, "step": 12803 }, { "epoch": 1.8079638520192036, "grad_norm": 4.149646574664961, "learning_rate": 7.199433303752064e-06, "loss": 0.6335, "step": 12804 }, { "epoch": 1.8081050550691895, "grad_norm": 3.8456707744668672, "learning_rate": 7.1979698866609845e-06, "loss": 0.6142, "step": 12805 }, { "epoch": 1.8082462581191754, "grad_norm": 3.4916951947600867, "learning_rate": 7.196506534687875e-06, "loss": 0.5288, "step": 12806 }, { "epoch": 1.8083874611691613, "grad_norm": 3.550521145222937, "learning_rate": 7.195043247866744e-06, "loss": 0.4856, "step": 12807 }, { "epoch": 1.8085286642191472, "grad_norm": 3.9811455053795552, "learning_rate": 7.193580026231596e-06, "loss": 0.5725, "step": 12808 }, { "epoch": 1.808669867269133, "grad_norm": 3.780150826519457, "learning_rate": 7.192116869816434e-06, "loss": 0.6369, "step": 12809 }, { "epoch": 1.808811070319119, "grad_norm": 3.529892229354472, "learning_rate": 7.190653778655267e-06, "loss": 0.5214, "step": 12810 }, { "epoch": 1.8089522733691048, "grad_norm": 3.053511670228956, "learning_rate": 7.1891907527820935e-06, "loss": 0.4901, "step": 12811 }, { "epoch": 1.8090934764190907, "grad_norm": 3.5511600211171794, "learning_rate": 7.187727792230912e-06, "loss": 0.5284, "step": 12812 }, { "epoch": 1.8092346794690766, "grad_norm": 2.869117548534986, "learning_rate": 7.186264897035722e-06, "loss": 0.373, "step": 12813 }, { "epoch": 1.8093758825190625, "grad_norm": 3.6839313224243826, "learning_rate": 7.184802067230518e-06, "loss": 0.5474, "step": 12814 }, { "epoch": 1.8095170855690483, "grad_norm": 3.8759859764280913, "learning_rate": 7.1833393028492996e-06, "loss": 0.5487, "step": 12815 }, { "epoch": 1.8096582886190342, "grad_norm": 3.010458671370947, "learning_rate": 7.18187660392606e-06, "loss": 0.481, "step": 12816 }, { "epoch": 1.80979949166902, "grad_norm": 3.432457030780017, "learning_rate": 7.1804139704947904e-06, "loss": 0.4258, "step": 12817 }, { "epoch": 1.809940694719006, "grad_norm": 3.4021210063784864, "learning_rate": 7.178951402589482e-06, "loss": 0.5436, "step": 12818 }, { "epoch": 1.8100818977689919, "grad_norm": 3.4804388177646066, "learning_rate": 7.17748890024413e-06, "loss": 0.5784, "step": 12819 }, { "epoch": 1.8102231008189777, "grad_norm": 3.5972704113643683, "learning_rate": 7.176026463492711e-06, "loss": 0.5205, "step": 12820 }, { "epoch": 1.8103643038689636, "grad_norm": 3.5505622583408862, "learning_rate": 7.17456409236922e-06, "loss": 0.5644, "step": 12821 }, { "epoch": 1.8105055069189495, "grad_norm": 3.2008258259016173, "learning_rate": 7.173101786907638e-06, "loss": 0.4622, "step": 12822 }, { "epoch": 1.8106467099689354, "grad_norm": 3.5318862895721104, "learning_rate": 7.171639547141949e-06, "loss": 0.5232, "step": 12823 }, { "epoch": 1.8107879130189213, "grad_norm": 3.4537266409057246, "learning_rate": 7.170177373106135e-06, "loss": 0.4863, "step": 12824 }, { "epoch": 1.8109291160689072, "grad_norm": 4.4507994291648245, "learning_rate": 7.168715264834178e-06, "loss": 0.679, "step": 12825 }, { "epoch": 1.811070319118893, "grad_norm": 4.387622575291305, "learning_rate": 7.167253222360056e-06, "loss": 0.6606, "step": 12826 }, { "epoch": 1.811211522168879, "grad_norm": 3.6084162764227705, "learning_rate": 7.165791245717745e-06, "loss": 0.5252, "step": 12827 }, { "epoch": 1.8113527252188648, "grad_norm": 3.909371690224482, "learning_rate": 7.164329334941225e-06, "loss": 0.5539, "step": 12828 }, { "epoch": 1.8114939282688507, "grad_norm": 3.761701722849796, "learning_rate": 7.162867490064463e-06, "loss": 0.5801, "step": 12829 }, { "epoch": 1.8116351313188366, "grad_norm": 4.289497338165475, "learning_rate": 7.161405711121436e-06, "loss": 0.6557, "step": 12830 }, { "epoch": 1.8117763343688225, "grad_norm": 3.8987627290864264, "learning_rate": 7.159943998146114e-06, "loss": 0.6136, "step": 12831 }, { "epoch": 1.8119175374188083, "grad_norm": 3.884263279689895, "learning_rate": 7.158482351172465e-06, "loss": 0.5894, "step": 12832 }, { "epoch": 1.8120587404687942, "grad_norm": 4.098844885359488, "learning_rate": 7.157020770234462e-06, "loss": 0.6347, "step": 12833 }, { "epoch": 1.81219994351878, "grad_norm": 3.408974804574832, "learning_rate": 7.155559255366067e-06, "loss": 0.5712, "step": 12834 }, { "epoch": 1.812341146568766, "grad_norm": 3.794150336662539, "learning_rate": 7.154097806601246e-06, "loss": 0.5948, "step": 12835 }, { "epoch": 1.8124823496187519, "grad_norm": 3.3815157301751766, "learning_rate": 7.152636423973969e-06, "loss": 0.5855, "step": 12836 }, { "epoch": 1.8126235526687378, "grad_norm": 3.493423415677898, "learning_rate": 7.151175107518185e-06, "loss": 0.5609, "step": 12837 }, { "epoch": 1.8127647557187236, "grad_norm": 3.4237426949607115, "learning_rate": 7.149713857267862e-06, "loss": 0.5316, "step": 12838 }, { "epoch": 1.8129059587687095, "grad_norm": 3.3766753050264846, "learning_rate": 7.148252673256959e-06, "loss": 0.4548, "step": 12839 }, { "epoch": 1.8130471618186954, "grad_norm": 3.02548650481774, "learning_rate": 7.146791555519431e-06, "loss": 0.4517, "step": 12840 }, { "epoch": 1.8131883648686813, "grad_norm": 3.170876950611142, "learning_rate": 7.145330504089236e-06, "loss": 0.4711, "step": 12841 }, { "epoch": 1.8133295679186672, "grad_norm": 4.212236301973642, "learning_rate": 7.143869519000328e-06, "loss": 0.7011, "step": 12842 }, { "epoch": 1.813470770968653, "grad_norm": 3.261385322319014, "learning_rate": 7.14240860028666e-06, "loss": 0.4848, "step": 12843 }, { "epoch": 1.813611974018639, "grad_norm": 3.2961521373568874, "learning_rate": 7.140947747982179e-06, "loss": 0.4457, "step": 12844 }, { "epoch": 1.8137531770686248, "grad_norm": 3.1731788247630495, "learning_rate": 7.139486962120841e-06, "loss": 0.4463, "step": 12845 }, { "epoch": 1.8138943801186107, "grad_norm": 4.592416823032784, "learning_rate": 7.1380262427365885e-06, "loss": 0.6016, "step": 12846 }, { "epoch": 1.8140355831685966, "grad_norm": 3.1959642312972614, "learning_rate": 7.136565589863371e-06, "loss": 0.4942, "step": 12847 }, { "epoch": 1.8141767862185825, "grad_norm": 3.8194520166813, "learning_rate": 7.135105003535132e-06, "loss": 0.6647, "step": 12848 }, { "epoch": 1.8143179892685684, "grad_norm": 4.117756177907205, "learning_rate": 7.133644483785814e-06, "loss": 0.6815, "step": 12849 }, { "epoch": 1.8144591923185542, "grad_norm": 3.4438750250954193, "learning_rate": 7.1321840306493625e-06, "loss": 0.5341, "step": 12850 }, { "epoch": 1.8146003953685401, "grad_norm": 3.9340419976386753, "learning_rate": 7.130723644159715e-06, "loss": 0.6784, "step": 12851 }, { "epoch": 1.814741598418526, "grad_norm": 5.181105243087843, "learning_rate": 7.129263324350808e-06, "loss": 0.812, "step": 12852 }, { "epoch": 1.8148828014685119, "grad_norm": 3.24221118849084, "learning_rate": 7.1278030712565896e-06, "loss": 0.5055, "step": 12853 }, { "epoch": 1.8150240045184975, "grad_norm": 4.095749801217945, "learning_rate": 7.1263428849109805e-06, "loss": 0.6455, "step": 12854 }, { "epoch": 1.8151652075684834, "grad_norm": 3.9353727956222198, "learning_rate": 7.124882765347923e-06, "loss": 0.512, "step": 12855 }, { "epoch": 1.8153064106184693, "grad_norm": 4.048878759200474, "learning_rate": 7.123422712601349e-06, "loss": 0.5852, "step": 12856 }, { "epoch": 1.8154476136684552, "grad_norm": 3.725134924002504, "learning_rate": 7.121962726705187e-06, "loss": 0.5781, "step": 12857 }, { "epoch": 1.815588816718441, "grad_norm": 3.1205570886717573, "learning_rate": 7.1205028076933705e-06, "loss": 0.4256, "step": 12858 }, { "epoch": 1.815730019768427, "grad_norm": 3.4264098254931694, "learning_rate": 7.119042955599824e-06, "loss": 0.6082, "step": 12859 }, { "epoch": 1.8158712228184128, "grad_norm": 3.3652304244043885, "learning_rate": 7.117583170458478e-06, "loss": 0.4821, "step": 12860 }, { "epoch": 1.8160124258683987, "grad_norm": 4.033551001052538, "learning_rate": 7.116123452303248e-06, "loss": 0.5044, "step": 12861 }, { "epoch": 1.8161536289183846, "grad_norm": 4.0040084829129015, "learning_rate": 7.114663801168073e-06, "loss": 0.6021, "step": 12862 }, { "epoch": 1.8162948319683705, "grad_norm": 3.9418016054370755, "learning_rate": 7.113204217086858e-06, "loss": 0.5867, "step": 12863 }, { "epoch": 1.8164360350183564, "grad_norm": 3.298424339274356, "learning_rate": 7.111744700093531e-06, "loss": 0.4045, "step": 12864 }, { "epoch": 1.8165772380683423, "grad_norm": 3.420851941401796, "learning_rate": 7.110285250222011e-06, "loss": 0.4584, "step": 12865 }, { "epoch": 1.8167184411183281, "grad_norm": 3.287380952736182, "learning_rate": 7.108825867506213e-06, "loss": 0.5353, "step": 12866 }, { "epoch": 1.816859644168314, "grad_norm": 3.57345297573272, "learning_rate": 7.107366551980053e-06, "loss": 0.5899, "step": 12867 }, { "epoch": 1.8170008472183, "grad_norm": 3.0369197946473485, "learning_rate": 7.105907303677446e-06, "loss": 0.4798, "step": 12868 }, { "epoch": 1.8171420502682858, "grad_norm": 3.6108539437221, "learning_rate": 7.104448122632302e-06, "loss": 0.5404, "step": 12869 }, { "epoch": 1.8172832533182717, "grad_norm": 3.015423832970976, "learning_rate": 7.102989008878538e-06, "loss": 0.4636, "step": 12870 }, { "epoch": 1.8174244563682576, "grad_norm": 4.2188865769592825, "learning_rate": 7.101529962450053e-06, "loss": 0.5774, "step": 12871 }, { "epoch": 1.8175656594182434, "grad_norm": 3.3373234918907184, "learning_rate": 7.100070983380763e-06, "loss": 0.5251, "step": 12872 }, { "epoch": 1.8177068624682293, "grad_norm": 2.9330609149093254, "learning_rate": 7.098612071704569e-06, "loss": 0.4617, "step": 12873 }, { "epoch": 1.8178480655182152, "grad_norm": 3.7339006018759204, "learning_rate": 7.097153227455379e-06, "loss": 0.6237, "step": 12874 }, { "epoch": 1.817989268568201, "grad_norm": 3.3856046501674424, "learning_rate": 7.0956944506670915e-06, "loss": 0.542, "step": 12875 }, { "epoch": 1.818130471618187, "grad_norm": 4.293288920298669, "learning_rate": 7.0942357413736116e-06, "loss": 0.6428, "step": 12876 }, { "epoch": 1.8182716746681729, "grad_norm": 2.661555044523843, "learning_rate": 7.092777099608841e-06, "loss": 0.442, "step": 12877 }, { "epoch": 1.8184128777181587, "grad_norm": 3.6859024310855393, "learning_rate": 7.091318525406671e-06, "loss": 0.5181, "step": 12878 }, { "epoch": 1.8185540807681446, "grad_norm": 3.1742771846401197, "learning_rate": 7.0898600188010095e-06, "loss": 0.473, "step": 12879 }, { "epoch": 1.8186952838181305, "grad_norm": 4.195754624080737, "learning_rate": 7.0884015798257365e-06, "loss": 0.617, "step": 12880 }, { "epoch": 1.8188364868681164, "grad_norm": 4.69524693322813, "learning_rate": 7.086943208514756e-06, "loss": 0.7408, "step": 12881 }, { "epoch": 1.8189776899181023, "grad_norm": 2.972310631206243, "learning_rate": 7.0854849049019584e-06, "loss": 0.4099, "step": 12882 }, { "epoch": 1.8191188929680882, "grad_norm": 3.3687230133511874, "learning_rate": 7.084026669021231e-06, "loss": 0.4717, "step": 12883 }, { "epoch": 1.819260096018074, "grad_norm": 3.7843386927456915, "learning_rate": 7.082568500906466e-06, "loss": 0.6699, "step": 12884 }, { "epoch": 1.81940129906806, "grad_norm": 3.682661043652068, "learning_rate": 7.081110400591549e-06, "loss": 0.5295, "step": 12885 }, { "epoch": 1.8195425021180458, "grad_norm": 2.8416972266776113, "learning_rate": 7.0796523681103635e-06, "loss": 0.4449, "step": 12886 }, { "epoch": 1.8196837051680317, "grad_norm": 3.470541658907999, "learning_rate": 7.0781944034968005e-06, "loss": 0.5316, "step": 12887 }, { "epoch": 1.8198249082180173, "grad_norm": 4.172147579996848, "learning_rate": 7.076736506784734e-06, "loss": 0.6595, "step": 12888 }, { "epoch": 1.8199661112680032, "grad_norm": 3.273681122943454, "learning_rate": 7.07527867800805e-06, "loss": 0.5196, "step": 12889 }, { "epoch": 1.8201073143179891, "grad_norm": 3.8661528676453663, "learning_rate": 7.073820917200627e-06, "loss": 0.6009, "step": 12890 }, { "epoch": 1.820248517367975, "grad_norm": 3.5537568982398278, "learning_rate": 7.07236322439634e-06, "loss": 0.5928, "step": 12891 }, { "epoch": 1.8203897204179609, "grad_norm": 3.7316836035607146, "learning_rate": 7.070905599629068e-06, "loss": 0.6217, "step": 12892 }, { "epoch": 1.8205309234679468, "grad_norm": 4.180834001327106, "learning_rate": 7.069448042932685e-06, "loss": 0.6357, "step": 12893 }, { "epoch": 1.8206721265179326, "grad_norm": 4.571762484979292, "learning_rate": 7.067990554341064e-06, "loss": 0.6285, "step": 12894 }, { "epoch": 1.8208133295679185, "grad_norm": 3.332436260374653, "learning_rate": 7.066533133888074e-06, "loss": 0.5565, "step": 12895 }, { "epoch": 1.8209545326179044, "grad_norm": 4.1325157189700175, "learning_rate": 7.065075781607594e-06, "loss": 0.5795, "step": 12896 }, { "epoch": 1.8210957356678903, "grad_norm": 3.962848640363837, "learning_rate": 7.06361849753348e-06, "loss": 0.5845, "step": 12897 }, { "epoch": 1.8212369387178762, "grad_norm": 3.4771717415279393, "learning_rate": 7.062161281699605e-06, "loss": 0.5677, "step": 12898 }, { "epoch": 1.821378141767862, "grad_norm": 3.271499791608426, "learning_rate": 7.060704134139833e-06, "loss": 0.545, "step": 12899 }, { "epoch": 1.821519344817848, "grad_norm": 3.155150276888311, "learning_rate": 7.059247054888025e-06, "loss": 0.4396, "step": 12900 }, { "epoch": 1.8216605478678338, "grad_norm": 3.3192578487232067, "learning_rate": 7.057790043978049e-06, "loss": 0.4588, "step": 12901 }, { "epoch": 1.8218017509178197, "grad_norm": 3.8616721500280535, "learning_rate": 7.056333101443761e-06, "loss": 0.5879, "step": 12902 }, { "epoch": 1.8219429539678056, "grad_norm": 3.175853212801913, "learning_rate": 7.054876227319021e-06, "loss": 0.4731, "step": 12903 }, { "epoch": 1.8220841570177915, "grad_norm": 2.8954068624408817, "learning_rate": 7.053419421637686e-06, "loss": 0.4487, "step": 12904 }, { "epoch": 1.8222253600677774, "grad_norm": 3.2322616949485576, "learning_rate": 7.051962684433609e-06, "loss": 0.5377, "step": 12905 }, { "epoch": 1.8223665631177632, "grad_norm": 3.745735070647311, "learning_rate": 7.050506015740646e-06, "loss": 0.5995, "step": 12906 }, { "epoch": 1.8225077661677491, "grad_norm": 3.771105120529124, "learning_rate": 7.04904941559265e-06, "loss": 0.4554, "step": 12907 }, { "epoch": 1.822648969217735, "grad_norm": 3.3877731431226428, "learning_rate": 7.047592884023473e-06, "loss": 0.4849, "step": 12908 }, { "epoch": 1.8227901722677209, "grad_norm": 3.382403822786309, "learning_rate": 7.046136421066958e-06, "loss": 0.5002, "step": 12909 }, { "epoch": 1.8229313753177068, "grad_norm": 3.438775793625825, "learning_rate": 7.04468002675696e-06, "loss": 0.3849, "step": 12910 }, { "epoch": 1.8230725783676927, "grad_norm": 3.798581986970123, "learning_rate": 7.043223701127322e-06, "loss": 0.5639, "step": 12911 }, { "epoch": 1.8232137814176785, "grad_norm": 3.855478516877096, "learning_rate": 7.041767444211886e-06, "loss": 0.5847, "step": 12912 }, { "epoch": 1.8233549844676644, "grad_norm": 3.4640605423079034, "learning_rate": 7.0403112560445035e-06, "loss": 0.5314, "step": 12913 }, { "epoch": 1.8234961875176503, "grad_norm": 2.837153723373047, "learning_rate": 7.038855136659002e-06, "loss": 0.4539, "step": 12914 }, { "epoch": 1.8236373905676362, "grad_norm": 3.8393920388939504, "learning_rate": 7.0373990860892316e-06, "loss": 0.621, "step": 12915 }, { "epoch": 1.823778593617622, "grad_norm": 3.907877872451048, "learning_rate": 7.035943104369026e-06, "loss": 0.6964, "step": 12916 }, { "epoch": 1.823919796667608, "grad_norm": 3.46895419756258, "learning_rate": 7.034487191532221e-06, "loss": 0.5484, "step": 12917 }, { "epoch": 1.8240609997175938, "grad_norm": 3.042037436344643, "learning_rate": 7.033031347612655e-06, "loss": 0.5172, "step": 12918 }, { "epoch": 1.8242022027675797, "grad_norm": 3.4928262045145053, "learning_rate": 7.031575572644159e-06, "loss": 0.4728, "step": 12919 }, { "epoch": 1.8243434058175656, "grad_norm": 3.310749770472833, "learning_rate": 7.030119866660565e-06, "loss": 0.479, "step": 12920 }, { "epoch": 1.8244846088675515, "grad_norm": 4.330850869752965, "learning_rate": 7.028664229695705e-06, "loss": 0.5467, "step": 12921 }, { "epoch": 1.8246258119175374, "grad_norm": 4.326678458502234, "learning_rate": 7.027208661783402e-06, "loss": 0.5656, "step": 12922 }, { "epoch": 1.8247670149675232, "grad_norm": 3.034908152997462, "learning_rate": 7.025753162957485e-06, "loss": 0.5014, "step": 12923 }, { "epoch": 1.8249082180175091, "grad_norm": 3.2159150696126533, "learning_rate": 7.024297733251781e-06, "loss": 0.4997, "step": 12924 }, { "epoch": 1.825049421067495, "grad_norm": 4.084357067061361, "learning_rate": 7.022842372700114e-06, "loss": 0.5331, "step": 12925 }, { "epoch": 1.825190624117481, "grad_norm": 4.430834825734025, "learning_rate": 7.021387081336302e-06, "loss": 0.7386, "step": 12926 }, { "epoch": 1.8253318271674668, "grad_norm": 3.7519976370525225, "learning_rate": 7.019931859194169e-06, "loss": 0.4744, "step": 12927 }, { "epoch": 1.8254730302174527, "grad_norm": 3.110141799865905, "learning_rate": 7.018476706307533e-06, "loss": 0.5555, "step": 12928 }, { "epoch": 1.8256142332674385, "grad_norm": 3.049452026967664, "learning_rate": 7.017021622710208e-06, "loss": 0.4372, "step": 12929 }, { "epoch": 1.8257554363174244, "grad_norm": 3.1151367330581317, "learning_rate": 7.01556660843602e-06, "loss": 0.4619, "step": 12930 }, { "epoch": 1.8258966393674103, "grad_norm": 4.201986617164945, "learning_rate": 7.014111663518768e-06, "loss": 0.6328, "step": 12931 }, { "epoch": 1.8260378424173962, "grad_norm": 4.457439444604092, "learning_rate": 7.0126567879922735e-06, "loss": 0.7733, "step": 12932 }, { "epoch": 1.826179045467382, "grad_norm": 4.36747949866512, "learning_rate": 7.011201981890345e-06, "loss": 0.5876, "step": 12933 }, { "epoch": 1.826320248517368, "grad_norm": 3.6606938349099116, "learning_rate": 7.009747245246792e-06, "loss": 0.5329, "step": 12934 }, { "epoch": 1.8264614515673538, "grad_norm": 2.9492616199750206, "learning_rate": 7.008292578095419e-06, "loss": 0.454, "step": 12935 }, { "epoch": 1.8266026546173397, "grad_norm": 3.4802303868372073, "learning_rate": 7.006837980470038e-06, "loss": 0.5824, "step": 12936 }, { "epoch": 1.8267438576673256, "grad_norm": 3.5004929693023725, "learning_rate": 7.0053834524044485e-06, "loss": 0.5644, "step": 12937 }, { "epoch": 1.8268850607173115, "grad_norm": 3.6414346385005785, "learning_rate": 7.003928993932456e-06, "loss": 0.5573, "step": 12938 }, { "epoch": 1.8270262637672974, "grad_norm": 4.415182299903268, "learning_rate": 7.002474605087859e-06, "loss": 0.6296, "step": 12939 }, { "epoch": 1.8271674668172833, "grad_norm": 4.419292363189847, "learning_rate": 7.001020285904454e-06, "loss": 0.6941, "step": 12940 }, { "epoch": 1.8273086698672691, "grad_norm": 3.532028696670464, "learning_rate": 6.9995660364160455e-06, "loss": 0.5155, "step": 12941 }, { "epoch": 1.827449872917255, "grad_norm": 3.8983695251116384, "learning_rate": 6.998111856656427e-06, "loss": 0.6924, "step": 12942 }, { "epoch": 1.827591075967241, "grad_norm": 4.119268997257333, "learning_rate": 6.99665774665939e-06, "loss": 0.573, "step": 12943 }, { "epoch": 1.8277322790172268, "grad_norm": 3.478562512655586, "learning_rate": 6.995203706458731e-06, "loss": 0.6019, "step": 12944 }, { "epoch": 1.8278734820672127, "grad_norm": 3.5515429282841007, "learning_rate": 6.993749736088241e-06, "loss": 0.5283, "step": 12945 }, { "epoch": 1.8280146851171986, "grad_norm": 3.4037633162633596, "learning_rate": 6.9922958355817085e-06, "loss": 0.5047, "step": 12946 }, { "epoch": 1.8281558881671844, "grad_norm": 3.3683701889140614, "learning_rate": 6.9908420049729244e-06, "loss": 0.6367, "step": 12947 }, { "epoch": 1.8282970912171703, "grad_norm": 3.3998008631200336, "learning_rate": 6.989388244295669e-06, "loss": 0.5997, "step": 12948 }, { "epoch": 1.8284382942671562, "grad_norm": 5.050720707850807, "learning_rate": 6.987934553583732e-06, "loss": 0.6887, "step": 12949 }, { "epoch": 1.828579497317142, "grad_norm": 4.017842694274579, "learning_rate": 6.9864809328708945e-06, "loss": 0.5307, "step": 12950 }, { "epoch": 1.828720700367128, "grad_norm": 4.015390314737476, "learning_rate": 6.9850273821909395e-06, "loss": 0.7019, "step": 12951 }, { "epoch": 1.8288619034171139, "grad_norm": 3.478739896118804, "learning_rate": 6.983573901577644e-06, "loss": 0.575, "step": 12952 }, { "epoch": 1.8290031064670997, "grad_norm": 3.3442106589343323, "learning_rate": 6.98212049106479e-06, "loss": 0.4844, "step": 12953 }, { "epoch": 1.8291443095170856, "grad_norm": 3.853777997147361, "learning_rate": 6.980667150686153e-06, "loss": 0.5272, "step": 12954 }, { "epoch": 1.8292855125670715, "grad_norm": 3.220241348100374, "learning_rate": 6.979213880475509e-06, "loss": 0.4219, "step": 12955 }, { "epoch": 1.8294267156170574, "grad_norm": 3.82295888260213, "learning_rate": 6.977760680466628e-06, "loss": 0.5854, "step": 12956 }, { "epoch": 1.8295679186670433, "grad_norm": 3.205860338350853, "learning_rate": 6.976307550693282e-06, "loss": 0.5569, "step": 12957 }, { "epoch": 1.8297091217170292, "grad_norm": 3.445479321651828, "learning_rate": 6.974854491189243e-06, "loss": 0.5719, "step": 12958 }, { "epoch": 1.829850324767015, "grad_norm": 3.0506326938637187, "learning_rate": 6.973401501988282e-06, "loss": 0.4373, "step": 12959 }, { "epoch": 1.829991527817001, "grad_norm": 3.0765396251341195, "learning_rate": 6.971948583124159e-06, "loss": 0.494, "step": 12960 }, { "epoch": 1.8301327308669868, "grad_norm": 3.710493014480271, "learning_rate": 6.970495734630644e-06, "loss": 0.5816, "step": 12961 }, { "epoch": 1.8302739339169727, "grad_norm": 3.298814896555165, "learning_rate": 6.969042956541503e-06, "loss": 0.4884, "step": 12962 }, { "epoch": 1.8304151369669586, "grad_norm": 3.1585775646162046, "learning_rate": 6.967590248890494e-06, "loss": 0.5538, "step": 12963 }, { "epoch": 1.8305563400169444, "grad_norm": 3.634156383268282, "learning_rate": 6.9661376117113775e-06, "loss": 0.5873, "step": 12964 }, { "epoch": 1.8306975430669303, "grad_norm": 3.1868988096888176, "learning_rate": 6.9646850450379135e-06, "loss": 0.466, "step": 12965 }, { "epoch": 1.8308387461169162, "grad_norm": 2.8879852298620534, "learning_rate": 6.963232548903853e-06, "loss": 0.4434, "step": 12966 }, { "epoch": 1.830979949166902, "grad_norm": 4.0245401590934495, "learning_rate": 6.961780123342962e-06, "loss": 0.6467, "step": 12967 }, { "epoch": 1.831121152216888, "grad_norm": 4.078065828583929, "learning_rate": 6.960327768388987e-06, "loss": 0.6642, "step": 12968 }, { "epoch": 1.8312623552668739, "grad_norm": 3.4408978974910207, "learning_rate": 6.95887548407568e-06, "loss": 0.4727, "step": 12969 }, { "epoch": 1.8314035583168597, "grad_norm": 3.377404608305792, "learning_rate": 6.957423270436796e-06, "loss": 0.5405, "step": 12970 }, { "epoch": 1.8315447613668456, "grad_norm": 2.850221783227584, "learning_rate": 6.95597112750608e-06, "loss": 0.4097, "step": 12971 }, { "epoch": 1.8316859644168315, "grad_norm": 4.236598820886305, "learning_rate": 6.9545190553172836e-06, "loss": 0.7268, "step": 12972 }, { "epoch": 1.8318271674668174, "grad_norm": 4.004826606188797, "learning_rate": 6.953067053904148e-06, "loss": 0.5986, "step": 12973 }, { "epoch": 1.8319683705168033, "grad_norm": 4.394742659423986, "learning_rate": 6.951615123300415e-06, "loss": 0.5886, "step": 12974 }, { "epoch": 1.8321095735667892, "grad_norm": 4.245458123013903, "learning_rate": 6.950163263539832e-06, "loss": 0.6157, "step": 12975 }, { "epoch": 1.832250776616775, "grad_norm": 3.538521373921943, "learning_rate": 6.948711474656138e-06, "loss": 0.4566, "step": 12976 }, { "epoch": 1.832391979666761, "grad_norm": 3.01240469021295, "learning_rate": 6.947259756683072e-06, "loss": 0.4359, "step": 12977 }, { "epoch": 1.8325331827167468, "grad_norm": 3.390901435180193, "learning_rate": 6.945808109654366e-06, "loss": 0.52, "step": 12978 }, { "epoch": 1.8326743857667327, "grad_norm": 3.8974968636659555, "learning_rate": 6.944356533603766e-06, "loss": 0.5053, "step": 12979 }, { "epoch": 1.8328155888167186, "grad_norm": 3.95819792918635, "learning_rate": 6.9429050285650015e-06, "loss": 0.5875, "step": 12980 }, { "epoch": 1.8329567918667045, "grad_norm": 3.948153055721032, "learning_rate": 6.941453594571801e-06, "loss": 0.5853, "step": 12981 }, { "epoch": 1.8330979949166903, "grad_norm": 3.3413157512326612, "learning_rate": 6.940002231657899e-06, "loss": 0.5088, "step": 12982 }, { "epoch": 1.8332391979666762, "grad_norm": 4.100716382762518, "learning_rate": 6.93855093985702e-06, "loss": 0.6964, "step": 12983 }, { "epoch": 1.833380401016662, "grad_norm": 3.6980727272691905, "learning_rate": 6.9370997192028995e-06, "loss": 0.5444, "step": 12984 }, { "epoch": 1.833521604066648, "grad_norm": 3.350969296039136, "learning_rate": 6.935648569729258e-06, "loss": 0.5281, "step": 12985 }, { "epoch": 1.8336628071166339, "grad_norm": 3.2090912131890237, "learning_rate": 6.934197491469818e-06, "loss": 0.4458, "step": 12986 }, { "epoch": 1.8338040101666198, "grad_norm": 3.096354032595325, "learning_rate": 6.932746484458306e-06, "loss": 0.4669, "step": 12987 }, { "epoch": 1.8339452132166056, "grad_norm": 3.3447443800897885, "learning_rate": 6.93129554872844e-06, "loss": 0.4481, "step": 12988 }, { "epoch": 1.8340864162665915, "grad_norm": 3.7473248184129537, "learning_rate": 6.929844684313944e-06, "loss": 0.5671, "step": 12989 }, { "epoch": 1.8342276193165772, "grad_norm": 3.9988993363777845, "learning_rate": 6.928393891248529e-06, "loss": 0.7134, "step": 12990 }, { "epoch": 1.834368822366563, "grad_norm": 3.58485593446313, "learning_rate": 6.926943169565912e-06, "loss": 0.5114, "step": 12991 }, { "epoch": 1.834510025416549, "grad_norm": 3.2803229232289475, "learning_rate": 6.92549251929981e-06, "loss": 0.5011, "step": 12992 }, { "epoch": 1.8346512284665348, "grad_norm": 3.313406497025851, "learning_rate": 6.924041940483935e-06, "loss": 0.5176, "step": 12993 }, { "epoch": 1.8347924315165207, "grad_norm": 3.1129437419894157, "learning_rate": 6.922591433151995e-06, "loss": 0.467, "step": 12994 }, { "epoch": 1.8349336345665066, "grad_norm": 3.5439691082162352, "learning_rate": 6.921140997337701e-06, "loss": 0.5441, "step": 12995 }, { "epoch": 1.8350748376164925, "grad_norm": 2.969154537991024, "learning_rate": 6.91969063307476e-06, "loss": 0.4099, "step": 12996 }, { "epoch": 1.8352160406664784, "grad_norm": 3.3955128724458463, "learning_rate": 6.9182403403968835e-06, "loss": 0.4975, "step": 12997 }, { "epoch": 1.8353572437164642, "grad_norm": 3.2582015403075104, "learning_rate": 6.916790119337766e-06, "loss": 0.5208, "step": 12998 }, { "epoch": 1.8354984467664501, "grad_norm": 4.622895253943369, "learning_rate": 6.915339969931116e-06, "loss": 0.6737, "step": 12999 }, { "epoch": 1.835639649816436, "grad_norm": 3.1246992229287733, "learning_rate": 6.913889892210631e-06, "loss": 0.5008, "step": 13000 }, { "epoch": 1.835780852866422, "grad_norm": 3.135299717334731, "learning_rate": 6.912439886210014e-06, "loss": 0.4637, "step": 13001 }, { "epoch": 1.8359220559164078, "grad_norm": 3.7443755135163035, "learning_rate": 6.9109899519629605e-06, "loss": 0.6192, "step": 13002 }, { "epoch": 1.8360632589663937, "grad_norm": 3.905794861537653, "learning_rate": 6.9095400895031635e-06, "loss": 0.5396, "step": 13003 }, { "epoch": 1.8362044620163795, "grad_norm": 3.3812916403383864, "learning_rate": 6.908090298864325e-06, "loss": 0.4497, "step": 13004 }, { "epoch": 1.8363456650663654, "grad_norm": 3.8188608102608663, "learning_rate": 6.9066405800801305e-06, "loss": 0.5911, "step": 13005 }, { "epoch": 1.8364868681163513, "grad_norm": 4.057828120458315, "learning_rate": 6.905190933184275e-06, "loss": 0.5235, "step": 13006 }, { "epoch": 1.8366280711663372, "grad_norm": 5.3349435502988145, "learning_rate": 6.903741358210444e-06, "loss": 0.9251, "step": 13007 }, { "epoch": 1.836769274216323, "grad_norm": 4.383967316408678, "learning_rate": 6.9022918551923245e-06, "loss": 0.5969, "step": 13008 }, { "epoch": 1.836910477266309, "grad_norm": 3.8546171920433983, "learning_rate": 6.900842424163606e-06, "loss": 0.614, "step": 13009 }, { "epoch": 1.8370516803162948, "grad_norm": 3.4380878945794313, "learning_rate": 6.899393065157972e-06, "loss": 0.5289, "step": 13010 }, { "epoch": 1.8371928833662807, "grad_norm": 3.1593764083511684, "learning_rate": 6.897943778209104e-06, "loss": 0.3584, "step": 13011 }, { "epoch": 1.8373340864162666, "grad_norm": 3.0904589831449445, "learning_rate": 6.896494563350681e-06, "loss": 0.4471, "step": 13012 }, { "epoch": 1.8374752894662525, "grad_norm": 3.9474280287847474, "learning_rate": 6.895045420616385e-06, "loss": 0.6063, "step": 13013 }, { "epoch": 1.8376164925162384, "grad_norm": 4.12602026206808, "learning_rate": 6.893596350039896e-06, "loss": 0.6523, "step": 13014 }, { "epoch": 1.8377576955662243, "grad_norm": 3.822764251328042, "learning_rate": 6.8921473516548835e-06, "loss": 0.5942, "step": 13015 }, { "epoch": 1.8378988986162101, "grad_norm": 3.112236983973115, "learning_rate": 6.8906984254950235e-06, "loss": 0.4882, "step": 13016 }, { "epoch": 1.838040101666196, "grad_norm": 3.0450469945870204, "learning_rate": 6.889249571593989e-06, "loss": 0.4571, "step": 13017 }, { "epoch": 1.838181304716182, "grad_norm": 3.5839369223158872, "learning_rate": 6.887800789985452e-06, "loss": 0.5574, "step": 13018 }, { "epoch": 1.8383225077661678, "grad_norm": 3.161950259352857, "learning_rate": 6.88635208070308e-06, "loss": 0.4716, "step": 13019 }, { "epoch": 1.8384637108161537, "grad_norm": 3.464969242668353, "learning_rate": 6.884903443780541e-06, "loss": 0.5346, "step": 13020 }, { "epoch": 1.8386049138661396, "grad_norm": 2.9927967266371818, "learning_rate": 6.883454879251501e-06, "loss": 0.4622, "step": 13021 }, { "epoch": 1.8387461169161254, "grad_norm": 3.6015519063449606, "learning_rate": 6.882006387149625e-06, "loss": 0.6709, "step": 13022 }, { "epoch": 1.8388873199661113, "grad_norm": 3.6341638971962964, "learning_rate": 6.880557967508574e-06, "loss": 0.614, "step": 13023 }, { "epoch": 1.839028523016097, "grad_norm": 3.5506296644123303, "learning_rate": 6.879109620362008e-06, "loss": 0.5115, "step": 13024 }, { "epoch": 1.8391697260660829, "grad_norm": 3.928316636069356, "learning_rate": 6.877661345743587e-06, "loss": 0.6011, "step": 13025 }, { "epoch": 1.8393109291160687, "grad_norm": 4.011640022806216, "learning_rate": 6.876213143686965e-06, "loss": 0.5269, "step": 13026 }, { "epoch": 1.8394521321660546, "grad_norm": 3.6274148876941315, "learning_rate": 6.874765014225804e-06, "loss": 0.5628, "step": 13027 }, { "epoch": 1.8395933352160405, "grad_norm": 4.645423163851433, "learning_rate": 6.873316957393752e-06, "loss": 0.6017, "step": 13028 }, { "epoch": 1.8397345382660264, "grad_norm": 2.7150057290410925, "learning_rate": 6.871868973224462e-06, "loss": 0.4436, "step": 13029 }, { "epoch": 1.8398757413160123, "grad_norm": 3.8046919934852093, "learning_rate": 6.87042106175159e-06, "loss": 0.5471, "step": 13030 }, { "epoch": 1.8400169443659982, "grad_norm": 4.234192039360731, "learning_rate": 6.868973223008781e-06, "loss": 0.644, "step": 13031 }, { "epoch": 1.840158147415984, "grad_norm": 3.1287981791762842, "learning_rate": 6.867525457029682e-06, "loss": 0.4094, "step": 13032 }, { "epoch": 1.84029935046597, "grad_norm": 3.6834451614589065, "learning_rate": 6.866077763847937e-06, "loss": 0.4957, "step": 13033 }, { "epoch": 1.8404405535159558, "grad_norm": 3.6931413868123597, "learning_rate": 6.864630143497191e-06, "loss": 0.483, "step": 13034 }, { "epoch": 1.8405817565659417, "grad_norm": 3.378501480811915, "learning_rate": 6.8631825960110866e-06, "loss": 0.4926, "step": 13035 }, { "epoch": 1.8407229596159276, "grad_norm": 4.029508702159001, "learning_rate": 6.861735121423264e-06, "loss": 0.5376, "step": 13036 }, { "epoch": 1.8408641626659135, "grad_norm": 3.656874209167187, "learning_rate": 6.860287719767362e-06, "loss": 0.5308, "step": 13037 }, { "epoch": 1.8410053657158993, "grad_norm": 2.927930795090184, "learning_rate": 6.858840391077017e-06, "loss": 0.491, "step": 13038 }, { "epoch": 1.8411465687658852, "grad_norm": 4.2201038579931, "learning_rate": 6.857393135385866e-06, "loss": 0.6178, "step": 13039 }, { "epoch": 1.841287771815871, "grad_norm": 3.5585677517463314, "learning_rate": 6.8559459527275426e-06, "loss": 0.6186, "step": 13040 }, { "epoch": 1.841428974865857, "grad_norm": 4.408098395093376, "learning_rate": 6.8544988431356755e-06, "loss": 0.6829, "step": 13041 }, { "epoch": 1.8415701779158429, "grad_norm": 3.62741665457451, "learning_rate": 6.853051806643898e-06, "loss": 0.5815, "step": 13042 }, { "epoch": 1.8417113809658288, "grad_norm": 3.312570087878604, "learning_rate": 6.851604843285835e-06, "loss": 0.5559, "step": 13043 }, { "epoch": 1.8418525840158146, "grad_norm": 3.122379175760547, "learning_rate": 6.850157953095117e-06, "loss": 0.4385, "step": 13044 }, { "epoch": 1.8419937870658005, "grad_norm": 4.140891910963143, "learning_rate": 6.848711136105368e-06, "loss": 0.6058, "step": 13045 }, { "epoch": 1.8421349901157864, "grad_norm": 4.001685898667218, "learning_rate": 6.84726439235021e-06, "loss": 0.5267, "step": 13046 }, { "epoch": 1.8422761931657723, "grad_norm": 3.5650653906880674, "learning_rate": 6.845817721863267e-06, "loss": 0.4557, "step": 13047 }, { "epoch": 1.8424173962157582, "grad_norm": 3.2096054155631775, "learning_rate": 6.844371124678161e-06, "loss": 0.4321, "step": 13048 }, { "epoch": 1.842558599265744, "grad_norm": 3.452462047519633, "learning_rate": 6.8429246008285046e-06, "loss": 0.5292, "step": 13049 }, { "epoch": 1.84269980231573, "grad_norm": 3.7793251118856563, "learning_rate": 6.841478150347918e-06, "loss": 0.5563, "step": 13050 }, { "epoch": 1.8428410053657158, "grad_norm": 3.204073320838366, "learning_rate": 6.840031773270011e-06, "loss": 0.465, "step": 13051 }, { "epoch": 1.8429822084157017, "grad_norm": 4.481342358710885, "learning_rate": 6.838585469628405e-06, "loss": 0.5994, "step": 13052 }, { "epoch": 1.8431234114656876, "grad_norm": 3.8309132907575254, "learning_rate": 6.837139239456706e-06, "loss": 0.4823, "step": 13053 }, { "epoch": 1.8432646145156735, "grad_norm": 4.185046160443699, "learning_rate": 6.8356930827885256e-06, "loss": 0.5965, "step": 13054 }, { "epoch": 1.8434058175656594, "grad_norm": 3.6433729020238492, "learning_rate": 6.83424699965747e-06, "loss": 0.6031, "step": 13055 }, { "epoch": 1.8435470206156452, "grad_norm": 3.419474893240668, "learning_rate": 6.832800990097148e-06, "loss": 0.5126, "step": 13056 }, { "epoch": 1.8436882236656311, "grad_norm": 2.8802853102732637, "learning_rate": 6.831355054141167e-06, "loss": 0.4329, "step": 13057 }, { "epoch": 1.843829426715617, "grad_norm": 3.39032453280376, "learning_rate": 6.829909191823121e-06, "loss": 0.3999, "step": 13058 }, { "epoch": 1.8439706297656029, "grad_norm": 4.138369641931917, "learning_rate": 6.828463403176619e-06, "loss": 0.6083, "step": 13059 }, { "epoch": 1.8441118328155888, "grad_norm": 4.382449423747757, "learning_rate": 6.827017688235255e-06, "loss": 0.6343, "step": 13060 }, { "epoch": 1.8442530358655747, "grad_norm": 3.8566278062713835, "learning_rate": 6.825572047032631e-06, "loss": 0.5486, "step": 13061 }, { "epoch": 1.8443942389155605, "grad_norm": 4.64398193378965, "learning_rate": 6.824126479602342e-06, "loss": 0.7301, "step": 13062 }, { "epoch": 1.8445354419655464, "grad_norm": 4.088316942192737, "learning_rate": 6.822680985977981e-06, "loss": 0.5754, "step": 13063 }, { "epoch": 1.8446766450155323, "grad_norm": 3.331222689538483, "learning_rate": 6.821235566193143e-06, "loss": 0.5324, "step": 13064 }, { "epoch": 1.8448178480655182, "grad_norm": 3.496662540262184, "learning_rate": 6.819790220281419e-06, "loss": 0.5635, "step": 13065 }, { "epoch": 1.844959051115504, "grad_norm": 4.368564527264109, "learning_rate": 6.818344948276397e-06, "loss": 0.5982, "step": 13066 }, { "epoch": 1.84510025416549, "grad_norm": 4.018651763025937, "learning_rate": 6.816899750211662e-06, "loss": 0.6362, "step": 13067 }, { "epoch": 1.8452414572154758, "grad_norm": 3.13911551150017, "learning_rate": 6.815454626120804e-06, "loss": 0.4399, "step": 13068 }, { "epoch": 1.8453826602654617, "grad_norm": 4.709398180193074, "learning_rate": 6.814009576037401e-06, "loss": 0.6822, "step": 13069 }, { "epoch": 1.8455238633154476, "grad_norm": 3.9961081870164747, "learning_rate": 6.812564599995042e-06, "loss": 0.5371, "step": 13070 }, { "epoch": 1.8456650663654335, "grad_norm": 4.572040309855932, "learning_rate": 6.811119698027307e-06, "loss": 0.774, "step": 13071 }, { "epoch": 1.8458062694154194, "grad_norm": 3.495209834569317, "learning_rate": 6.809674870167768e-06, "loss": 0.598, "step": 13072 }, { "epoch": 1.8459474724654052, "grad_norm": 3.8499092741833434, "learning_rate": 6.808230116450012e-06, "loss": 0.5956, "step": 13073 }, { "epoch": 1.8460886755153911, "grad_norm": 3.3129191524042794, "learning_rate": 6.80678543690761e-06, "loss": 0.5083, "step": 13074 }, { "epoch": 1.846229878565377, "grad_norm": 3.3148960715697307, "learning_rate": 6.805340831574135e-06, "loss": 0.4707, "step": 13075 }, { "epoch": 1.846371081615363, "grad_norm": 3.2693997877370493, "learning_rate": 6.803896300483159e-06, "loss": 0.4845, "step": 13076 }, { "epoch": 1.8465122846653488, "grad_norm": 3.1775377575426695, "learning_rate": 6.802451843668249e-06, "loss": 0.4902, "step": 13077 }, { "epoch": 1.8466534877153347, "grad_norm": 3.7416205054708738, "learning_rate": 6.8010074611629815e-06, "loss": 0.5184, "step": 13078 }, { "epoch": 1.8467946907653205, "grad_norm": 3.183010921895056, "learning_rate": 6.799563153000919e-06, "loss": 0.4173, "step": 13079 }, { "epoch": 1.8469358938153064, "grad_norm": 3.0111762591850164, "learning_rate": 6.798118919215625e-06, "loss": 0.442, "step": 13080 }, { "epoch": 1.8470770968652923, "grad_norm": 3.5036289780116214, "learning_rate": 6.7966747598406625e-06, "loss": 0.5281, "step": 13081 }, { "epoch": 1.8472182999152782, "grad_norm": 2.8188643944298413, "learning_rate": 6.795230674909601e-06, "loss": 0.4138, "step": 13082 }, { "epoch": 1.847359502965264, "grad_norm": 3.933975929806643, "learning_rate": 6.793786664455992e-06, "loss": 0.5154, "step": 13083 }, { "epoch": 1.84750070601525, "grad_norm": 3.689280996258638, "learning_rate": 6.7923427285133945e-06, "loss": 0.5283, "step": 13084 }, { "epoch": 1.8476419090652358, "grad_norm": 4.230289474288531, "learning_rate": 6.790898867115368e-06, "loss": 0.7521, "step": 13085 }, { "epoch": 1.8477831121152217, "grad_norm": 3.350836655520724, "learning_rate": 6.789455080295464e-06, "loss": 0.5472, "step": 13086 }, { "epoch": 1.8479243151652076, "grad_norm": 3.026929105937988, "learning_rate": 6.788011368087239e-06, "loss": 0.4157, "step": 13087 }, { "epoch": 1.8480655182151935, "grad_norm": 3.4634934694148867, "learning_rate": 6.786567730524243e-06, "loss": 0.5804, "step": 13088 }, { "epoch": 1.8482067212651794, "grad_norm": 3.8032417006001373, "learning_rate": 6.785124167640024e-06, "loss": 0.5181, "step": 13089 }, { "epoch": 1.8483479243151653, "grad_norm": 3.0420925090824507, "learning_rate": 6.783680679468132e-06, "loss": 0.4914, "step": 13090 }, { "epoch": 1.8484891273651511, "grad_norm": 3.543399742681195, "learning_rate": 6.782237266042113e-06, "loss": 0.5275, "step": 13091 }, { "epoch": 1.848630330415137, "grad_norm": 3.965937173537493, "learning_rate": 6.78079392739551e-06, "loss": 0.5368, "step": 13092 }, { "epoch": 1.848771533465123, "grad_norm": 3.5237579735437596, "learning_rate": 6.779350663561866e-06, "loss": 0.5556, "step": 13093 }, { "epoch": 1.8489127365151088, "grad_norm": 3.148869908137464, "learning_rate": 6.777907474574718e-06, "loss": 0.5379, "step": 13094 }, { "epoch": 1.8490539395650947, "grad_norm": 4.022133592110588, "learning_rate": 6.776464360467612e-06, "loss": 0.5872, "step": 13095 }, { "epoch": 1.8491951426150806, "grad_norm": 3.604395909089462, "learning_rate": 6.775021321274082e-06, "loss": 0.5331, "step": 13096 }, { "epoch": 1.8493363456650664, "grad_norm": 3.170710675596997, "learning_rate": 6.773578357027663e-06, "loss": 0.4818, "step": 13097 }, { "epoch": 1.8494775487150523, "grad_norm": 3.6832256942273767, "learning_rate": 6.772135467761889e-06, "loss": 0.5306, "step": 13098 }, { "epoch": 1.8496187517650382, "grad_norm": 3.68538872524263, "learning_rate": 6.770692653510298e-06, "loss": 0.5214, "step": 13099 }, { "epoch": 1.849759954815024, "grad_norm": 2.884190096524447, "learning_rate": 6.769249914306408e-06, "loss": 0.4183, "step": 13100 }, { "epoch": 1.84990115786501, "grad_norm": 3.9703138565317837, "learning_rate": 6.7678072501837575e-06, "loss": 0.6118, "step": 13101 }, { "epoch": 1.8500423609149959, "grad_norm": 3.157783159407482, "learning_rate": 6.766364661175872e-06, "loss": 0.5218, "step": 13102 }, { "epoch": 1.8501835639649817, "grad_norm": 3.8373369842682714, "learning_rate": 6.7649221473162705e-06, "loss": 0.5184, "step": 13103 }, { "epoch": 1.8503247670149676, "grad_norm": 4.3834388670315745, "learning_rate": 6.763479708638485e-06, "loss": 0.7996, "step": 13104 }, { "epoch": 1.8504659700649535, "grad_norm": 4.324165680170939, "learning_rate": 6.762037345176034e-06, "loss": 0.5653, "step": 13105 }, { "epoch": 1.8506071731149394, "grad_norm": 3.2994406258364637, "learning_rate": 6.7605950569624335e-06, "loss": 0.4666, "step": 13106 }, { "epoch": 1.8507483761649253, "grad_norm": 3.7796980624048153, "learning_rate": 6.759152844031207e-06, "loss": 0.624, "step": 13107 }, { "epoch": 1.8508895792149112, "grad_norm": 3.2514149408468023, "learning_rate": 6.757710706415872e-06, "loss": 0.4232, "step": 13108 }, { "epoch": 1.851030782264897, "grad_norm": 3.487641472513069, "learning_rate": 6.756268644149937e-06, "loss": 0.5797, "step": 13109 }, { "epoch": 1.851171985314883, "grad_norm": 3.307199768673497, "learning_rate": 6.754826657266918e-06, "loss": 0.5566, "step": 13110 }, { "epoch": 1.8513131883648688, "grad_norm": 3.85226359538764, "learning_rate": 6.753384745800323e-06, "loss": 0.559, "step": 13111 }, { "epoch": 1.8514543914148547, "grad_norm": 2.8679677599527413, "learning_rate": 6.7519429097836675e-06, "loss": 0.3939, "step": 13112 }, { "epoch": 1.8515955944648406, "grad_norm": 3.7060414432084157, "learning_rate": 6.750501149250456e-06, "loss": 0.5533, "step": 13113 }, { "epoch": 1.8517367975148264, "grad_norm": 4.09989522133206, "learning_rate": 6.749059464234193e-06, "loss": 0.5577, "step": 13114 }, { "epoch": 1.8518780005648123, "grad_norm": 3.4591118725350807, "learning_rate": 6.747617854768384e-06, "loss": 0.5244, "step": 13115 }, { "epoch": 1.8520192036147982, "grad_norm": 2.9961117988610804, "learning_rate": 6.746176320886537e-06, "loss": 0.4481, "step": 13116 }, { "epoch": 1.852160406664784, "grad_norm": 3.3508685951080093, "learning_rate": 6.74473486262214e-06, "loss": 0.5461, "step": 13117 }, { "epoch": 1.85230160971477, "grad_norm": 3.783042260360074, "learning_rate": 6.743293480008703e-06, "loss": 0.5308, "step": 13118 }, { "epoch": 1.8524428127647559, "grad_norm": 4.117493158412321, "learning_rate": 6.7418521730797175e-06, "loss": 0.5666, "step": 13119 }, { "epoch": 1.8525840158147417, "grad_norm": 3.4205811816104585, "learning_rate": 6.740410941868678e-06, "loss": 0.4995, "step": 13120 }, { "epoch": 1.8527252188647276, "grad_norm": 2.878786239601334, "learning_rate": 6.738969786409084e-06, "loss": 0.4281, "step": 13121 }, { "epoch": 1.8528664219147135, "grad_norm": 2.821309256526727, "learning_rate": 6.737528706734423e-06, "loss": 0.4784, "step": 13122 }, { "epoch": 1.8530076249646994, "grad_norm": 4.11967329139101, "learning_rate": 6.736087702878184e-06, "loss": 0.5713, "step": 13123 }, { "epoch": 1.8531488280146853, "grad_norm": 4.086038772971897, "learning_rate": 6.734646774873863e-06, "loss": 0.6332, "step": 13124 }, { "epoch": 1.8532900310646712, "grad_norm": 3.2797519348146764, "learning_rate": 6.733205922754935e-06, "loss": 0.4412, "step": 13125 }, { "epoch": 1.8534312341146568, "grad_norm": 3.2819511546815336, "learning_rate": 6.731765146554891e-06, "loss": 0.5208, "step": 13126 }, { "epoch": 1.8535724371646427, "grad_norm": 4.237105433611935, "learning_rate": 6.730324446307217e-06, "loss": 0.771, "step": 13127 }, { "epoch": 1.8537136402146286, "grad_norm": 3.647457081672026, "learning_rate": 6.728883822045389e-06, "loss": 0.5972, "step": 13128 }, { "epoch": 1.8538548432646145, "grad_norm": 3.940859323027391, "learning_rate": 6.727443273802885e-06, "loss": 0.6026, "step": 13129 }, { "epoch": 1.8539960463146004, "grad_norm": 2.9861249254050892, "learning_rate": 6.726002801613189e-06, "loss": 0.427, "step": 13130 }, { "epoch": 1.8541372493645862, "grad_norm": 3.3952705695280323, "learning_rate": 6.724562405509775e-06, "loss": 0.5192, "step": 13131 }, { "epoch": 1.8542784524145721, "grad_norm": 3.609157372697066, "learning_rate": 6.723122085526113e-06, "loss": 0.5409, "step": 13132 }, { "epoch": 1.854419655464558, "grad_norm": 4.113757548739018, "learning_rate": 6.721681841695684e-06, "loss": 0.6379, "step": 13133 }, { "epoch": 1.8545608585145439, "grad_norm": 3.1597205351500808, "learning_rate": 6.720241674051948e-06, "loss": 0.4846, "step": 13134 }, { "epoch": 1.8547020615645298, "grad_norm": 3.2350819632305052, "learning_rate": 6.718801582628382e-06, "loss": 0.4667, "step": 13135 }, { "epoch": 1.8548432646145157, "grad_norm": 3.5708744269335746, "learning_rate": 6.717361567458449e-06, "loss": 0.5791, "step": 13136 }, { "epoch": 1.8549844676645015, "grad_norm": 2.9508234371242708, "learning_rate": 6.7159216285756136e-06, "loss": 0.4636, "step": 13137 }, { "epoch": 1.8551256707144874, "grad_norm": 3.4258585562427566, "learning_rate": 6.714481766013343e-06, "loss": 0.4654, "step": 13138 }, { "epoch": 1.8552668737644733, "grad_norm": 3.7640246918880753, "learning_rate": 6.713041979805098e-06, "loss": 0.4919, "step": 13139 }, { "epoch": 1.8554080768144592, "grad_norm": 3.5634126378827338, "learning_rate": 6.711602269984339e-06, "loss": 0.5251, "step": 13140 }, { "epoch": 1.855549279864445, "grad_norm": 3.1346662329080135, "learning_rate": 6.710162636584523e-06, "loss": 0.4941, "step": 13141 }, { "epoch": 1.855690482914431, "grad_norm": 4.740619663834005, "learning_rate": 6.7087230796391035e-06, "loss": 0.5694, "step": 13142 }, { "epoch": 1.8558316859644168, "grad_norm": 3.4944571963537348, "learning_rate": 6.707283599181539e-06, "loss": 0.4434, "step": 13143 }, { "epoch": 1.8559728890144027, "grad_norm": 3.0204521163123625, "learning_rate": 6.705844195245283e-06, "loss": 0.4872, "step": 13144 }, { "epoch": 1.8561140920643886, "grad_norm": 4.191052787416471, "learning_rate": 6.704404867863785e-06, "loss": 0.5735, "step": 13145 }, { "epoch": 1.8562552951143745, "grad_norm": 2.9452209389480477, "learning_rate": 6.702965617070492e-06, "loss": 0.4326, "step": 13146 }, { "epoch": 1.8563964981643604, "grad_norm": 3.6101116522698873, "learning_rate": 6.701526442898855e-06, "loss": 0.6204, "step": 13147 }, { "epoch": 1.8565377012143462, "grad_norm": 3.100857695475702, "learning_rate": 6.7000873453823225e-06, "loss": 0.4886, "step": 13148 }, { "epoch": 1.8566789042643321, "grad_norm": 3.2494731154632506, "learning_rate": 6.698648324554331e-06, "loss": 0.488, "step": 13149 }, { "epoch": 1.856820107314318, "grad_norm": 3.5855612556035656, "learning_rate": 6.697209380448333e-06, "loss": 0.5552, "step": 13150 }, { "epoch": 1.856961310364304, "grad_norm": 3.7446187345342334, "learning_rate": 6.695770513097756e-06, "loss": 0.5325, "step": 13151 }, { "epoch": 1.8571025134142898, "grad_norm": 3.6266193772049435, "learning_rate": 6.6943317225360474e-06, "loss": 0.5474, "step": 13152 }, { "epoch": 1.8572437164642757, "grad_norm": 3.3781978850750596, "learning_rate": 6.692893008796643e-06, "loss": 0.5177, "step": 13153 }, { "epoch": 1.8573849195142615, "grad_norm": 3.711356342954198, "learning_rate": 6.691454371912974e-06, "loss": 0.5232, "step": 13154 }, { "epoch": 1.8575261225642474, "grad_norm": 3.831444173766949, "learning_rate": 6.690015811918478e-06, "loss": 0.5045, "step": 13155 }, { "epoch": 1.8576673256142333, "grad_norm": 4.224663475850468, "learning_rate": 6.688577328846586e-06, "loss": 0.6206, "step": 13156 }, { "epoch": 1.8578085286642192, "grad_norm": 3.798245443093454, "learning_rate": 6.687138922730726e-06, "loss": 0.5331, "step": 13157 }, { "epoch": 1.857949731714205, "grad_norm": 4.443212577118079, "learning_rate": 6.685700593604329e-06, "loss": 0.6278, "step": 13158 }, { "epoch": 1.858090934764191, "grad_norm": 4.097600876465136, "learning_rate": 6.684262341500818e-06, "loss": 0.5878, "step": 13159 }, { "epoch": 1.8582321378141766, "grad_norm": 3.8422551657040183, "learning_rate": 6.6828241664536145e-06, "loss": 0.4856, "step": 13160 }, { "epoch": 1.8583733408641625, "grad_norm": 4.445297057837859, "learning_rate": 6.681386068496147e-06, "loss": 0.6913, "step": 13161 }, { "epoch": 1.8585145439141484, "grad_norm": 3.831504516883622, "learning_rate": 6.679948047661835e-06, "loss": 0.6514, "step": 13162 }, { "epoch": 1.8586557469641343, "grad_norm": 4.173824116729072, "learning_rate": 6.678510103984095e-06, "loss": 0.6778, "step": 13163 }, { "epoch": 1.8587969500141202, "grad_norm": 4.4564242170252975, "learning_rate": 6.677072237496347e-06, "loss": 0.6755, "step": 13164 }, { "epoch": 1.858938153064106, "grad_norm": 2.818912871538394, "learning_rate": 6.6756344482320046e-06, "loss": 0.3994, "step": 13165 }, { "epoch": 1.859079356114092, "grad_norm": 3.605395629625173, "learning_rate": 6.674196736224481e-06, "loss": 0.5349, "step": 13166 }, { "epoch": 1.8592205591640778, "grad_norm": 3.797792206410664, "learning_rate": 6.672759101507194e-06, "loss": 0.5109, "step": 13167 }, { "epoch": 1.8593617622140637, "grad_norm": 2.9021734759222766, "learning_rate": 6.6713215441135424e-06, "loss": 0.4372, "step": 13168 }, { "epoch": 1.8595029652640496, "grad_norm": 2.7199984933934798, "learning_rate": 6.669884064076944e-06, "loss": 0.4133, "step": 13169 }, { "epoch": 1.8596441683140355, "grad_norm": 3.390475526353319, "learning_rate": 6.668446661430801e-06, "loss": 0.5075, "step": 13170 }, { "epoch": 1.8597853713640213, "grad_norm": 3.53192668759502, "learning_rate": 6.667009336208519e-06, "loss": 0.6084, "step": 13171 }, { "epoch": 1.8599265744140072, "grad_norm": 5.113660762805377, "learning_rate": 6.665572088443497e-06, "loss": 0.5475, "step": 13172 }, { "epoch": 1.860067777463993, "grad_norm": 3.6827022856035554, "learning_rate": 6.664134918169142e-06, "loss": 0.5938, "step": 13173 }, { "epoch": 1.860208980513979, "grad_norm": 3.807741885754998, "learning_rate": 6.662697825418853e-06, "loss": 0.5218, "step": 13174 }, { "epoch": 1.8603501835639649, "grad_norm": 2.7613123424349144, "learning_rate": 6.6612608102260265e-06, "loss": 0.4454, "step": 13175 }, { "epoch": 1.8604913866139507, "grad_norm": 3.1601724704165366, "learning_rate": 6.659823872624054e-06, "loss": 0.4343, "step": 13176 }, { "epoch": 1.8606325896639366, "grad_norm": 4.42148415440738, "learning_rate": 6.65838701264633e-06, "loss": 0.5935, "step": 13177 }, { "epoch": 1.8607737927139225, "grad_norm": 4.044355040478655, "learning_rate": 6.656950230326251e-06, "loss": 0.567, "step": 13178 }, { "epoch": 1.8609149957639084, "grad_norm": 3.210090182292342, "learning_rate": 6.655513525697206e-06, "loss": 0.4883, "step": 13179 }, { "epoch": 1.8610561988138943, "grad_norm": 3.6765869524824324, "learning_rate": 6.65407689879258e-06, "loss": 0.6429, "step": 13180 }, { "epoch": 1.8611974018638802, "grad_norm": 3.3801477984714197, "learning_rate": 6.652640349645763e-06, "loss": 0.4766, "step": 13181 }, { "epoch": 1.861338604913866, "grad_norm": 3.741155500066469, "learning_rate": 6.651203878290139e-06, "loss": 0.5556, "step": 13182 }, { "epoch": 1.861479807963852, "grad_norm": 4.429531478640785, "learning_rate": 6.649767484759091e-06, "loss": 0.5864, "step": 13183 }, { "epoch": 1.8616210110138378, "grad_norm": 3.1326312882872607, "learning_rate": 6.648331169086002e-06, "loss": 0.4857, "step": 13184 }, { "epoch": 1.8617622140638237, "grad_norm": 3.012129498114597, "learning_rate": 6.646894931304244e-06, "loss": 0.5548, "step": 13185 }, { "epoch": 1.8619034171138096, "grad_norm": 4.585220395427646, "learning_rate": 6.645458771447202e-06, "loss": 0.7124, "step": 13186 }, { "epoch": 1.8620446201637955, "grad_norm": 4.129407792413521, "learning_rate": 6.64402268954825e-06, "loss": 0.7036, "step": 13187 }, { "epoch": 1.8621858232137813, "grad_norm": 2.8541716504993153, "learning_rate": 6.642586685640761e-06, "loss": 0.4912, "step": 13188 }, { "epoch": 1.8623270262637672, "grad_norm": 4.233359860466865, "learning_rate": 6.641150759758106e-06, "loss": 0.6204, "step": 13189 }, { "epoch": 1.862468229313753, "grad_norm": 2.9332293809772767, "learning_rate": 6.639714911933658e-06, "loss": 0.4589, "step": 13190 }, { "epoch": 1.862609432363739, "grad_norm": 3.8358346673957215, "learning_rate": 6.638279142200785e-06, "loss": 0.6357, "step": 13191 }, { "epoch": 1.8627506354137249, "grad_norm": 3.4156955428229114, "learning_rate": 6.636843450592854e-06, "loss": 0.5747, "step": 13192 }, { "epoch": 1.8628918384637108, "grad_norm": 3.2224823042488278, "learning_rate": 6.635407837143228e-06, "loss": 0.4613, "step": 13193 }, { "epoch": 1.8630330415136966, "grad_norm": 3.2379700706519303, "learning_rate": 6.633972301885268e-06, "loss": 0.5676, "step": 13194 }, { "epoch": 1.8631742445636825, "grad_norm": 3.7897943448465283, "learning_rate": 6.6325368448523395e-06, "loss": 0.553, "step": 13195 }, { "epoch": 1.8633154476136684, "grad_norm": 3.0750355700643217, "learning_rate": 6.631101466077801e-06, "loss": 0.4373, "step": 13196 }, { "epoch": 1.8634566506636543, "grad_norm": 3.232610584690483, "learning_rate": 6.629666165595008e-06, "loss": 0.4728, "step": 13197 }, { "epoch": 1.8635978537136402, "grad_norm": 3.186165804370411, "learning_rate": 6.628230943437319e-06, "loss": 0.4368, "step": 13198 }, { "epoch": 1.863739056763626, "grad_norm": 3.9927214798940467, "learning_rate": 6.626795799638087e-06, "loss": 0.6458, "step": 13199 }, { "epoch": 1.863880259813612, "grad_norm": 3.738156165141249, "learning_rate": 6.625360734230663e-06, "loss": 0.5925, "step": 13200 }, { "epoch": 1.8640214628635978, "grad_norm": 3.0906346877749895, "learning_rate": 6.623925747248403e-06, "loss": 0.4898, "step": 13201 }, { "epoch": 1.8641626659135837, "grad_norm": 3.7464767933131897, "learning_rate": 6.6224908387246466e-06, "loss": 0.6464, "step": 13202 }, { "epoch": 1.8643038689635696, "grad_norm": 4.629532250194823, "learning_rate": 6.621056008692741e-06, "loss": 0.6588, "step": 13203 }, { "epoch": 1.8644450720135555, "grad_norm": 3.1660357845161196, "learning_rate": 6.619621257186039e-06, "loss": 0.5372, "step": 13204 }, { "epoch": 1.8645862750635414, "grad_norm": 3.5775661221857926, "learning_rate": 6.618186584237878e-06, "loss": 0.6156, "step": 13205 }, { "epoch": 1.8647274781135272, "grad_norm": 3.3921242346085343, "learning_rate": 6.616751989881598e-06, "loss": 0.5354, "step": 13206 }, { "epoch": 1.8648686811635131, "grad_norm": 3.517663575738852, "learning_rate": 6.6153174741505445e-06, "loss": 0.5172, "step": 13207 }, { "epoch": 1.865009884213499, "grad_norm": 3.7536978653964606, "learning_rate": 6.613883037078048e-06, "loss": 0.4841, "step": 13208 }, { "epoch": 1.8651510872634849, "grad_norm": 4.409619305926845, "learning_rate": 6.612448678697452e-06, "loss": 0.6553, "step": 13209 }, { "epoch": 1.8652922903134708, "grad_norm": 3.817908493054709, "learning_rate": 6.6110143990420824e-06, "loss": 0.5406, "step": 13210 }, { "epoch": 1.8654334933634567, "grad_norm": 3.0210053265079004, "learning_rate": 6.6095801981452735e-06, "loss": 0.4269, "step": 13211 }, { "epoch": 1.8655746964134425, "grad_norm": 4.141665234498497, "learning_rate": 6.608146076040358e-06, "loss": 0.611, "step": 13212 }, { "epoch": 1.8657158994634284, "grad_norm": 2.929247158395267, "learning_rate": 6.606712032760663e-06, "loss": 0.4732, "step": 13213 }, { "epoch": 1.8658571025134143, "grad_norm": 3.6540823968137395, "learning_rate": 6.605278068339516e-06, "loss": 0.5445, "step": 13214 }, { "epoch": 1.8659983055634002, "grad_norm": 3.3519448042672133, "learning_rate": 6.603844182810238e-06, "loss": 0.4589, "step": 13215 }, { "epoch": 1.866139508613386, "grad_norm": 3.2319730611865514, "learning_rate": 6.602410376206154e-06, "loss": 0.4312, "step": 13216 }, { "epoch": 1.866280711663372, "grad_norm": 3.0712457750424136, "learning_rate": 6.600976648560588e-06, "loss": 0.5097, "step": 13217 }, { "epoch": 1.8664219147133578, "grad_norm": 3.6400458983703703, "learning_rate": 6.599542999906858e-06, "loss": 0.5132, "step": 13218 }, { "epoch": 1.8665631177633437, "grad_norm": 3.528596644044079, "learning_rate": 6.598109430278279e-06, "loss": 0.5947, "step": 13219 }, { "epoch": 1.8667043208133296, "grad_norm": 3.7707860703714697, "learning_rate": 6.596675939708166e-06, "loss": 0.6011, "step": 13220 }, { "epoch": 1.8668455238633155, "grad_norm": 3.3337937392635593, "learning_rate": 6.595242528229835e-06, "loss": 0.4915, "step": 13221 }, { "epoch": 1.8669867269133014, "grad_norm": 4.500622294597003, "learning_rate": 6.593809195876597e-06, "loss": 0.6355, "step": 13222 }, { "epoch": 1.8671279299632872, "grad_norm": 3.746844852212628, "learning_rate": 6.5923759426817615e-06, "loss": 0.4833, "step": 13223 }, { "epoch": 1.8672691330132731, "grad_norm": 3.5216631159715597, "learning_rate": 6.5909427686786386e-06, "loss": 0.5094, "step": 13224 }, { "epoch": 1.867410336063259, "grad_norm": 3.7665884273595895, "learning_rate": 6.589509673900534e-06, "loss": 0.5914, "step": 13225 }, { "epoch": 1.867551539113245, "grad_norm": 3.835721010215854, "learning_rate": 6.588076658380754e-06, "loss": 0.4735, "step": 13226 }, { "epoch": 1.8676927421632308, "grad_norm": 3.7155716989122833, "learning_rate": 6.586643722152597e-06, "loss": 0.508, "step": 13227 }, { "epoch": 1.8678339452132167, "grad_norm": 4.089702456313327, "learning_rate": 6.585210865249363e-06, "loss": 0.5398, "step": 13228 }, { "epoch": 1.8679751482632025, "grad_norm": 4.527129741516544, "learning_rate": 6.5837780877043555e-06, "loss": 0.6788, "step": 13229 }, { "epoch": 1.8681163513131884, "grad_norm": 5.550468427981594, "learning_rate": 6.58234538955087e-06, "loss": 0.8167, "step": 13230 }, { "epoch": 1.8682575543631743, "grad_norm": 3.834967983788457, "learning_rate": 6.580912770822203e-06, "loss": 0.5485, "step": 13231 }, { "epoch": 1.8683987574131602, "grad_norm": 3.7253343281102023, "learning_rate": 6.579480231551644e-06, "loss": 0.5816, "step": 13232 }, { "epoch": 1.868539960463146, "grad_norm": 4.171048777463959, "learning_rate": 6.578047771772489e-06, "loss": 0.5228, "step": 13233 }, { "epoch": 1.868681163513132, "grad_norm": 3.4376079327021665, "learning_rate": 6.576615391518026e-06, "loss": 0.507, "step": 13234 }, { "epoch": 1.8688223665631178, "grad_norm": 3.0862798114155856, "learning_rate": 6.5751830908215445e-06, "loss": 0.5099, "step": 13235 }, { "epoch": 1.8689635696131037, "grad_norm": 3.995719063479375, "learning_rate": 6.573750869716327e-06, "loss": 0.5899, "step": 13236 }, { "epoch": 1.8691047726630896, "grad_norm": 4.053227446274229, "learning_rate": 6.572318728235658e-06, "loss": 0.5047, "step": 13237 }, { "epoch": 1.8692459757130755, "grad_norm": 3.3189551282305816, "learning_rate": 6.570886666412823e-06, "loss": 0.4606, "step": 13238 }, { "epoch": 1.8693871787630614, "grad_norm": 3.80614266736162, "learning_rate": 6.569454684281102e-06, "loss": 0.5826, "step": 13239 }, { "epoch": 1.8695283818130473, "grad_norm": 4.0220582457978855, "learning_rate": 6.5680227818737695e-06, "loss": 0.5096, "step": 13240 }, { "epoch": 1.8696695848630331, "grad_norm": 3.6519687199144517, "learning_rate": 6.566590959224109e-06, "loss": 0.6232, "step": 13241 }, { "epoch": 1.869810787913019, "grad_norm": 3.884859702749816, "learning_rate": 6.5651592163653885e-06, "loss": 0.5262, "step": 13242 }, { "epoch": 1.869951990963005, "grad_norm": 3.6746511824842494, "learning_rate": 6.56372755333089e-06, "loss": 0.5085, "step": 13243 }, { "epoch": 1.8700931940129908, "grad_norm": 4.367210289639769, "learning_rate": 6.562295970153875e-06, "loss": 0.6955, "step": 13244 }, { "epoch": 1.8702343970629767, "grad_norm": 2.959460677317954, "learning_rate": 6.560864466867616e-06, "loss": 0.4334, "step": 13245 }, { "epoch": 1.8703756001129626, "grad_norm": 3.8650677130261313, "learning_rate": 6.559433043505383e-06, "loss": 0.6255, "step": 13246 }, { "epoch": 1.8705168031629484, "grad_norm": 3.5870292720167356, "learning_rate": 6.558001700100441e-06, "loss": 0.5219, "step": 13247 }, { "epoch": 1.8706580062129343, "grad_norm": 3.779098333116538, "learning_rate": 6.556570436686052e-06, "loss": 0.5694, "step": 13248 }, { "epoch": 1.8707992092629202, "grad_norm": 4.110056802257806, "learning_rate": 6.555139253295477e-06, "loss": 0.6495, "step": 13249 }, { "epoch": 1.870940412312906, "grad_norm": 3.358993328099325, "learning_rate": 6.553708149961979e-06, "loss": 0.5509, "step": 13250 }, { "epoch": 1.871081615362892, "grad_norm": 3.293742877721644, "learning_rate": 6.552277126718818e-06, "loss": 0.4884, "step": 13251 }, { "epoch": 1.8712228184128779, "grad_norm": 3.257602986908871, "learning_rate": 6.550846183599249e-06, "loss": 0.5314, "step": 13252 }, { "epoch": 1.8713640214628637, "grad_norm": 3.7510117738060713, "learning_rate": 6.549415320636521e-06, "loss": 0.5448, "step": 13253 }, { "epoch": 1.8715052245128496, "grad_norm": 3.3669070967586516, "learning_rate": 6.5479845378638905e-06, "loss": 0.4987, "step": 13254 }, { "epoch": 1.8716464275628355, "grad_norm": 4.209737643909711, "learning_rate": 6.5465538353146106e-06, "loss": 0.6557, "step": 13255 }, { "epoch": 1.8717876306128214, "grad_norm": 3.6612954158584663, "learning_rate": 6.545123213021927e-06, "loss": 0.4986, "step": 13256 }, { "epoch": 1.8719288336628073, "grad_norm": 2.899452287408762, "learning_rate": 6.543692671019086e-06, "loss": 0.4209, "step": 13257 }, { "epoch": 1.8720700367127932, "grad_norm": 3.7316288813116025, "learning_rate": 6.542262209339339e-06, "loss": 0.6227, "step": 13258 }, { "epoch": 1.872211239762779, "grad_norm": 2.9508623323348724, "learning_rate": 6.5408318280159225e-06, "loss": 0.4597, "step": 13259 }, { "epoch": 1.872352442812765, "grad_norm": 3.754647676355563, "learning_rate": 6.539401527082083e-06, "loss": 0.6288, "step": 13260 }, { "epoch": 1.8724936458627508, "grad_norm": 4.5952899856987015, "learning_rate": 6.537971306571056e-06, "loss": 0.723, "step": 13261 }, { "epoch": 1.8726348489127365, "grad_norm": 2.904328001710089, "learning_rate": 6.536541166516079e-06, "loss": 0.4138, "step": 13262 }, { "epoch": 1.8727760519627223, "grad_norm": 3.8070860535370077, "learning_rate": 6.535111106950389e-06, "loss": 0.5581, "step": 13263 }, { "epoch": 1.8729172550127082, "grad_norm": 3.876310629445477, "learning_rate": 6.533681127907224e-06, "loss": 0.5434, "step": 13264 }, { "epoch": 1.873058458062694, "grad_norm": 3.7364774946944204, "learning_rate": 6.53225122941981e-06, "loss": 0.6367, "step": 13265 }, { "epoch": 1.87319966111268, "grad_norm": 3.323247140488036, "learning_rate": 6.5308214115213785e-06, "loss": 0.4401, "step": 13266 }, { "epoch": 1.8733408641626659, "grad_norm": 3.492196199760023, "learning_rate": 6.529391674245162e-06, "loss": 0.5128, "step": 13267 }, { "epoch": 1.8734820672126518, "grad_norm": 2.9732896247567946, "learning_rate": 6.527962017624383e-06, "loss": 0.4394, "step": 13268 }, { "epoch": 1.8736232702626376, "grad_norm": 3.5502940011461677, "learning_rate": 6.52653244169227e-06, "loss": 0.5024, "step": 13269 }, { "epoch": 1.8737644733126235, "grad_norm": 3.8506897935388227, "learning_rate": 6.52510294648204e-06, "loss": 0.5311, "step": 13270 }, { "epoch": 1.8739056763626094, "grad_norm": 4.151487972040856, "learning_rate": 6.523673532026916e-06, "loss": 0.6355, "step": 13271 }, { "epoch": 1.8740468794125953, "grad_norm": 3.2596542079799504, "learning_rate": 6.522244198360119e-06, "loss": 0.535, "step": 13272 }, { "epoch": 1.8741880824625812, "grad_norm": 3.091881292247918, "learning_rate": 6.520814945514866e-06, "loss": 0.481, "step": 13273 }, { "epoch": 1.874329285512567, "grad_norm": 3.7037364931546364, "learning_rate": 6.519385773524371e-06, "loss": 0.5992, "step": 13274 }, { "epoch": 1.874470488562553, "grad_norm": 3.600225803029264, "learning_rate": 6.517956682421845e-06, "loss": 0.5096, "step": 13275 }, { "epoch": 1.8746116916125388, "grad_norm": 2.88656877277685, "learning_rate": 6.516527672240504e-06, "loss": 0.4774, "step": 13276 }, { "epoch": 1.8747528946625247, "grad_norm": 3.7711505685858335, "learning_rate": 6.515098743013557e-06, "loss": 0.5997, "step": 13277 }, { "epoch": 1.8748940977125106, "grad_norm": 3.3614375607947555, "learning_rate": 6.513669894774209e-06, "loss": 0.5629, "step": 13278 }, { "epoch": 1.8750353007624965, "grad_norm": 3.6821542139177437, "learning_rate": 6.512241127555665e-06, "loss": 0.6094, "step": 13279 }, { "epoch": 1.8751765038124824, "grad_norm": 3.6927919264653943, "learning_rate": 6.510812441391131e-06, "loss": 0.5035, "step": 13280 }, { "epoch": 1.8753177068624682, "grad_norm": 3.9845129731177553, "learning_rate": 6.509383836313811e-06, "loss": 0.6768, "step": 13281 }, { "epoch": 1.8754589099124541, "grad_norm": 4.602094716953846, "learning_rate": 6.507955312356903e-06, "loss": 0.5424, "step": 13282 }, { "epoch": 1.87560011296244, "grad_norm": 3.5802608856595484, "learning_rate": 6.506526869553602e-06, "loss": 0.5582, "step": 13283 }, { "epoch": 1.8757413160124259, "grad_norm": 3.4384454898394945, "learning_rate": 6.505098507937111e-06, "loss": 0.4913, "step": 13284 }, { "epoch": 1.8758825190624118, "grad_norm": 4.651638684391711, "learning_rate": 6.503670227540624e-06, "loss": 0.5908, "step": 13285 }, { "epoch": 1.8760237221123977, "grad_norm": 5.819995341733787, "learning_rate": 6.502242028397328e-06, "loss": 0.5318, "step": 13286 }, { "epoch": 1.8761649251623835, "grad_norm": 3.0690932348209725, "learning_rate": 6.5008139105404175e-06, "loss": 0.4236, "step": 13287 }, { "epoch": 1.8763061282123694, "grad_norm": 3.7708567802896984, "learning_rate": 6.499385874003077e-06, "loss": 0.4597, "step": 13288 }, { "epoch": 1.8764473312623553, "grad_norm": 3.9593706697466247, "learning_rate": 6.4979579188185e-06, "loss": 0.6371, "step": 13289 }, { "epoch": 1.8765885343123412, "grad_norm": 3.5416365358914508, "learning_rate": 6.496530045019869e-06, "loss": 0.5762, "step": 13290 }, { "epoch": 1.876729737362327, "grad_norm": 4.206089926380315, "learning_rate": 6.495102252640366e-06, "loss": 0.7409, "step": 13291 }, { "epoch": 1.876870940412313, "grad_norm": 3.117457412710474, "learning_rate": 6.493674541713172e-06, "loss": 0.4055, "step": 13292 }, { "epoch": 1.8770121434622988, "grad_norm": 4.67473719947955, "learning_rate": 6.492246912271468e-06, "loss": 0.6948, "step": 13293 }, { "epoch": 1.8771533465122847, "grad_norm": 3.1219221800702033, "learning_rate": 6.490819364348434e-06, "loss": 0.5519, "step": 13294 }, { "epoch": 1.8772945495622706, "grad_norm": 3.522146490098211, "learning_rate": 6.4893918979772395e-06, "loss": 0.5258, "step": 13295 }, { "epoch": 1.8774357526122563, "grad_norm": 3.5681076743752547, "learning_rate": 6.4879645131910626e-06, "loss": 0.5677, "step": 13296 }, { "epoch": 1.8775769556622421, "grad_norm": 3.1061815444201417, "learning_rate": 6.4865372100230695e-06, "loss": 0.4457, "step": 13297 }, { "epoch": 1.877718158712228, "grad_norm": 3.9056517850329597, "learning_rate": 6.485109988506439e-06, "loss": 0.5507, "step": 13298 }, { "epoch": 1.877859361762214, "grad_norm": 4.065038840040629, "learning_rate": 6.483682848674332e-06, "loss": 0.5615, "step": 13299 }, { "epoch": 1.8780005648121998, "grad_norm": 3.243149879002145, "learning_rate": 6.4822557905599156e-06, "loss": 0.4422, "step": 13300 }, { "epoch": 1.8781417678621857, "grad_norm": 4.13133332778356, "learning_rate": 6.480828814196358e-06, "loss": 0.7124, "step": 13301 }, { "epoch": 1.8782829709121716, "grad_norm": 3.4284311096482236, "learning_rate": 6.479401919616821e-06, "loss": 0.414, "step": 13302 }, { "epoch": 1.8784241739621574, "grad_norm": 3.12588485211655, "learning_rate": 6.477975106854462e-06, "loss": 0.4449, "step": 13303 }, { "epoch": 1.8785653770121433, "grad_norm": 3.7166749320989387, "learning_rate": 6.476548375942437e-06, "loss": 0.58, "step": 13304 }, { "epoch": 1.8787065800621292, "grad_norm": 3.582069565829473, "learning_rate": 6.475121726913909e-06, "loss": 0.5004, "step": 13305 }, { "epoch": 1.878847783112115, "grad_norm": 3.6182775928725017, "learning_rate": 6.473695159802027e-06, "loss": 0.586, "step": 13306 }, { "epoch": 1.878988986162101, "grad_norm": 3.482386547096671, "learning_rate": 6.472268674639949e-06, "loss": 0.4719, "step": 13307 }, { "epoch": 1.8791301892120869, "grad_norm": 3.617250072196595, "learning_rate": 6.470842271460823e-06, "loss": 0.5609, "step": 13308 }, { "epoch": 1.8792713922620727, "grad_norm": 4.055456132111511, "learning_rate": 6.469415950297797e-06, "loss": 0.6994, "step": 13309 }, { "epoch": 1.8794125953120586, "grad_norm": 3.215078435312352, "learning_rate": 6.467989711184021e-06, "loss": 0.4796, "step": 13310 }, { "epoch": 1.8795537983620445, "grad_norm": 3.4871408840878835, "learning_rate": 6.46656355415264e-06, "loss": 0.5348, "step": 13311 }, { "epoch": 1.8796950014120304, "grad_norm": 3.2392538721266284, "learning_rate": 6.465137479236796e-06, "loss": 0.5251, "step": 13312 }, { "epoch": 1.8798362044620163, "grad_norm": 3.2666248070982693, "learning_rate": 6.463711486469629e-06, "loss": 0.4967, "step": 13313 }, { "epoch": 1.8799774075120022, "grad_norm": 3.5462969166958964, "learning_rate": 6.4622855758842785e-06, "loss": 0.665, "step": 13314 }, { "epoch": 1.880118610561988, "grad_norm": 3.236249053090492, "learning_rate": 6.460859747513885e-06, "loss": 0.4847, "step": 13315 }, { "epoch": 1.880259813611974, "grad_norm": 3.7078834410358135, "learning_rate": 6.459434001391582e-06, "loss": 0.5004, "step": 13316 }, { "epoch": 1.8804010166619598, "grad_norm": 3.7332869687965076, "learning_rate": 6.458008337550505e-06, "loss": 0.5764, "step": 13317 }, { "epoch": 1.8805422197119457, "grad_norm": 3.183721214353656, "learning_rate": 6.456582756023781e-06, "loss": 0.4706, "step": 13318 }, { "epoch": 1.8806834227619316, "grad_norm": 3.216045639309333, "learning_rate": 6.455157256844549e-06, "loss": 0.5382, "step": 13319 }, { "epoch": 1.8808246258119174, "grad_norm": 3.0262829775980475, "learning_rate": 6.4537318400459295e-06, "loss": 0.4435, "step": 13320 }, { "epoch": 1.8809658288619033, "grad_norm": 3.601663123701112, "learning_rate": 6.45230650566105e-06, "loss": 0.5351, "step": 13321 }, { "epoch": 1.8811070319118892, "grad_norm": 3.536031239045356, "learning_rate": 6.450881253723035e-06, "loss": 0.5229, "step": 13322 }, { "epoch": 1.881248234961875, "grad_norm": 3.8568867580984763, "learning_rate": 6.449456084265005e-06, "loss": 0.4946, "step": 13323 }, { "epoch": 1.881389438011861, "grad_norm": 4.076750035137307, "learning_rate": 6.448030997320084e-06, "loss": 0.5562, "step": 13324 }, { "epoch": 1.8815306410618469, "grad_norm": 3.3246425770555765, "learning_rate": 6.446605992921389e-06, "loss": 0.5226, "step": 13325 }, { "epoch": 1.8816718441118327, "grad_norm": 3.3441983599220455, "learning_rate": 6.445181071102034e-06, "loss": 0.4858, "step": 13326 }, { "epoch": 1.8818130471618186, "grad_norm": 4.107283550169291, "learning_rate": 6.443756231895138e-06, "loss": 0.6832, "step": 13327 }, { "epoch": 1.8819542502118045, "grad_norm": 3.783570269032791, "learning_rate": 6.4423314753338115e-06, "loss": 0.6264, "step": 13328 }, { "epoch": 1.8820954532617904, "grad_norm": 3.2532549676951543, "learning_rate": 6.440906801451165e-06, "loss": 0.5022, "step": 13329 }, { "epoch": 1.8822366563117763, "grad_norm": 3.3893259298562186, "learning_rate": 6.439482210280308e-06, "loss": 0.4313, "step": 13330 }, { "epoch": 1.8823778593617622, "grad_norm": 3.124290506088274, "learning_rate": 6.438057701854344e-06, "loss": 0.4209, "step": 13331 }, { "epoch": 1.882519062411748, "grad_norm": 4.2072614425576775, "learning_rate": 6.436633276206381e-06, "loss": 0.6561, "step": 13332 }, { "epoch": 1.882660265461734, "grad_norm": 4.634076981016983, "learning_rate": 6.435208933369523e-06, "loss": 0.689, "step": 13333 }, { "epoch": 1.8828014685117198, "grad_norm": 3.31809737891892, "learning_rate": 6.43378467337687e-06, "loss": 0.4844, "step": 13334 }, { "epoch": 1.8829426715617057, "grad_norm": 2.766301111634548, "learning_rate": 6.432360496261521e-06, "loss": 0.429, "step": 13335 }, { "epoch": 1.8830838746116916, "grad_norm": 3.9527582020374945, "learning_rate": 6.430936402056577e-06, "loss": 0.598, "step": 13336 }, { "epoch": 1.8832250776616775, "grad_norm": 4.090213962614411, "learning_rate": 6.429512390795124e-06, "loss": 0.6648, "step": 13337 }, { "epoch": 1.8833662807116633, "grad_norm": 3.572855283539374, "learning_rate": 6.428088462510262e-06, "loss": 0.6352, "step": 13338 }, { "epoch": 1.8835074837616492, "grad_norm": 3.8264977623464795, "learning_rate": 6.4266646172350835e-06, "loss": 0.697, "step": 13339 }, { "epoch": 1.883648686811635, "grad_norm": 3.535553019708483, "learning_rate": 6.425240855002674e-06, "loss": 0.4849, "step": 13340 }, { "epoch": 1.883789889861621, "grad_norm": 3.6965797164142, "learning_rate": 6.4238171758461245e-06, "loss": 0.5482, "step": 13341 }, { "epoch": 1.8839310929116069, "grad_norm": 3.9424184405050244, "learning_rate": 6.422393579798519e-06, "loss": 0.574, "step": 13342 }, { "epoch": 1.8840722959615928, "grad_norm": 3.4856252672897297, "learning_rate": 6.42097006689294e-06, "loss": 0.5414, "step": 13343 }, { "epoch": 1.8842134990115786, "grad_norm": 4.10547619611018, "learning_rate": 6.4195466371624726e-06, "loss": 0.6549, "step": 13344 }, { "epoch": 1.8843547020615645, "grad_norm": 3.5693921668555277, "learning_rate": 6.418123290640199e-06, "loss": 0.4831, "step": 13345 }, { "epoch": 1.8844959051115504, "grad_norm": 3.3193630140798693, "learning_rate": 6.416700027359189e-06, "loss": 0.4772, "step": 13346 }, { "epoch": 1.8846371081615363, "grad_norm": 4.166108686257722, "learning_rate": 6.415276847352524e-06, "loss": 0.6904, "step": 13347 }, { "epoch": 1.8847783112115222, "grad_norm": 4.49900031110109, "learning_rate": 6.413853750653274e-06, "loss": 0.5952, "step": 13348 }, { "epoch": 1.884919514261508, "grad_norm": 3.64061251058164, "learning_rate": 6.412430737294517e-06, "loss": 0.5355, "step": 13349 }, { "epoch": 1.885060717311494, "grad_norm": 3.1493922113009307, "learning_rate": 6.41100780730932e-06, "loss": 0.4354, "step": 13350 }, { "epoch": 1.8852019203614798, "grad_norm": 3.874617622771981, "learning_rate": 6.4095849607307515e-06, "loss": 0.6274, "step": 13351 }, { "epoch": 1.8853431234114657, "grad_norm": 4.212925058159435, "learning_rate": 6.408162197591876e-06, "loss": 0.6312, "step": 13352 }, { "epoch": 1.8854843264614516, "grad_norm": 3.447109987579968, "learning_rate": 6.4067395179257664e-06, "loss": 0.5484, "step": 13353 }, { "epoch": 1.8856255295114375, "grad_norm": 4.268878414795226, "learning_rate": 6.405316921765472e-06, "loss": 0.5823, "step": 13354 }, { "epoch": 1.8857667325614234, "grad_norm": 3.0567200743393896, "learning_rate": 6.403894409144063e-06, "loss": 0.389, "step": 13355 }, { "epoch": 1.8859079356114092, "grad_norm": 2.9440503096902924, "learning_rate": 6.402471980094594e-06, "loss": 0.4489, "step": 13356 }, { "epoch": 1.8860491386613951, "grad_norm": 3.612524873968128, "learning_rate": 6.401049634650119e-06, "loss": 0.6261, "step": 13357 }, { "epoch": 1.886190341711381, "grad_norm": 3.566531938413125, "learning_rate": 6.399627372843699e-06, "loss": 0.5638, "step": 13358 }, { "epoch": 1.8863315447613669, "grad_norm": 3.7667542635566487, "learning_rate": 6.398205194708385e-06, "loss": 0.5159, "step": 13359 }, { "epoch": 1.8864727478113528, "grad_norm": 2.8115233745554233, "learning_rate": 6.396783100277224e-06, "loss": 0.4389, "step": 13360 }, { "epoch": 1.8866139508613387, "grad_norm": 3.4145266613884444, "learning_rate": 6.395361089583271e-06, "loss": 0.5224, "step": 13361 }, { "epoch": 1.8867551539113245, "grad_norm": 4.156954556308672, "learning_rate": 6.3939391626595704e-06, "loss": 0.6595, "step": 13362 }, { "epoch": 1.8868963569613104, "grad_norm": 4.212161359871705, "learning_rate": 6.3925173195391645e-06, "loss": 0.5791, "step": 13363 }, { "epoch": 1.8870375600112963, "grad_norm": 5.311981936263858, "learning_rate": 6.391095560255098e-06, "loss": 0.6401, "step": 13364 }, { "epoch": 1.8871787630612822, "grad_norm": 4.110452448564788, "learning_rate": 6.389673884840413e-06, "loss": 0.5758, "step": 13365 }, { "epoch": 1.887319966111268, "grad_norm": 4.969491231149269, "learning_rate": 6.388252293328143e-06, "loss": 0.7883, "step": 13366 }, { "epoch": 1.887461169161254, "grad_norm": 4.534689742710986, "learning_rate": 6.386830785751335e-06, "loss": 0.6616, "step": 13367 }, { "epoch": 1.8876023722112398, "grad_norm": 3.3363685844751396, "learning_rate": 6.385409362143018e-06, "loss": 0.5508, "step": 13368 }, { "epoch": 1.8877435752612257, "grad_norm": 3.364114353876827, "learning_rate": 6.383988022536225e-06, "loss": 0.4853, "step": 13369 }, { "epoch": 1.8878847783112116, "grad_norm": 4.105589005973737, "learning_rate": 6.382566766963993e-06, "loss": 0.6143, "step": 13370 }, { "epoch": 1.8880259813611975, "grad_norm": 3.4508133735906217, "learning_rate": 6.381145595459343e-06, "loss": 0.5599, "step": 13371 }, { "epoch": 1.8881671844111834, "grad_norm": 3.9198584550309867, "learning_rate": 6.3797245080553075e-06, "loss": 0.6371, "step": 13372 }, { "epoch": 1.8883083874611692, "grad_norm": 3.653590502397578, "learning_rate": 6.37830350478491e-06, "loss": 0.5454, "step": 13373 }, { "epoch": 1.8884495905111551, "grad_norm": 3.3059162575717247, "learning_rate": 6.376882585681174e-06, "loss": 0.5282, "step": 13374 }, { "epoch": 1.888590793561141, "grad_norm": 3.77895189424575, "learning_rate": 6.375461750777125e-06, "loss": 0.5425, "step": 13375 }, { "epoch": 1.888731996611127, "grad_norm": 3.534869872056998, "learning_rate": 6.374041000105779e-06, "loss": 0.5993, "step": 13376 }, { "epoch": 1.8888731996611128, "grad_norm": 3.7090006503191604, "learning_rate": 6.372620333700155e-06, "loss": 0.6007, "step": 13377 }, { "epoch": 1.8890144027110987, "grad_norm": 4.467466439209398, "learning_rate": 6.371199751593264e-06, "loss": 0.6743, "step": 13378 }, { "epoch": 1.8891556057610845, "grad_norm": 3.361240355155478, "learning_rate": 6.369779253818129e-06, "loss": 0.5003, "step": 13379 }, { "epoch": 1.8892968088110704, "grad_norm": 3.3261773652558357, "learning_rate": 6.368358840407754e-06, "loss": 0.4674, "step": 13380 }, { "epoch": 1.8894380118610563, "grad_norm": 3.3375334961220204, "learning_rate": 6.366938511395151e-06, "loss": 0.4981, "step": 13381 }, { "epoch": 1.8895792149110422, "grad_norm": 3.261397900985418, "learning_rate": 6.365518266813327e-06, "loss": 0.5072, "step": 13382 }, { "epoch": 1.889720417961028, "grad_norm": 3.107114023979929, "learning_rate": 6.364098106695289e-06, "loss": 0.486, "step": 13383 }, { "epoch": 1.889861621011014, "grad_norm": 3.745012110077049, "learning_rate": 6.362678031074041e-06, "loss": 0.6393, "step": 13384 }, { "epoch": 1.8900028240609998, "grad_norm": 3.041161686856863, "learning_rate": 6.361258039982585e-06, "loss": 0.5068, "step": 13385 }, { "epoch": 1.8901440271109857, "grad_norm": 3.5355806860337577, "learning_rate": 6.35983813345392e-06, "loss": 0.5554, "step": 13386 }, { "epoch": 1.8902852301609716, "grad_norm": 3.5259525200471895, "learning_rate": 6.358418311521048e-06, "loss": 0.5319, "step": 13387 }, { "epoch": 1.8904264332109575, "grad_norm": 3.5140474123647887, "learning_rate": 6.3569985742169594e-06, "loss": 0.6568, "step": 13388 }, { "epoch": 1.8905676362609434, "grad_norm": 3.4902702535237795, "learning_rate": 6.35557892157465e-06, "loss": 0.5388, "step": 13389 }, { "epoch": 1.8907088393109293, "grad_norm": 3.771715764451454, "learning_rate": 6.354159353627114e-06, "loss": 0.5914, "step": 13390 }, { "epoch": 1.8908500423609151, "grad_norm": 3.8114287716261535, "learning_rate": 6.352739870407336e-06, "loss": 0.577, "step": 13391 }, { "epoch": 1.890991245410901, "grad_norm": 3.7969442538094054, "learning_rate": 6.351320471948313e-06, "loss": 0.6118, "step": 13392 }, { "epoch": 1.891132448460887, "grad_norm": 3.350525658951351, "learning_rate": 6.349901158283025e-06, "loss": 0.5232, "step": 13393 }, { "epoch": 1.8912736515108728, "grad_norm": 3.8250529579809354, "learning_rate": 6.348481929444458e-06, "loss": 0.6286, "step": 13394 }, { "epoch": 1.8914148545608587, "grad_norm": 3.6061428150213355, "learning_rate": 6.347062785465592e-06, "loss": 0.5634, "step": 13395 }, { "epoch": 1.8915560576108446, "grad_norm": 3.8374964313894924, "learning_rate": 6.345643726379416e-06, "loss": 0.6316, "step": 13396 }, { "epoch": 1.8916972606608304, "grad_norm": 3.5551111375087237, "learning_rate": 6.344224752218894e-06, "loss": 0.5392, "step": 13397 }, { "epoch": 1.891838463710816, "grad_norm": 4.037649865688388, "learning_rate": 6.342805863017012e-06, "loss": 0.6602, "step": 13398 }, { "epoch": 1.891979666760802, "grad_norm": 3.2323793183035625, "learning_rate": 6.341387058806742e-06, "loss": 0.4729, "step": 13399 }, { "epoch": 1.8921208698107879, "grad_norm": 4.072824109278964, "learning_rate": 6.339968339621056e-06, "loss": 0.6628, "step": 13400 }, { "epoch": 1.8922620728607737, "grad_norm": 3.304145602212345, "learning_rate": 6.338549705492929e-06, "loss": 0.4996, "step": 13401 }, { "epoch": 1.8924032759107596, "grad_norm": 2.7814762602251135, "learning_rate": 6.337131156455323e-06, "loss": 0.437, "step": 13402 }, { "epoch": 1.8925444789607455, "grad_norm": 2.7690478026777243, "learning_rate": 6.335712692541205e-06, "loss": 0.397, "step": 13403 }, { "epoch": 1.8926856820107314, "grad_norm": 2.9162725628990978, "learning_rate": 6.334294313783549e-06, "loss": 0.3686, "step": 13404 }, { "epoch": 1.8928268850607173, "grad_norm": 3.361020902938387, "learning_rate": 6.332876020215303e-06, "loss": 0.5153, "step": 13405 }, { "epoch": 1.8929680881107032, "grad_norm": 3.7145787619061514, "learning_rate": 6.331457811869437e-06, "loss": 0.6155, "step": 13406 }, { "epoch": 1.893109291160689, "grad_norm": 3.3854951646953317, "learning_rate": 6.3300396887789064e-06, "loss": 0.5122, "step": 13407 }, { "epoch": 1.893250494210675, "grad_norm": 3.8256994856819633, "learning_rate": 6.32862165097667e-06, "loss": 0.4585, "step": 13408 }, { "epoch": 1.8933916972606608, "grad_norm": 4.322051634239428, "learning_rate": 6.327203698495677e-06, "loss": 0.5188, "step": 13409 }, { "epoch": 1.8935329003106467, "grad_norm": 3.2812758860466373, "learning_rate": 6.325785831368887e-06, "loss": 0.5173, "step": 13410 }, { "epoch": 1.8936741033606326, "grad_norm": 3.5247042201853533, "learning_rate": 6.324368049629247e-06, "loss": 0.4447, "step": 13411 }, { "epoch": 1.8938153064106185, "grad_norm": 3.742815567859955, "learning_rate": 6.322950353309705e-06, "loss": 0.5492, "step": 13412 }, { "epoch": 1.8939565094606043, "grad_norm": 3.305856478380341, "learning_rate": 6.321532742443214e-06, "loss": 0.4154, "step": 13413 }, { "epoch": 1.8940977125105902, "grad_norm": 4.428474304286832, "learning_rate": 6.320115217062708e-06, "loss": 0.61, "step": 13414 }, { "epoch": 1.894238915560576, "grad_norm": 3.6185164347820997, "learning_rate": 6.318697777201136e-06, "loss": 0.4654, "step": 13415 }, { "epoch": 1.894380118610562, "grad_norm": 3.9393518817696633, "learning_rate": 6.317280422891439e-06, "loss": 0.5228, "step": 13416 }, { "epoch": 1.8945213216605479, "grad_norm": 3.686562790667477, "learning_rate": 6.315863154166552e-06, "loss": 0.5992, "step": 13417 }, { "epoch": 1.8946625247105338, "grad_norm": 2.8924763590674396, "learning_rate": 6.314445971059416e-06, "loss": 0.4297, "step": 13418 }, { "epoch": 1.8948037277605196, "grad_norm": 3.857662275882954, "learning_rate": 6.313028873602964e-06, "loss": 0.6117, "step": 13419 }, { "epoch": 1.8949449308105055, "grad_norm": 3.682311142218315, "learning_rate": 6.311611861830129e-06, "loss": 0.4986, "step": 13420 }, { "epoch": 1.8950861338604914, "grad_norm": 3.500187988638975, "learning_rate": 6.3101949357738435e-06, "loss": 0.4973, "step": 13421 }, { "epoch": 1.8952273369104773, "grad_norm": 3.392362276039131, "learning_rate": 6.3087780954670306e-06, "loss": 0.5516, "step": 13422 }, { "epoch": 1.8953685399604632, "grad_norm": 4.096021084596819, "learning_rate": 6.307361340942623e-06, "loss": 0.5851, "step": 13423 }, { "epoch": 1.895509743010449, "grad_norm": 3.316633320338202, "learning_rate": 6.305944672233542e-06, "loss": 0.5026, "step": 13424 }, { "epoch": 1.895650946060435, "grad_norm": 4.3752702098309335, "learning_rate": 6.304528089372712e-06, "loss": 0.7208, "step": 13425 }, { "epoch": 1.8957921491104208, "grad_norm": 4.104722949167642, "learning_rate": 6.303111592393051e-06, "loss": 0.6428, "step": 13426 }, { "epoch": 1.8959333521604067, "grad_norm": 3.3739364782247003, "learning_rate": 6.301695181327482e-06, "loss": 0.5557, "step": 13427 }, { "epoch": 1.8960745552103926, "grad_norm": 3.8451021853390897, "learning_rate": 6.300278856208919e-06, "loss": 0.6002, "step": 13428 }, { "epoch": 1.8962157582603785, "grad_norm": 3.6901140238572223, "learning_rate": 6.298862617070277e-06, "loss": 0.5857, "step": 13429 }, { "epoch": 1.8963569613103644, "grad_norm": 3.5633327254260756, "learning_rate": 6.2974464639444745e-06, "loss": 0.5642, "step": 13430 }, { "epoch": 1.8964981643603502, "grad_norm": 2.858638542833089, "learning_rate": 6.2960303968644125e-06, "loss": 0.4307, "step": 13431 }, { "epoch": 1.896639367410336, "grad_norm": 3.3250048953514986, "learning_rate": 6.294614415863005e-06, "loss": 0.4451, "step": 13432 }, { "epoch": 1.8967805704603218, "grad_norm": 4.135491620831202, "learning_rate": 6.2931985209731586e-06, "loss": 0.6512, "step": 13433 }, { "epoch": 1.8969217735103077, "grad_norm": 4.24784567898859, "learning_rate": 6.291782712227776e-06, "loss": 0.563, "step": 13434 }, { "epoch": 1.8970629765602935, "grad_norm": 4.115088763429305, "learning_rate": 6.2903669896597645e-06, "loss": 0.5511, "step": 13435 }, { "epoch": 1.8972041796102794, "grad_norm": 3.448432054760496, "learning_rate": 6.28895135330202e-06, "loss": 0.5025, "step": 13436 }, { "epoch": 1.8973453826602653, "grad_norm": 3.4798574006570493, "learning_rate": 6.287535803187446e-06, "loss": 0.454, "step": 13437 }, { "epoch": 1.8974865857102512, "grad_norm": 3.711726181863225, "learning_rate": 6.286120339348935e-06, "loss": 0.5461, "step": 13438 }, { "epoch": 1.897627788760237, "grad_norm": 4.103743635482809, "learning_rate": 6.284704961819385e-06, "loss": 0.5905, "step": 13439 }, { "epoch": 1.897768991810223, "grad_norm": 3.4591780582248086, "learning_rate": 6.283289670631684e-06, "loss": 0.4903, "step": 13440 }, { "epoch": 1.8979101948602088, "grad_norm": 3.1723619915091064, "learning_rate": 6.281874465818727e-06, "loss": 0.422, "step": 13441 }, { "epoch": 1.8980513979101947, "grad_norm": 3.9345175956355085, "learning_rate": 6.280459347413402e-06, "loss": 0.5671, "step": 13442 }, { "epoch": 1.8981926009601806, "grad_norm": 4.293579203310036, "learning_rate": 6.279044315448595e-06, "loss": 0.5236, "step": 13443 }, { "epoch": 1.8983338040101665, "grad_norm": 3.5823078507523607, "learning_rate": 6.277629369957191e-06, "loss": 0.577, "step": 13444 }, { "epoch": 1.8984750070601524, "grad_norm": 3.477605356948143, "learning_rate": 6.276214510972074e-06, "loss": 0.4667, "step": 13445 }, { "epoch": 1.8986162101101383, "grad_norm": 3.59319449585428, "learning_rate": 6.274799738526125e-06, "loss": 0.5796, "step": 13446 }, { "epoch": 1.8987574131601241, "grad_norm": 3.8654935757970623, "learning_rate": 6.273385052652221e-06, "loss": 0.5569, "step": 13447 }, { "epoch": 1.89889861621011, "grad_norm": 3.4531695729438394, "learning_rate": 6.271970453383235e-06, "loss": 0.5127, "step": 13448 }, { "epoch": 1.899039819260096, "grad_norm": 3.3825740334469367, "learning_rate": 6.270555940752048e-06, "loss": 0.5218, "step": 13449 }, { "epoch": 1.8991810223100818, "grad_norm": 3.479506791495596, "learning_rate": 6.2691415147915306e-06, "loss": 0.5321, "step": 13450 }, { "epoch": 1.8993222253600677, "grad_norm": 3.5457010359204455, "learning_rate": 6.267727175534553e-06, "loss": 0.5192, "step": 13451 }, { "epoch": 1.8994634284100536, "grad_norm": 3.03673595400293, "learning_rate": 6.266312923013983e-06, "loss": 0.385, "step": 13452 }, { "epoch": 1.8996046314600394, "grad_norm": 3.240206378492022, "learning_rate": 6.26489875726269e-06, "loss": 0.4957, "step": 13453 }, { "epoch": 1.8997458345100253, "grad_norm": 3.194625803251091, "learning_rate": 6.263484678313536e-06, "loss": 0.4624, "step": 13454 }, { "epoch": 1.8998870375600112, "grad_norm": 3.3929030867402874, "learning_rate": 6.262070686199386e-06, "loss": 0.521, "step": 13455 }, { "epoch": 1.900028240609997, "grad_norm": 2.7643868637121054, "learning_rate": 6.260656780953098e-06, "loss": 0.4259, "step": 13456 }, { "epoch": 1.900169443659983, "grad_norm": 3.9243369454838475, "learning_rate": 6.2592429626075275e-06, "loss": 0.602, "step": 13457 }, { "epoch": 1.9003106467099689, "grad_norm": 3.6633559492120233, "learning_rate": 6.25782923119554e-06, "loss": 0.5963, "step": 13458 }, { "epoch": 1.9004518497599547, "grad_norm": 3.7349347708267544, "learning_rate": 6.2564155867499845e-06, "loss": 0.6204, "step": 13459 }, { "epoch": 1.9005930528099406, "grad_norm": 3.489559172484282, "learning_rate": 6.2550020293037095e-06, "loss": 0.4741, "step": 13460 }, { "epoch": 1.9007342558599265, "grad_norm": 3.1411824620365554, "learning_rate": 6.253588558889574e-06, "loss": 0.4453, "step": 13461 }, { "epoch": 1.9008754589099124, "grad_norm": 2.9236731460592265, "learning_rate": 6.2521751755404226e-06, "loss": 0.4667, "step": 13462 }, { "epoch": 1.9010166619598983, "grad_norm": 2.6878718262365084, "learning_rate": 6.250761879289103e-06, "loss": 0.3701, "step": 13463 }, { "epoch": 1.9011578650098842, "grad_norm": 4.042627706642249, "learning_rate": 6.2493486701684556e-06, "loss": 0.5441, "step": 13464 }, { "epoch": 1.90129906805987, "grad_norm": 4.404790604062429, "learning_rate": 6.247935548211324e-06, "loss": 0.572, "step": 13465 }, { "epoch": 1.901440271109856, "grad_norm": 3.3656212831646624, "learning_rate": 6.246522513450552e-06, "loss": 0.5335, "step": 13466 }, { "epoch": 1.9015814741598418, "grad_norm": 4.577944759719374, "learning_rate": 6.245109565918976e-06, "loss": 0.6387, "step": 13467 }, { "epoch": 1.9017226772098277, "grad_norm": 2.9637291767477945, "learning_rate": 6.243696705649432e-06, "loss": 0.3523, "step": 13468 }, { "epoch": 1.9018638802598136, "grad_norm": 3.2359656238349226, "learning_rate": 6.242283932674752e-06, "loss": 0.483, "step": 13469 }, { "epoch": 1.9020050833097994, "grad_norm": 3.55124655840089, "learning_rate": 6.240871247027774e-06, "loss": 0.5854, "step": 13470 }, { "epoch": 1.9021462863597853, "grad_norm": 3.235279043523479, "learning_rate": 6.2394586487413225e-06, "loss": 0.4653, "step": 13471 }, { "epoch": 1.9022874894097712, "grad_norm": 3.483502072499075, "learning_rate": 6.23804613784823e-06, "loss": 0.555, "step": 13472 }, { "epoch": 1.902428692459757, "grad_norm": 3.5061307283348504, "learning_rate": 6.23663371438132e-06, "loss": 0.4853, "step": 13473 }, { "epoch": 1.902569895509743, "grad_norm": 2.9753120736905108, "learning_rate": 6.235221378373415e-06, "loss": 0.4698, "step": 13474 }, { "epoch": 1.9027110985597289, "grad_norm": 3.4874116829860085, "learning_rate": 6.23380912985734e-06, "loss": 0.5942, "step": 13475 }, { "epoch": 1.9028523016097147, "grad_norm": 3.932341691813281, "learning_rate": 6.232396968865916e-06, "loss": 0.5674, "step": 13476 }, { "epoch": 1.9029935046597006, "grad_norm": 3.197416839462005, "learning_rate": 6.230984895431957e-06, "loss": 0.5397, "step": 13477 }, { "epoch": 1.9031347077096865, "grad_norm": 3.901535172890022, "learning_rate": 6.229572909588282e-06, "loss": 0.528, "step": 13478 }, { "epoch": 1.9032759107596724, "grad_norm": 3.5367989820110792, "learning_rate": 6.228161011367706e-06, "loss": 0.5403, "step": 13479 }, { "epoch": 1.9034171138096583, "grad_norm": 4.435100017125884, "learning_rate": 6.2267492008030395e-06, "loss": 0.8056, "step": 13480 }, { "epoch": 1.9035583168596442, "grad_norm": 3.287977744577752, "learning_rate": 6.225337477927092e-06, "loss": 0.4601, "step": 13481 }, { "epoch": 1.90369951990963, "grad_norm": 3.806724059770396, "learning_rate": 6.223925842772668e-06, "loss": 0.5308, "step": 13482 }, { "epoch": 1.903840722959616, "grad_norm": 3.148508643615088, "learning_rate": 6.222514295372579e-06, "loss": 0.4877, "step": 13483 }, { "epoch": 1.9039819260096018, "grad_norm": 3.804527430435709, "learning_rate": 6.221102835759626e-06, "loss": 0.4942, "step": 13484 }, { "epoch": 1.9041231290595877, "grad_norm": 3.673156309053446, "learning_rate": 6.21969146396661e-06, "loss": 0.4812, "step": 13485 }, { "epoch": 1.9042643321095736, "grad_norm": 3.749173471566194, "learning_rate": 6.2182801800263325e-06, "loss": 0.5813, "step": 13486 }, { "epoch": 1.9044055351595595, "grad_norm": 3.6173725295608086, "learning_rate": 6.216868983971591e-06, "loss": 0.6701, "step": 13487 }, { "epoch": 1.9045467382095453, "grad_norm": 3.7015405642538046, "learning_rate": 6.21545787583518e-06, "loss": 0.6082, "step": 13488 }, { "epoch": 1.9046879412595312, "grad_norm": 4.184283313805566, "learning_rate": 6.214046855649898e-06, "loss": 0.6809, "step": 13489 }, { "epoch": 1.904829144309517, "grad_norm": 3.781968486114172, "learning_rate": 6.212635923448526e-06, "loss": 0.6126, "step": 13490 }, { "epoch": 1.904970347359503, "grad_norm": 4.162485942149802, "learning_rate": 6.211225079263861e-06, "loss": 0.5773, "step": 13491 }, { "epoch": 1.9051115504094889, "grad_norm": 4.480103004071899, "learning_rate": 6.209814323128689e-06, "loss": 0.5347, "step": 13492 }, { "epoch": 1.9052527534594748, "grad_norm": 3.8877504395548463, "learning_rate": 6.208403655075797e-06, "loss": 0.5051, "step": 13493 }, { "epoch": 1.9053939565094606, "grad_norm": 3.648369398878296, "learning_rate": 6.2069930751379635e-06, "loss": 0.5209, "step": 13494 }, { "epoch": 1.9055351595594465, "grad_norm": 3.2069234380379816, "learning_rate": 6.205582583347974e-06, "loss": 0.5022, "step": 13495 }, { "epoch": 1.9056763626094324, "grad_norm": 3.9214667493410396, "learning_rate": 6.2041721797386075e-06, "loss": 0.6172, "step": 13496 }, { "epoch": 1.9058175656594183, "grad_norm": 3.6824811558907316, "learning_rate": 6.2027618643426425e-06, "loss": 0.4731, "step": 13497 }, { "epoch": 1.9059587687094042, "grad_norm": 4.214602276313725, "learning_rate": 6.201351637192849e-06, "loss": 0.5478, "step": 13498 }, { "epoch": 1.90609997175939, "grad_norm": 3.5293459118156787, "learning_rate": 6.199941498322004e-06, "loss": 0.4591, "step": 13499 }, { "epoch": 1.906241174809376, "grad_norm": 4.807394649678139, "learning_rate": 6.198531447762875e-06, "loss": 0.7148, "step": 13500 }, { "epoch": 1.9063823778593618, "grad_norm": 3.632114884957227, "learning_rate": 6.1971214855482356e-06, "loss": 0.603, "step": 13501 }, { "epoch": 1.9065235809093477, "grad_norm": 3.551633007477498, "learning_rate": 6.195711611710851e-06, "loss": 0.5242, "step": 13502 }, { "epoch": 1.9066647839593336, "grad_norm": 3.661535431365538, "learning_rate": 6.194301826283482e-06, "loss": 0.5458, "step": 13503 }, { "epoch": 1.9068059870093195, "grad_norm": 3.1531597283656896, "learning_rate": 6.192892129298898e-06, "loss": 0.4651, "step": 13504 }, { "epoch": 1.9069471900593054, "grad_norm": 3.8327274881705953, "learning_rate": 6.1914825207898566e-06, "loss": 0.5357, "step": 13505 }, { "epoch": 1.9070883931092912, "grad_norm": 3.8559468524331457, "learning_rate": 6.1900730007891185e-06, "loss": 0.6479, "step": 13506 }, { "epoch": 1.9072295961592771, "grad_norm": 3.0357380017259485, "learning_rate": 6.188663569329437e-06, "loss": 0.4086, "step": 13507 }, { "epoch": 1.907370799209263, "grad_norm": 3.8883329931847057, "learning_rate": 6.187254226443566e-06, "loss": 0.5999, "step": 13508 }, { "epoch": 1.9075120022592489, "grad_norm": 4.577885023170147, "learning_rate": 6.185844972164262e-06, "loss": 0.7173, "step": 13509 }, { "epoch": 1.9076532053092348, "grad_norm": 3.413859526654456, "learning_rate": 6.184435806524274e-06, "loss": 0.4953, "step": 13510 }, { "epoch": 1.9077944083592207, "grad_norm": 3.262064976748346, "learning_rate": 6.18302672955635e-06, "loss": 0.4225, "step": 13511 }, { "epoch": 1.9079356114092065, "grad_norm": 2.965394349435497, "learning_rate": 6.181617741293234e-06, "loss": 0.4792, "step": 13512 }, { "epoch": 1.9080768144591924, "grad_norm": 3.1037880051798457, "learning_rate": 6.180208841767674e-06, "loss": 0.5114, "step": 13513 }, { "epoch": 1.9082180175091783, "grad_norm": 3.8124164331738823, "learning_rate": 6.178800031012414e-06, "loss": 0.5502, "step": 13514 }, { "epoch": 1.9083592205591642, "grad_norm": 3.111656675250065, "learning_rate": 6.1773913090601885e-06, "loss": 0.481, "step": 13515 }, { "epoch": 1.90850042360915, "grad_norm": 2.999046367837904, "learning_rate": 6.175982675943739e-06, "loss": 0.4042, "step": 13516 }, { "epoch": 1.908641626659136, "grad_norm": 3.0739467931061895, "learning_rate": 6.174574131695797e-06, "loss": 0.4278, "step": 13517 }, { "epoch": 1.9087828297091218, "grad_norm": 3.3979989662420262, "learning_rate": 6.173165676349103e-06, "loss": 0.5961, "step": 13518 }, { "epoch": 1.9089240327591077, "grad_norm": 3.091348252451232, "learning_rate": 6.1717573099363856e-06, "loss": 0.4602, "step": 13519 }, { "epoch": 1.9090652358090936, "grad_norm": 3.717554374266332, "learning_rate": 6.1703490324903745e-06, "loss": 0.5919, "step": 13520 }, { "epoch": 1.9092064388590795, "grad_norm": 3.470340719364818, "learning_rate": 6.168940844043798e-06, "loss": 0.4467, "step": 13521 }, { "epoch": 1.9093476419090654, "grad_norm": 4.318080180480572, "learning_rate": 6.167532744629383e-06, "loss": 0.6636, "step": 13522 }, { "epoch": 1.9094888449590512, "grad_norm": 3.394017725245674, "learning_rate": 6.166124734279855e-06, "loss": 0.489, "step": 13523 }, { "epoch": 1.9096300480090371, "grad_norm": 3.1723969993033285, "learning_rate": 6.164716813027929e-06, "loss": 0.3914, "step": 13524 }, { "epoch": 1.909771251059023, "grad_norm": 3.515909400637687, "learning_rate": 6.163308980906325e-06, "loss": 0.5366, "step": 13525 }, { "epoch": 1.909912454109009, "grad_norm": 4.007155349360425, "learning_rate": 6.161901237947768e-06, "loss": 0.4827, "step": 13526 }, { "epoch": 1.9100536571589948, "grad_norm": 3.969105619031376, "learning_rate": 6.160493584184966e-06, "loss": 0.6022, "step": 13527 }, { "epoch": 1.9101948602089807, "grad_norm": 3.8830887341165017, "learning_rate": 6.159086019650635e-06, "loss": 0.5606, "step": 13528 }, { "epoch": 1.9103360632589665, "grad_norm": 3.244836146085893, "learning_rate": 6.157678544377486e-06, "loss": 0.5403, "step": 13529 }, { "epoch": 1.9104772663089524, "grad_norm": 3.2302744983757865, "learning_rate": 6.15627115839823e-06, "loss": 0.4633, "step": 13530 }, { "epoch": 1.9106184693589383, "grad_norm": 3.313051933301295, "learning_rate": 6.154863861745572e-06, "loss": 0.4123, "step": 13531 }, { "epoch": 1.9107596724089242, "grad_norm": 3.6637467968928346, "learning_rate": 6.1534566544522175e-06, "loss": 0.4728, "step": 13532 }, { "epoch": 1.91090087545891, "grad_norm": 3.4365651866362437, "learning_rate": 6.152049536550869e-06, "loss": 0.5196, "step": 13533 }, { "epoch": 1.911042078508896, "grad_norm": 4.673104483537478, "learning_rate": 6.150642508074225e-06, "loss": 0.785, "step": 13534 }, { "epoch": 1.9111832815588816, "grad_norm": 3.0815360384651327, "learning_rate": 6.149235569054989e-06, "loss": 0.4526, "step": 13535 }, { "epoch": 1.9113244846088675, "grad_norm": 3.7679388215482086, "learning_rate": 6.1478287195258545e-06, "loss": 0.5181, "step": 13536 }, { "epoch": 1.9114656876588534, "grad_norm": 3.502537151915502, "learning_rate": 6.1464219595195165e-06, "loss": 0.4431, "step": 13537 }, { "epoch": 1.9116068907088393, "grad_norm": 3.045800990373655, "learning_rate": 6.1450152890686684e-06, "loss": 0.4536, "step": 13538 }, { "epoch": 1.9117480937588252, "grad_norm": 3.2758310070994097, "learning_rate": 6.1436087082060016e-06, "loss": 0.477, "step": 13539 }, { "epoch": 1.911889296808811, "grad_norm": 3.9009353783444776, "learning_rate": 6.142202216964204e-06, "loss": 0.5623, "step": 13540 }, { "epoch": 1.912030499858797, "grad_norm": 2.872219961471856, "learning_rate": 6.1407958153759584e-06, "loss": 0.4414, "step": 13541 }, { "epoch": 1.9121717029087828, "grad_norm": 3.2577917763885544, "learning_rate": 6.139389503473951e-06, "loss": 0.4844, "step": 13542 }, { "epoch": 1.9123129059587687, "grad_norm": 3.3329042612416893, "learning_rate": 6.1379832812908645e-06, "loss": 0.5126, "step": 13543 }, { "epoch": 1.9124541090087546, "grad_norm": 3.553803280191966, "learning_rate": 6.13657714885938e-06, "loss": 0.5851, "step": 13544 }, { "epoch": 1.9125953120587404, "grad_norm": 3.720635014566452, "learning_rate": 6.135171106212172e-06, "loss": 0.5827, "step": 13545 }, { "epoch": 1.9127365151087263, "grad_norm": 3.9525363796902173, "learning_rate": 6.133765153381918e-06, "loss": 0.6083, "step": 13546 }, { "epoch": 1.9128777181587122, "grad_norm": 3.0726411828982223, "learning_rate": 6.132359290401294e-06, "loss": 0.4169, "step": 13547 }, { "epoch": 1.913018921208698, "grad_norm": 2.9150093956573593, "learning_rate": 6.130953517302972e-06, "loss": 0.4028, "step": 13548 }, { "epoch": 1.913160124258684, "grad_norm": 3.6550136124451797, "learning_rate": 6.129547834119618e-06, "loss": 0.6039, "step": 13549 }, { "epoch": 1.9133013273086699, "grad_norm": 3.3243467567096157, "learning_rate": 6.128142240883899e-06, "loss": 0.5308, "step": 13550 }, { "epoch": 1.9134425303586557, "grad_norm": 3.549920782020388, "learning_rate": 6.1267367376284806e-06, "loss": 0.5279, "step": 13551 }, { "epoch": 1.9135837334086416, "grad_norm": 3.2352285461853185, "learning_rate": 6.12533132438603e-06, "loss": 0.5002, "step": 13552 }, { "epoch": 1.9137249364586275, "grad_norm": 3.2576633826634995, "learning_rate": 6.1239260011892065e-06, "loss": 0.4877, "step": 13553 }, { "epoch": 1.9138661395086134, "grad_norm": 2.8374728833586733, "learning_rate": 6.122520768070669e-06, "loss": 0.3855, "step": 13554 }, { "epoch": 1.9140073425585993, "grad_norm": 4.410852843141075, "learning_rate": 6.121115625063072e-06, "loss": 0.5851, "step": 13555 }, { "epoch": 1.9141485456085852, "grad_norm": 3.209414470374029, "learning_rate": 6.119710572199075e-06, "loss": 0.366, "step": 13556 }, { "epoch": 1.914289748658571, "grad_norm": 3.2890412988106603, "learning_rate": 6.11830560951133e-06, "loss": 0.4857, "step": 13557 }, { "epoch": 1.914430951708557, "grad_norm": 3.5286376712353222, "learning_rate": 6.116900737032484e-06, "loss": 0.5163, "step": 13558 }, { "epoch": 1.9145721547585428, "grad_norm": 3.2451366572615563, "learning_rate": 6.115495954795187e-06, "loss": 0.5468, "step": 13559 }, { "epoch": 1.9147133578085287, "grad_norm": 2.8526481162127744, "learning_rate": 6.114091262832087e-06, "loss": 0.4907, "step": 13560 }, { "epoch": 1.9148545608585146, "grad_norm": 3.1051335601721703, "learning_rate": 6.1126866611758285e-06, "loss": 0.4752, "step": 13561 }, { "epoch": 1.9149957639085005, "grad_norm": 4.269662888694978, "learning_rate": 6.111282149859052e-06, "loss": 0.664, "step": 13562 }, { "epoch": 1.9151369669584863, "grad_norm": 3.2042839713201983, "learning_rate": 6.109877728914399e-06, "loss": 0.473, "step": 13563 }, { "epoch": 1.9152781700084722, "grad_norm": 3.858288973332044, "learning_rate": 6.108473398374509e-06, "loss": 0.5264, "step": 13564 }, { "epoch": 1.915419373058458, "grad_norm": 3.1049400498448203, "learning_rate": 6.107069158272018e-06, "loss": 0.4681, "step": 13565 }, { "epoch": 1.915560576108444, "grad_norm": 3.610705612551678, "learning_rate": 6.105665008639557e-06, "loss": 0.5476, "step": 13566 }, { "epoch": 1.9157017791584299, "grad_norm": 3.6442190961591105, "learning_rate": 6.1042609495097595e-06, "loss": 0.6075, "step": 13567 }, { "epoch": 1.9158429822084158, "grad_norm": 3.6549497499715256, "learning_rate": 6.102856980915253e-06, "loss": 0.5278, "step": 13568 }, { "epoch": 1.9159841852584014, "grad_norm": 3.582281737849151, "learning_rate": 6.101453102888668e-06, "loss": 0.6147, "step": 13569 }, { "epoch": 1.9161253883083873, "grad_norm": 3.019319900328289, "learning_rate": 6.10004931546263e-06, "loss": 0.4609, "step": 13570 }, { "epoch": 1.9162665913583732, "grad_norm": 3.390737869258694, "learning_rate": 6.098645618669763e-06, "loss": 0.471, "step": 13571 }, { "epoch": 1.916407794408359, "grad_norm": 2.620801538705652, "learning_rate": 6.097242012542683e-06, "loss": 0.4171, "step": 13572 }, { "epoch": 1.916548997458345, "grad_norm": 3.4813054064006717, "learning_rate": 6.095838497114017e-06, "loss": 0.5203, "step": 13573 }, { "epoch": 1.9166902005083308, "grad_norm": 4.227271443361867, "learning_rate": 6.094435072416379e-06, "loss": 0.7635, "step": 13574 }, { "epoch": 1.9168314035583167, "grad_norm": 3.2913629658695016, "learning_rate": 6.093031738482382e-06, "loss": 0.4179, "step": 13575 }, { "epoch": 1.9169726066083026, "grad_norm": 3.810538845406756, "learning_rate": 6.091628495344639e-06, "loss": 0.5746, "step": 13576 }, { "epoch": 1.9171138096582885, "grad_norm": 3.0652061665347605, "learning_rate": 6.090225343035761e-06, "loss": 0.4449, "step": 13577 }, { "epoch": 1.9172550127082744, "grad_norm": 3.3391346481787543, "learning_rate": 6.088822281588359e-06, "loss": 0.4395, "step": 13578 }, { "epoch": 1.9173962157582602, "grad_norm": 3.478485502970465, "learning_rate": 6.087419311035037e-06, "loss": 0.4929, "step": 13579 }, { "epoch": 1.9175374188082461, "grad_norm": 3.0885636705685817, "learning_rate": 6.0860164314084e-06, "loss": 0.4982, "step": 13580 }, { "epoch": 1.917678621858232, "grad_norm": 3.181171998915513, "learning_rate": 6.084613642741053e-06, "loss": 0.4996, "step": 13581 }, { "epoch": 1.917819824908218, "grad_norm": 3.461859649805699, "learning_rate": 6.083210945065595e-06, "loss": 0.5333, "step": 13582 }, { "epoch": 1.9179610279582038, "grad_norm": 3.8161195976709013, "learning_rate": 6.081808338414622e-06, "loss": 0.704, "step": 13583 }, { "epoch": 1.9181022310081897, "grad_norm": 3.6137685291659687, "learning_rate": 6.08040582282073e-06, "loss": 0.4822, "step": 13584 }, { "epoch": 1.9182434340581755, "grad_norm": 4.619329460674928, "learning_rate": 6.079003398316513e-06, "loss": 0.7857, "step": 13585 }, { "epoch": 1.9183846371081614, "grad_norm": 3.4720373716595976, "learning_rate": 6.077601064934565e-06, "loss": 0.5376, "step": 13586 }, { "epoch": 1.9185258401581473, "grad_norm": 2.962221244630581, "learning_rate": 6.0761988227074755e-06, "loss": 0.4817, "step": 13587 }, { "epoch": 1.9186670432081332, "grad_norm": 3.759782719783537, "learning_rate": 6.074796671667829e-06, "loss": 0.5656, "step": 13588 }, { "epoch": 1.918808246258119, "grad_norm": 2.834152426238622, "learning_rate": 6.073394611848211e-06, "loss": 0.4787, "step": 13589 }, { "epoch": 1.918949449308105, "grad_norm": 3.5890071399747185, "learning_rate": 6.071992643281209e-06, "loss": 0.5092, "step": 13590 }, { "epoch": 1.9190906523580908, "grad_norm": 3.076592295672931, "learning_rate": 6.070590765999404e-06, "loss": 0.4684, "step": 13591 }, { "epoch": 1.9192318554080767, "grad_norm": 3.391661835505643, "learning_rate": 6.06918898003537e-06, "loss": 0.5366, "step": 13592 }, { "epoch": 1.9193730584580626, "grad_norm": 3.389310149671312, "learning_rate": 6.067787285421687e-06, "loss": 0.5088, "step": 13593 }, { "epoch": 1.9195142615080485, "grad_norm": 3.5710960671210703, "learning_rate": 6.066385682190928e-06, "loss": 0.5519, "step": 13594 }, { "epoch": 1.9196554645580344, "grad_norm": 3.712448714946225, "learning_rate": 6.064984170375667e-06, "loss": 0.5592, "step": 13595 }, { "epoch": 1.9197966676080203, "grad_norm": 4.762380080558613, "learning_rate": 6.063582750008474e-06, "loss": 0.6387, "step": 13596 }, { "epoch": 1.9199378706580061, "grad_norm": 3.6402732569856093, "learning_rate": 6.062181421121917e-06, "loss": 0.5151, "step": 13597 }, { "epoch": 1.920079073707992, "grad_norm": 3.6937359442486914, "learning_rate": 6.0607801837485665e-06, "loss": 0.5459, "step": 13598 }, { "epoch": 1.920220276757978, "grad_norm": 2.747885758529308, "learning_rate": 6.059379037920985e-06, "loss": 0.3568, "step": 13599 }, { "epoch": 1.9203614798079638, "grad_norm": 3.119520243051187, "learning_rate": 6.05797798367173e-06, "loss": 0.4758, "step": 13600 }, { "epoch": 1.9205026828579497, "grad_norm": 3.708726591510971, "learning_rate": 6.056577021033364e-06, "loss": 0.6379, "step": 13601 }, { "epoch": 1.9206438859079356, "grad_norm": 3.412951956597504, "learning_rate": 6.055176150038445e-06, "loss": 0.4454, "step": 13602 }, { "epoch": 1.9207850889579214, "grad_norm": 4.210896875109653, "learning_rate": 6.053775370719527e-06, "loss": 0.634, "step": 13603 }, { "epoch": 1.9209262920079073, "grad_norm": 3.7734343576831892, "learning_rate": 6.052374683109168e-06, "loss": 0.5846, "step": 13604 }, { "epoch": 1.9210674950578932, "grad_norm": 3.5785825821963813, "learning_rate": 6.050974087239914e-06, "loss": 0.5085, "step": 13605 }, { "epoch": 1.921208698107879, "grad_norm": 3.1508824152331707, "learning_rate": 6.049573583144317e-06, "loss": 0.5047, "step": 13606 }, { "epoch": 1.921349901157865, "grad_norm": 3.894142587765325, "learning_rate": 6.04817317085493e-06, "loss": 0.5758, "step": 13607 }, { "epoch": 1.9214911042078509, "grad_norm": 4.0886094707083345, "learning_rate": 6.0467728504042844e-06, "loss": 0.6074, "step": 13608 }, { "epoch": 1.9216323072578367, "grad_norm": 3.0423790584947112, "learning_rate": 6.045372621824933e-06, "loss": 0.4421, "step": 13609 }, { "epoch": 1.9217735103078226, "grad_norm": 3.2756103373293435, "learning_rate": 6.043972485149414e-06, "loss": 0.4782, "step": 13610 }, { "epoch": 1.9219147133578085, "grad_norm": 3.4497795253288377, "learning_rate": 6.042572440410263e-06, "loss": 0.4279, "step": 13611 }, { "epoch": 1.9220559164077944, "grad_norm": 4.1314523430301415, "learning_rate": 6.041172487640024e-06, "loss": 0.6322, "step": 13612 }, { "epoch": 1.9221971194577803, "grad_norm": 3.8334415761867664, "learning_rate": 6.039772626871226e-06, "loss": 0.5951, "step": 13613 }, { "epoch": 1.9223383225077662, "grad_norm": 3.238188820751821, "learning_rate": 6.038372858136401e-06, "loss": 0.4152, "step": 13614 }, { "epoch": 1.922479525557752, "grad_norm": 3.725332506953525, "learning_rate": 6.036973181468078e-06, "loss": 0.6329, "step": 13615 }, { "epoch": 1.922620728607738, "grad_norm": 3.552989396765581, "learning_rate": 6.035573596898789e-06, "loss": 0.5787, "step": 13616 }, { "epoch": 1.9227619316577238, "grad_norm": 3.8514277539827946, "learning_rate": 6.034174104461058e-06, "loss": 0.5596, "step": 13617 }, { "epoch": 1.9229031347077097, "grad_norm": 4.189230346480115, "learning_rate": 6.0327747041874075e-06, "loss": 0.5708, "step": 13618 }, { "epoch": 1.9230443377576956, "grad_norm": 3.949472483144072, "learning_rate": 6.03137539611036e-06, "loss": 0.601, "step": 13619 }, { "epoch": 1.9231855408076814, "grad_norm": 2.592700351334432, "learning_rate": 6.029976180262431e-06, "loss": 0.3689, "step": 13620 }, { "epoch": 1.9233267438576673, "grad_norm": 3.3486171412005348, "learning_rate": 6.028577056676144e-06, "loss": 0.5323, "step": 13621 }, { "epoch": 1.9234679469076532, "grad_norm": 3.507860674298758, "learning_rate": 6.027178025384011e-06, "loss": 0.5221, "step": 13622 }, { "epoch": 1.923609149957639, "grad_norm": 3.713206626805462, "learning_rate": 6.025779086418542e-06, "loss": 0.4849, "step": 13623 }, { "epoch": 1.923750353007625, "grad_norm": 4.59671376097087, "learning_rate": 6.024380239812258e-06, "loss": 0.6278, "step": 13624 }, { "epoch": 1.9238915560576109, "grad_norm": 3.592267677505597, "learning_rate": 6.022981485597654e-06, "loss": 0.5405, "step": 13625 }, { "epoch": 1.9240327591075967, "grad_norm": 2.902778541893034, "learning_rate": 6.0215828238072446e-06, "loss": 0.4044, "step": 13626 }, { "epoch": 1.9241739621575826, "grad_norm": 4.08972368951894, "learning_rate": 6.020184254473532e-06, "loss": 0.6653, "step": 13627 }, { "epoch": 1.9243151652075685, "grad_norm": 3.0409383342439167, "learning_rate": 6.018785777629017e-06, "loss": 0.4693, "step": 13628 }, { "epoch": 1.9244563682575544, "grad_norm": 3.3176786739853785, "learning_rate": 6.017387393306203e-06, "loss": 0.3918, "step": 13629 }, { "epoch": 1.9245975713075403, "grad_norm": 3.303659159818569, "learning_rate": 6.015989101537586e-06, "loss": 0.4461, "step": 13630 }, { "epoch": 1.9247387743575262, "grad_norm": 3.5229710767531186, "learning_rate": 6.014590902355662e-06, "loss": 0.6161, "step": 13631 }, { "epoch": 1.924879977407512, "grad_norm": 3.182239001361612, "learning_rate": 6.013192795792922e-06, "loss": 0.4661, "step": 13632 }, { "epoch": 1.925021180457498, "grad_norm": 3.3677548915421176, "learning_rate": 6.011794781881866e-06, "loss": 0.5341, "step": 13633 }, { "epoch": 1.9251623835074838, "grad_norm": 2.917421332280244, "learning_rate": 6.01039686065497e-06, "loss": 0.4037, "step": 13634 }, { "epoch": 1.9253035865574697, "grad_norm": 3.124174648607816, "learning_rate": 6.008999032144731e-06, "loss": 0.4526, "step": 13635 }, { "epoch": 1.9254447896074556, "grad_norm": 4.548034251473878, "learning_rate": 6.007601296383629e-06, "loss": 0.6609, "step": 13636 }, { "epoch": 1.9255859926574415, "grad_norm": 4.525794141436628, "learning_rate": 6.006203653404149e-06, "loss": 0.7639, "step": 13637 }, { "epoch": 1.9257271957074273, "grad_norm": 3.4029999943974265, "learning_rate": 6.004806103238771e-06, "loss": 0.5091, "step": 13638 }, { "epoch": 1.9258683987574132, "grad_norm": 3.8394010238866545, "learning_rate": 6.003408645919977e-06, "loss": 0.6227, "step": 13639 }, { "epoch": 1.926009601807399, "grad_norm": 3.06487560436677, "learning_rate": 6.0020112814802355e-06, "loss": 0.5039, "step": 13640 }, { "epoch": 1.926150804857385, "grad_norm": 3.9914153516355944, "learning_rate": 6.000614009952033e-06, "loss": 0.7057, "step": 13641 }, { "epoch": 1.9262920079073709, "grad_norm": 3.0072049376254864, "learning_rate": 5.9992168313678264e-06, "loss": 0.4178, "step": 13642 }, { "epoch": 1.9264332109573568, "grad_norm": 3.7759269801323927, "learning_rate": 5.997819745760094e-06, "loss": 0.5494, "step": 13643 }, { "epoch": 1.9265744140073426, "grad_norm": 4.100227751364717, "learning_rate": 5.996422753161304e-06, "loss": 0.6016, "step": 13644 }, { "epoch": 1.9267156170573285, "grad_norm": 2.9960460284968504, "learning_rate": 5.99502585360392e-06, "loss": 0.4263, "step": 13645 }, { "epoch": 1.9268568201073144, "grad_norm": 2.7244867516768427, "learning_rate": 5.993629047120403e-06, "loss": 0.4465, "step": 13646 }, { "epoch": 1.9269980231573003, "grad_norm": 4.183471642996471, "learning_rate": 5.992232333743218e-06, "loss": 0.5688, "step": 13647 }, { "epoch": 1.9271392262072862, "grad_norm": 3.439135611152873, "learning_rate": 5.990835713504825e-06, "loss": 0.517, "step": 13648 }, { "epoch": 1.927280429257272, "grad_norm": 3.911613043925635, "learning_rate": 5.989439186437674e-06, "loss": 0.5166, "step": 13649 }, { "epoch": 1.927421632307258, "grad_norm": 3.7405548788496303, "learning_rate": 5.988042752574233e-06, "loss": 0.492, "step": 13650 }, { "epoch": 1.9275628353572438, "grad_norm": 2.9889449688637675, "learning_rate": 5.9866464119469366e-06, "loss": 0.4359, "step": 13651 }, { "epoch": 1.9277040384072297, "grad_norm": 3.827210697257077, "learning_rate": 5.9852501645882485e-06, "loss": 0.5872, "step": 13652 }, { "epoch": 1.9278452414572156, "grad_norm": 3.3200150703833264, "learning_rate": 5.983854010530612e-06, "loss": 0.4655, "step": 13653 }, { "epoch": 1.9279864445072015, "grad_norm": 3.3511122022803232, "learning_rate": 5.9824579498064725e-06, "loss": 0.5472, "step": 13654 }, { "epoch": 1.9281276475571874, "grad_norm": 3.1094790300202604, "learning_rate": 5.9810619824482775e-06, "loss": 0.4448, "step": 13655 }, { "epoch": 1.9282688506071732, "grad_norm": 3.367906720988765, "learning_rate": 5.979666108488464e-06, "loss": 0.5373, "step": 13656 }, { "epoch": 1.9284100536571591, "grad_norm": 3.347579850286576, "learning_rate": 5.978270327959477e-06, "loss": 0.4802, "step": 13657 }, { "epoch": 1.928551256707145, "grad_norm": 3.4698172935243554, "learning_rate": 5.976874640893751e-06, "loss": 0.579, "step": 13658 }, { "epoch": 1.9286924597571309, "grad_norm": 3.7598719262008236, "learning_rate": 5.975479047323718e-06, "loss": 0.6637, "step": 13659 }, { "epoch": 1.9288336628071168, "grad_norm": 3.5544129080957063, "learning_rate": 5.9740835472818145e-06, "loss": 0.5076, "step": 13660 }, { "epoch": 1.9289748658571026, "grad_norm": 3.9797898829366027, "learning_rate": 5.972688140800471e-06, "loss": 0.5499, "step": 13661 }, { "epoch": 1.9291160689070885, "grad_norm": 3.4268237200552174, "learning_rate": 5.971292827912117e-06, "loss": 0.4516, "step": 13662 }, { "epoch": 1.9292572719570744, "grad_norm": 3.6547781808334783, "learning_rate": 5.9698976086491745e-06, "loss": 0.5811, "step": 13663 }, { "epoch": 1.9293984750070603, "grad_norm": 3.298319348305385, "learning_rate": 5.968502483044074e-06, "loss": 0.4992, "step": 13664 }, { "epoch": 1.9295396780570462, "grad_norm": 3.6373737522311647, "learning_rate": 5.9671074511292335e-06, "loss": 0.5531, "step": 13665 }, { "epoch": 1.929680881107032, "grad_norm": 3.837363381971307, "learning_rate": 5.965712512937074e-06, "loss": 0.6243, "step": 13666 }, { "epoch": 1.929822084157018, "grad_norm": 3.6588827163377275, "learning_rate": 5.964317668500017e-06, "loss": 0.5374, "step": 13667 }, { "epoch": 1.9299632872070038, "grad_norm": 2.9205619085357166, "learning_rate": 5.962922917850471e-06, "loss": 0.4447, "step": 13668 }, { "epoch": 1.9301044902569897, "grad_norm": 3.725526912178215, "learning_rate": 5.9615282610208535e-06, "loss": 0.4864, "step": 13669 }, { "epoch": 1.9302456933069756, "grad_norm": 3.603580866145728, "learning_rate": 5.9601336980435756e-06, "loss": 0.5965, "step": 13670 }, { "epoch": 1.9303868963569613, "grad_norm": 3.4290863819229553, "learning_rate": 5.958739228951043e-06, "loss": 0.4589, "step": 13671 }, { "epoch": 1.9305280994069471, "grad_norm": 3.9752371050789406, "learning_rate": 5.957344853775668e-06, "loss": 0.5655, "step": 13672 }, { "epoch": 1.930669302456933, "grad_norm": 3.665325945554052, "learning_rate": 5.955950572549853e-06, "loss": 0.4454, "step": 13673 }, { "epoch": 1.930810505506919, "grad_norm": 3.5086278690845893, "learning_rate": 5.954556385306001e-06, "loss": 0.5546, "step": 13674 }, { "epoch": 1.9309517085569048, "grad_norm": 3.2635311744751765, "learning_rate": 5.953162292076512e-06, "loss": 0.4707, "step": 13675 }, { "epoch": 1.9310929116068907, "grad_norm": 4.506963373349829, "learning_rate": 5.9517682928937825e-06, "loss": 0.66, "step": 13676 }, { "epoch": 1.9312341146568766, "grad_norm": 4.523539078707294, "learning_rate": 5.950374387790207e-06, "loss": 0.6848, "step": 13677 }, { "epoch": 1.9313753177068624, "grad_norm": 3.7986039775746328, "learning_rate": 5.9489805767981845e-06, "loss": 0.5167, "step": 13678 }, { "epoch": 1.9315165207568483, "grad_norm": 3.428832680730377, "learning_rate": 5.947586859950103e-06, "loss": 0.4003, "step": 13679 }, { "epoch": 1.9316577238068342, "grad_norm": 3.972805162505941, "learning_rate": 5.946193237278352e-06, "loss": 0.6438, "step": 13680 }, { "epoch": 1.93179892685682, "grad_norm": 3.4645735752362086, "learning_rate": 5.9447997088153205e-06, "loss": 0.6047, "step": 13681 }, { "epoch": 1.931940129906806, "grad_norm": 3.226147248193192, "learning_rate": 5.9434062745933926e-06, "loss": 0.5454, "step": 13682 }, { "epoch": 1.9320813329567919, "grad_norm": 2.8333540314768264, "learning_rate": 5.942012934644949e-06, "loss": 0.4576, "step": 13683 }, { "epoch": 1.9322225360067777, "grad_norm": 3.8883253223838556, "learning_rate": 5.9406196890023786e-06, "loss": 0.5371, "step": 13684 }, { "epoch": 1.9323637390567636, "grad_norm": 4.088583830091247, "learning_rate": 5.9392265376980465e-06, "loss": 0.6727, "step": 13685 }, { "epoch": 1.9325049421067495, "grad_norm": 3.815753034701038, "learning_rate": 5.937833480764339e-06, "loss": 0.5212, "step": 13686 }, { "epoch": 1.9326461451567354, "grad_norm": 3.3486987726518684, "learning_rate": 5.936440518233627e-06, "loss": 0.5368, "step": 13687 }, { "epoch": 1.9327873482067213, "grad_norm": 3.126859204143963, "learning_rate": 5.935047650138282e-06, "loss": 0.4707, "step": 13688 }, { "epoch": 1.9329285512567072, "grad_norm": 3.327798063182012, "learning_rate": 5.933654876510673e-06, "loss": 0.4783, "step": 13689 }, { "epoch": 1.933069754306693, "grad_norm": 3.201636557914574, "learning_rate": 5.93226219738317e-06, "loss": 0.4515, "step": 13690 }, { "epoch": 1.933210957356679, "grad_norm": 3.754261697844495, "learning_rate": 5.930869612788136e-06, "loss": 0.5823, "step": 13691 }, { "epoch": 1.9333521604066648, "grad_norm": 3.56281624782598, "learning_rate": 5.929477122757938e-06, "loss": 0.5222, "step": 13692 }, { "epoch": 1.9334933634566507, "grad_norm": 3.9402739103220457, "learning_rate": 5.928084727324932e-06, "loss": 0.6567, "step": 13693 }, { "epoch": 1.9336345665066366, "grad_norm": 3.4920503233728337, "learning_rate": 5.926692426521474e-06, "loss": 0.4959, "step": 13694 }, { "epoch": 1.9337757695566224, "grad_norm": 4.38278127963404, "learning_rate": 5.92530022037993e-06, "loss": 0.7585, "step": 13695 }, { "epoch": 1.9339169726066083, "grad_norm": 3.841475732843702, "learning_rate": 5.923908108932648e-06, "loss": 0.5602, "step": 13696 }, { "epoch": 1.9340581756565942, "grad_norm": 3.599058247707388, "learning_rate": 5.9225160922119784e-06, "loss": 0.5172, "step": 13697 }, { "epoch": 1.93419937870658, "grad_norm": 3.77084930028437, "learning_rate": 5.921124170250276e-06, "loss": 0.5908, "step": 13698 }, { "epoch": 1.934340581756566, "grad_norm": 3.4877514279838095, "learning_rate": 5.919732343079887e-06, "loss": 0.5456, "step": 13699 }, { "epoch": 1.9344817848065519, "grad_norm": 4.2002285181941685, "learning_rate": 5.918340610733154e-06, "loss": 0.6438, "step": 13700 }, { "epoch": 1.9346229878565377, "grad_norm": 4.108356153185096, "learning_rate": 5.916948973242429e-06, "loss": 0.6009, "step": 13701 }, { "epoch": 1.9347641909065236, "grad_norm": 4.16774114108925, "learning_rate": 5.91555743064004e-06, "loss": 0.6562, "step": 13702 }, { "epoch": 1.9349053939565095, "grad_norm": 3.645123953693673, "learning_rate": 5.9141659829583345e-06, "loss": 0.5664, "step": 13703 }, { "epoch": 1.9350465970064954, "grad_norm": 3.8031686917343843, "learning_rate": 5.912774630229648e-06, "loss": 0.6661, "step": 13704 }, { "epoch": 1.935187800056481, "grad_norm": 3.835673833354061, "learning_rate": 5.911383372486312e-06, "loss": 0.5729, "step": 13705 }, { "epoch": 1.935329003106467, "grad_norm": 3.5932721949382684, "learning_rate": 5.90999220976066e-06, "loss": 0.5468, "step": 13706 }, { "epoch": 1.9354702061564528, "grad_norm": 4.118451596377352, "learning_rate": 5.908601142085024e-06, "loss": 0.6493, "step": 13707 }, { "epoch": 1.9356114092064387, "grad_norm": 3.5023250448517094, "learning_rate": 5.9072101694917305e-06, "loss": 0.5822, "step": 13708 }, { "epoch": 1.9357526122564246, "grad_norm": 3.110987525553427, "learning_rate": 5.905819292013107e-06, "loss": 0.5146, "step": 13709 }, { "epoch": 1.9358938153064105, "grad_norm": 3.7760083290904625, "learning_rate": 5.904428509681473e-06, "loss": 0.7114, "step": 13710 }, { "epoch": 1.9360350183563964, "grad_norm": 2.9679014256788134, "learning_rate": 5.903037822529151e-06, "loss": 0.4703, "step": 13711 }, { "epoch": 1.9361762214063822, "grad_norm": 3.5198847816694525, "learning_rate": 5.901647230588461e-06, "loss": 0.591, "step": 13712 }, { "epoch": 1.9363174244563681, "grad_norm": 3.4707906704060014, "learning_rate": 5.9002567338917204e-06, "loss": 0.4225, "step": 13713 }, { "epoch": 1.936458627506354, "grad_norm": 3.915006052111267, "learning_rate": 5.898866332471241e-06, "loss": 0.55, "step": 13714 }, { "epoch": 1.9365998305563399, "grad_norm": 3.7453986142283506, "learning_rate": 5.897476026359339e-06, "loss": 0.5473, "step": 13715 }, { "epoch": 1.9367410336063258, "grad_norm": 3.34776305764127, "learning_rate": 5.896085815588321e-06, "loss": 0.5279, "step": 13716 }, { "epoch": 1.9368822366563117, "grad_norm": 3.311507511989576, "learning_rate": 5.894695700190498e-06, "loss": 0.5798, "step": 13717 }, { "epoch": 1.9370234397062975, "grad_norm": 3.3428506833632348, "learning_rate": 5.893305680198175e-06, "loss": 0.4197, "step": 13718 }, { "epoch": 1.9371646427562834, "grad_norm": 4.760869814307227, "learning_rate": 5.89191575564365e-06, "loss": 0.609, "step": 13719 }, { "epoch": 1.9373058458062693, "grad_norm": 3.0275866197835164, "learning_rate": 5.8905259265592315e-06, "loss": 0.5147, "step": 13720 }, { "epoch": 1.9374470488562552, "grad_norm": 3.8773540660910246, "learning_rate": 5.889136192977216e-06, "loss": 0.6017, "step": 13721 }, { "epoch": 1.937588251906241, "grad_norm": 3.4488683038071146, "learning_rate": 5.887746554929899e-06, "loss": 0.5665, "step": 13722 }, { "epoch": 1.937729454956227, "grad_norm": 3.520262265248116, "learning_rate": 5.886357012449575e-06, "loss": 0.5482, "step": 13723 }, { "epoch": 1.9378706580062128, "grad_norm": 4.529110174513095, "learning_rate": 5.884967565568539e-06, "loss": 0.6764, "step": 13724 }, { "epoch": 1.9380118610561987, "grad_norm": 3.723047054401143, "learning_rate": 5.88357821431908e-06, "loss": 0.5902, "step": 13725 }, { "epoch": 1.9381530641061846, "grad_norm": 3.776142922145206, "learning_rate": 5.882188958733488e-06, "loss": 0.5692, "step": 13726 }, { "epoch": 1.9382942671561705, "grad_norm": 2.8991646244531943, "learning_rate": 5.880799798844044e-06, "loss": 0.4128, "step": 13727 }, { "epoch": 1.9384354702061564, "grad_norm": 4.11725693549263, "learning_rate": 5.87941073468303e-06, "loss": 0.5433, "step": 13728 }, { "epoch": 1.9385766732561422, "grad_norm": 3.2704463339613667, "learning_rate": 5.878021766282734e-06, "loss": 0.4857, "step": 13729 }, { "epoch": 1.9387178763061281, "grad_norm": 3.488287231351765, "learning_rate": 5.876632893675432e-06, "loss": 0.5272, "step": 13730 }, { "epoch": 1.938859079356114, "grad_norm": 3.8175470893915726, "learning_rate": 5.875244116893399e-06, "loss": 0.536, "step": 13731 }, { "epoch": 1.9390002824061, "grad_norm": 3.865514025634565, "learning_rate": 5.873855435968913e-06, "loss": 0.6378, "step": 13732 }, { "epoch": 1.9391414854560858, "grad_norm": 3.2647132980381084, "learning_rate": 5.872466850934247e-06, "loss": 0.5069, "step": 13733 }, { "epoch": 1.9392826885060717, "grad_norm": 4.357754156770756, "learning_rate": 5.871078361821667e-06, "loss": 0.6475, "step": 13734 }, { "epoch": 1.9394238915560575, "grad_norm": 3.8057649193976437, "learning_rate": 5.869689968663444e-06, "loss": 0.5281, "step": 13735 }, { "epoch": 1.9395650946060434, "grad_norm": 3.153409297977442, "learning_rate": 5.8683016714918416e-06, "loss": 0.4916, "step": 13736 }, { "epoch": 1.9397062976560293, "grad_norm": 3.392707086978055, "learning_rate": 5.866913470339122e-06, "loss": 0.4773, "step": 13737 }, { "epoch": 1.9398475007060152, "grad_norm": 4.129633386137401, "learning_rate": 5.86552536523755e-06, "loss": 0.6654, "step": 13738 }, { "epoch": 1.939988703756001, "grad_norm": 3.117092952212741, "learning_rate": 5.864137356219381e-06, "loss": 0.4832, "step": 13739 }, { "epoch": 1.940129906805987, "grad_norm": 3.851070025677055, "learning_rate": 5.8627494433168756e-06, "loss": 0.6835, "step": 13740 }, { "epoch": 1.9402711098559728, "grad_norm": 4.383728421216885, "learning_rate": 5.861361626562285e-06, "loss": 0.7755, "step": 13741 }, { "epoch": 1.9404123129059587, "grad_norm": 3.4332066454145416, "learning_rate": 5.859973905987866e-06, "loss": 0.4209, "step": 13742 }, { "epoch": 1.9405535159559446, "grad_norm": 3.0376027226100155, "learning_rate": 5.858586281625864e-06, "loss": 0.4269, "step": 13743 }, { "epoch": 1.9406947190059305, "grad_norm": 3.683301580150677, "learning_rate": 5.8571987535085286e-06, "loss": 0.5402, "step": 13744 }, { "epoch": 1.9408359220559164, "grad_norm": 3.011082985749689, "learning_rate": 5.855811321668104e-06, "loss": 0.4849, "step": 13745 }, { "epoch": 1.9409771251059023, "grad_norm": 3.959578105879021, "learning_rate": 5.8544239861368346e-06, "loss": 0.72, "step": 13746 }, { "epoch": 1.9411183281558881, "grad_norm": 3.193291572622729, "learning_rate": 5.853036746946961e-06, "loss": 0.4906, "step": 13747 }, { "epoch": 1.941259531205874, "grad_norm": 4.355281954004002, "learning_rate": 5.851649604130723e-06, "loss": 0.5843, "step": 13748 }, { "epoch": 1.94140073425586, "grad_norm": 4.615059538508084, "learning_rate": 5.850262557720355e-06, "loss": 0.6055, "step": 13749 }, { "epoch": 1.9415419373058458, "grad_norm": 3.8032804362228143, "learning_rate": 5.848875607748095e-06, "loss": 0.5844, "step": 13750 }, { "epoch": 1.9416831403558317, "grad_norm": 4.671596297743548, "learning_rate": 5.847488754246173e-06, "loss": 0.7691, "step": 13751 }, { "epoch": 1.9418243434058176, "grad_norm": 3.178304624653826, "learning_rate": 5.846101997246822e-06, "loss": 0.5129, "step": 13752 }, { "epoch": 1.9419655464558034, "grad_norm": 3.1632836317268738, "learning_rate": 5.844715336782263e-06, "loss": 0.492, "step": 13753 }, { "epoch": 1.9421067495057893, "grad_norm": 3.2310499502562737, "learning_rate": 5.8433287728847244e-06, "loss": 0.4564, "step": 13754 }, { "epoch": 1.9422479525557752, "grad_norm": 2.466316171533824, "learning_rate": 5.841942305586432e-06, "loss": 0.3552, "step": 13755 }, { "epoch": 1.942389155605761, "grad_norm": 2.8165633566021757, "learning_rate": 5.840555934919604e-06, "loss": 0.3617, "step": 13756 }, { "epoch": 1.942530358655747, "grad_norm": 3.4886370767727666, "learning_rate": 5.839169660916457e-06, "loss": 0.5568, "step": 13757 }, { "epoch": 1.9426715617057329, "grad_norm": 3.317207974281502, "learning_rate": 5.837783483609214e-06, "loss": 0.5092, "step": 13758 }, { "epoch": 1.9428127647557187, "grad_norm": 4.399203870737688, "learning_rate": 5.836397403030084e-06, "loss": 0.6128, "step": 13759 }, { "epoch": 1.9429539678057046, "grad_norm": 3.2075078474275722, "learning_rate": 5.835011419211285e-06, "loss": 0.5159, "step": 13760 }, { "epoch": 1.9430951708556905, "grad_norm": 3.022133997885696, "learning_rate": 5.833625532185013e-06, "loss": 0.4589, "step": 13761 }, { "epoch": 1.9432363739056764, "grad_norm": 3.1820024107020703, "learning_rate": 5.83223974198349e-06, "loss": 0.4969, "step": 13762 }, { "epoch": 1.9433775769556623, "grad_norm": 4.08775149623513, "learning_rate": 5.830854048638913e-06, "loss": 0.5815, "step": 13763 }, { "epoch": 1.9435187800056481, "grad_norm": 3.4101267454326067, "learning_rate": 5.829468452183489e-06, "loss": 0.4807, "step": 13764 }, { "epoch": 1.943659983055634, "grad_norm": 3.153640654124991, "learning_rate": 5.828082952649416e-06, "loss": 0.49, "step": 13765 }, { "epoch": 1.94380118610562, "grad_norm": 3.747866943841111, "learning_rate": 5.8266975500688945e-06, "loss": 0.5642, "step": 13766 }, { "epoch": 1.9439423891556058, "grad_norm": 3.771895744198372, "learning_rate": 5.825312244474115e-06, "loss": 0.5716, "step": 13767 }, { "epoch": 1.9440835922055917, "grad_norm": 3.8694386174022037, "learning_rate": 5.823927035897285e-06, "loss": 0.5732, "step": 13768 }, { "epoch": 1.9442247952555776, "grad_norm": 3.40566607824833, "learning_rate": 5.822541924370583e-06, "loss": 0.5514, "step": 13769 }, { "epoch": 1.9443659983055634, "grad_norm": 4.102690030683655, "learning_rate": 5.821156909926202e-06, "loss": 0.7516, "step": 13770 }, { "epoch": 1.9445072013555493, "grad_norm": 3.1714121307007863, "learning_rate": 5.8197719925963305e-06, "loss": 0.4997, "step": 13771 }, { "epoch": 1.9446484044055352, "grad_norm": 2.9174763733733333, "learning_rate": 5.818387172413151e-06, "loss": 0.4195, "step": 13772 }, { "epoch": 1.944789607455521, "grad_norm": 3.99556137492448, "learning_rate": 5.817002449408844e-06, "loss": 0.6678, "step": 13773 }, { "epoch": 1.944930810505507, "grad_norm": 2.7582950023349646, "learning_rate": 5.815617823615599e-06, "loss": 0.3958, "step": 13774 }, { "epoch": 1.9450720135554929, "grad_norm": 4.246422025136108, "learning_rate": 5.814233295065588e-06, "loss": 0.6632, "step": 13775 }, { "epoch": 1.9452132166054787, "grad_norm": 3.178236633369428, "learning_rate": 5.812848863790988e-06, "loss": 0.5369, "step": 13776 }, { "epoch": 1.9453544196554646, "grad_norm": 3.483258584170309, "learning_rate": 5.811464529823975e-06, "loss": 0.494, "step": 13777 }, { "epoch": 1.9454956227054505, "grad_norm": 3.5350551065851006, "learning_rate": 5.810080293196709e-06, "loss": 0.6006, "step": 13778 }, { "epoch": 1.9456368257554364, "grad_norm": 3.5691012710099614, "learning_rate": 5.808696153941374e-06, "loss": 0.5441, "step": 13779 }, { "epoch": 1.9457780288054223, "grad_norm": 3.374503810040913, "learning_rate": 5.807312112090129e-06, "loss": 0.496, "step": 13780 }, { "epoch": 1.9459192318554082, "grad_norm": 3.6227972384810387, "learning_rate": 5.805928167675139e-06, "loss": 0.5632, "step": 13781 }, { "epoch": 1.946060434905394, "grad_norm": 3.652366831131586, "learning_rate": 5.804544320728567e-06, "loss": 0.5508, "step": 13782 }, { "epoch": 1.94620163795538, "grad_norm": 3.5063688863046467, "learning_rate": 5.8031605712825736e-06, "loss": 0.4985, "step": 13783 }, { "epoch": 1.9463428410053658, "grad_norm": 3.7240665469447016, "learning_rate": 5.801776919369317e-06, "loss": 0.4996, "step": 13784 }, { "epoch": 1.9464840440553517, "grad_norm": 3.8861110331045547, "learning_rate": 5.800393365020951e-06, "loss": 0.5642, "step": 13785 }, { "epoch": 1.9466252471053376, "grad_norm": 3.5806370757476405, "learning_rate": 5.799009908269629e-06, "loss": 0.578, "step": 13786 }, { "epoch": 1.9467664501553235, "grad_norm": 3.6624356225320756, "learning_rate": 5.797626549147504e-06, "loss": 0.5771, "step": 13787 }, { "epoch": 1.9469076532053093, "grad_norm": 3.3105885611206203, "learning_rate": 5.7962432876867215e-06, "loss": 0.4901, "step": 13788 }, { "epoch": 1.9470488562552952, "grad_norm": 3.206785698449427, "learning_rate": 5.79486012391943e-06, "loss": 0.4367, "step": 13789 }, { "epoch": 1.947190059305281, "grad_norm": 3.4711654354660717, "learning_rate": 5.79347705787777e-06, "loss": 0.5143, "step": 13790 }, { "epoch": 1.947331262355267, "grad_norm": 3.4816488543827124, "learning_rate": 5.7920940895938896e-06, "loss": 0.5275, "step": 13791 }, { "epoch": 1.9474724654052529, "grad_norm": 3.916240785387009, "learning_rate": 5.790711219099925e-06, "loss": 0.4985, "step": 13792 }, { "epoch": 1.9476136684552388, "grad_norm": 3.52374684398223, "learning_rate": 5.789328446428014e-06, "loss": 0.6075, "step": 13793 }, { "epoch": 1.9477548715052246, "grad_norm": 3.709756373628427, "learning_rate": 5.787945771610296e-06, "loss": 0.5579, "step": 13794 }, { "epoch": 1.9478960745552105, "grad_norm": 3.819090608473388, "learning_rate": 5.786563194678892e-06, "loss": 0.5557, "step": 13795 }, { "epoch": 1.9480372776051964, "grad_norm": 4.114388115171828, "learning_rate": 5.785180715665941e-06, "loss": 0.6161, "step": 13796 }, { "epoch": 1.9481784806551823, "grad_norm": 4.558704542463181, "learning_rate": 5.783798334603572e-06, "loss": 0.6786, "step": 13797 }, { "epoch": 1.9483196837051682, "grad_norm": 3.1923698140343726, "learning_rate": 5.782416051523909e-06, "loss": 0.4566, "step": 13798 }, { "epoch": 1.948460886755154, "grad_norm": 3.5577115019385945, "learning_rate": 5.781033866459076e-06, "loss": 0.5477, "step": 13799 }, { "epoch": 1.94860208980514, "grad_norm": 5.025136758062498, "learning_rate": 5.779651779441192e-06, "loss": 0.6211, "step": 13800 }, { "epoch": 1.9487432928551258, "grad_norm": 3.935652991753846, "learning_rate": 5.778269790502379e-06, "loss": 0.6761, "step": 13801 }, { "epoch": 1.9488844959051117, "grad_norm": 4.10907752744415, "learning_rate": 5.776887899674752e-06, "loss": 0.5951, "step": 13802 }, { "epoch": 1.9490256989550976, "grad_norm": 3.3688068119751344, "learning_rate": 5.775506106990427e-06, "loss": 0.5124, "step": 13803 }, { "epoch": 1.9491669020050835, "grad_norm": 4.0128604043697855, "learning_rate": 5.774124412481517e-06, "loss": 0.563, "step": 13804 }, { "epoch": 1.9493081050550694, "grad_norm": 3.082276612774261, "learning_rate": 5.77274281618013e-06, "loss": 0.4383, "step": 13805 }, { "epoch": 1.9494493081050552, "grad_norm": 3.1943219807835965, "learning_rate": 5.771361318118374e-06, "loss": 0.4556, "step": 13806 }, { "epoch": 1.949590511155041, "grad_norm": 3.3401408724581962, "learning_rate": 5.76997991832835e-06, "loss": 0.4892, "step": 13807 }, { "epoch": 1.9497317142050268, "grad_norm": 3.6066079437612433, "learning_rate": 5.76859861684217e-06, "loss": 0.4805, "step": 13808 }, { "epoch": 1.9498729172550127, "grad_norm": 3.484788107325088, "learning_rate": 5.767217413691933e-06, "loss": 0.5242, "step": 13809 }, { "epoch": 1.9500141203049985, "grad_norm": 3.5791471640248163, "learning_rate": 5.765836308909731e-06, "loss": 0.5442, "step": 13810 }, { "epoch": 1.9501553233549844, "grad_norm": 3.7059679000377486, "learning_rate": 5.764455302527671e-06, "loss": 0.5531, "step": 13811 }, { "epoch": 1.9502965264049703, "grad_norm": 3.248891575953556, "learning_rate": 5.763074394577835e-06, "loss": 0.5501, "step": 13812 }, { "epoch": 1.9504377294549562, "grad_norm": 3.7001767864955935, "learning_rate": 5.761693585092321e-06, "loss": 0.5228, "step": 13813 }, { "epoch": 1.950578932504942, "grad_norm": 2.877351702339207, "learning_rate": 5.760312874103219e-06, "loss": 0.3711, "step": 13814 }, { "epoch": 1.950720135554928, "grad_norm": 3.964246159195276, "learning_rate": 5.7589322616426135e-06, "loss": 0.6033, "step": 13815 }, { "epoch": 1.9508613386049138, "grad_norm": 3.930087773436957, "learning_rate": 5.757551747742592e-06, "loss": 0.5532, "step": 13816 }, { "epoch": 1.9510025416548997, "grad_norm": 3.2780870321423166, "learning_rate": 5.756171332435237e-06, "loss": 0.4787, "step": 13817 }, { "epoch": 1.9511437447048856, "grad_norm": 3.7816514407170203, "learning_rate": 5.754791015752624e-06, "loss": 0.4823, "step": 13818 }, { "epoch": 1.9512849477548715, "grad_norm": 3.8796314979021242, "learning_rate": 5.753410797726837e-06, "loss": 0.4861, "step": 13819 }, { "epoch": 1.9514261508048574, "grad_norm": 3.20094998785402, "learning_rate": 5.752030678389948e-06, "loss": 0.4464, "step": 13820 }, { "epoch": 1.9515673538548433, "grad_norm": 3.702219167992484, "learning_rate": 5.750650657774032e-06, "loss": 0.5074, "step": 13821 }, { "epoch": 1.9517085569048291, "grad_norm": 3.176676291575915, "learning_rate": 5.749270735911159e-06, "loss": 0.4893, "step": 13822 }, { "epoch": 1.951849759954815, "grad_norm": 3.8205795923620065, "learning_rate": 5.7478909128334e-06, "loss": 0.5648, "step": 13823 }, { "epoch": 1.951990963004801, "grad_norm": 3.5363117043692824, "learning_rate": 5.746511188572814e-06, "loss": 0.4924, "step": 13824 }, { "epoch": 1.9521321660547868, "grad_norm": 3.4600751105115455, "learning_rate": 5.745131563161475e-06, "loss": 0.4855, "step": 13825 }, { "epoch": 1.9522733691047727, "grad_norm": 3.9018142772415043, "learning_rate": 5.743752036631443e-06, "loss": 0.6898, "step": 13826 }, { "epoch": 1.9524145721547586, "grad_norm": 3.714293682673438, "learning_rate": 5.742372609014773e-06, "loss": 0.5784, "step": 13827 }, { "epoch": 1.9525557752047444, "grad_norm": 4.485189287932414, "learning_rate": 5.740993280343529e-06, "loss": 0.669, "step": 13828 }, { "epoch": 1.9526969782547303, "grad_norm": 3.155621531524472, "learning_rate": 5.739614050649756e-06, "loss": 0.5118, "step": 13829 }, { "epoch": 1.9528381813047162, "grad_norm": 4.1372806227464265, "learning_rate": 5.738234919965516e-06, "loss": 0.5238, "step": 13830 }, { "epoch": 1.952979384354702, "grad_norm": 3.4569778488545895, "learning_rate": 5.736855888322854e-06, "loss": 0.5069, "step": 13831 }, { "epoch": 1.953120587404688, "grad_norm": 3.029349127087165, "learning_rate": 5.735476955753821e-06, "loss": 0.4925, "step": 13832 }, { "epoch": 1.9532617904546739, "grad_norm": 3.1786968758401417, "learning_rate": 5.734098122290463e-06, "loss": 0.5633, "step": 13833 }, { "epoch": 1.9534029935046597, "grad_norm": 4.170378422052908, "learning_rate": 5.732719387964822e-06, "loss": 0.5949, "step": 13834 }, { "epoch": 1.9535441965546456, "grad_norm": 4.898545234731712, "learning_rate": 5.731340752808939e-06, "loss": 0.6024, "step": 13835 }, { "epoch": 1.9536853996046315, "grad_norm": 3.0028863105505628, "learning_rate": 5.729962216854853e-06, "loss": 0.4309, "step": 13836 }, { "epoch": 1.9538266026546174, "grad_norm": 4.375925170640629, "learning_rate": 5.728583780134601e-06, "loss": 0.5303, "step": 13837 }, { "epoch": 1.9539678057046033, "grad_norm": 2.8615918615571716, "learning_rate": 5.727205442680218e-06, "loss": 0.4777, "step": 13838 }, { "epoch": 1.9541090087545891, "grad_norm": 3.5659509572053074, "learning_rate": 5.725827204523735e-06, "loss": 0.5963, "step": 13839 }, { "epoch": 1.954250211804575, "grad_norm": 3.2799098644343894, "learning_rate": 5.724449065697182e-06, "loss": 0.4677, "step": 13840 }, { "epoch": 1.9543914148545607, "grad_norm": 3.5694294594028744, "learning_rate": 5.723071026232582e-06, "loss": 0.5882, "step": 13841 }, { "epoch": 1.9545326179045466, "grad_norm": 4.425207179441219, "learning_rate": 5.721693086161968e-06, "loss": 0.5561, "step": 13842 }, { "epoch": 1.9546738209545325, "grad_norm": 3.583081729841661, "learning_rate": 5.7203152455173595e-06, "loss": 0.555, "step": 13843 }, { "epoch": 1.9548150240045183, "grad_norm": 3.4216192149915376, "learning_rate": 5.7189375043307775e-06, "loss": 0.4768, "step": 13844 }, { "epoch": 1.9549562270545042, "grad_norm": 4.079995758515541, "learning_rate": 5.717559862634241e-06, "loss": 0.6431, "step": 13845 }, { "epoch": 1.95509743010449, "grad_norm": 3.119579100780536, "learning_rate": 5.71618232045976e-06, "loss": 0.5184, "step": 13846 }, { "epoch": 1.955238633154476, "grad_norm": 3.495508010562949, "learning_rate": 5.714804877839348e-06, "loss": 0.5472, "step": 13847 }, { "epoch": 1.9553798362044619, "grad_norm": 3.966578011661628, "learning_rate": 5.713427534805025e-06, "loss": 0.6499, "step": 13848 }, { "epoch": 1.9555210392544478, "grad_norm": 3.9938511691205254, "learning_rate": 5.712050291388795e-06, "loss": 0.575, "step": 13849 }, { "epoch": 1.9556622423044336, "grad_norm": 3.9389384865325394, "learning_rate": 5.710673147622664e-06, "loss": 0.6028, "step": 13850 }, { "epoch": 1.9558034453544195, "grad_norm": 3.112825666880522, "learning_rate": 5.709296103538636e-06, "loss": 0.4854, "step": 13851 }, { "epoch": 1.9559446484044054, "grad_norm": 3.551370600151852, "learning_rate": 5.707919159168714e-06, "loss": 0.5187, "step": 13852 }, { "epoch": 1.9560858514543913, "grad_norm": 3.1280639918963087, "learning_rate": 5.7065423145448974e-06, "loss": 0.4599, "step": 13853 }, { "epoch": 1.9562270545043772, "grad_norm": 2.9386347629922898, "learning_rate": 5.7051655696991825e-06, "loss": 0.4716, "step": 13854 }, { "epoch": 1.956368257554363, "grad_norm": 4.787573359976234, "learning_rate": 5.703788924663566e-06, "loss": 0.7514, "step": 13855 }, { "epoch": 1.956509460604349, "grad_norm": 3.6040572966167015, "learning_rate": 5.702412379470039e-06, "loss": 0.5617, "step": 13856 }, { "epoch": 1.9566506636543348, "grad_norm": 3.961236249244886, "learning_rate": 5.701035934150592e-06, "loss": 0.6041, "step": 13857 }, { "epoch": 1.9567918667043207, "grad_norm": 4.296631759491418, "learning_rate": 5.6996595887372145e-06, "loss": 0.7383, "step": 13858 }, { "epoch": 1.9569330697543066, "grad_norm": 3.292921823211725, "learning_rate": 5.698283343261885e-06, "loss": 0.4993, "step": 13859 }, { "epoch": 1.9570742728042925, "grad_norm": 4.45270915797289, "learning_rate": 5.696907197756598e-06, "loss": 0.7326, "step": 13860 }, { "epoch": 1.9572154758542784, "grad_norm": 4.577343328969044, "learning_rate": 5.695531152253328e-06, "loss": 0.5969, "step": 13861 }, { "epoch": 1.9573566789042642, "grad_norm": 3.682938802122358, "learning_rate": 5.694155206784061e-06, "loss": 0.5649, "step": 13862 }, { "epoch": 1.9574978819542501, "grad_norm": 3.8205525043946285, "learning_rate": 5.692779361380762e-06, "loss": 0.4869, "step": 13863 }, { "epoch": 1.957639085004236, "grad_norm": 3.8363379839323826, "learning_rate": 5.691403616075409e-06, "loss": 0.5763, "step": 13864 }, { "epoch": 1.9577802880542219, "grad_norm": 2.8956428080460306, "learning_rate": 5.690027970899976e-06, "loss": 0.4346, "step": 13865 }, { "epoch": 1.9579214911042078, "grad_norm": 3.619008266533831, "learning_rate": 5.688652425886434e-06, "loss": 0.5115, "step": 13866 }, { "epoch": 1.9580626941541937, "grad_norm": 3.8200475597530823, "learning_rate": 5.687276981066746e-06, "loss": 0.5654, "step": 13867 }, { "epoch": 1.9582038972041795, "grad_norm": 3.179062620492597, "learning_rate": 5.6859016364728795e-06, "loss": 0.6054, "step": 13868 }, { "epoch": 1.9583451002541654, "grad_norm": 3.40841421157824, "learning_rate": 5.684526392136795e-06, "loss": 0.5713, "step": 13869 }, { "epoch": 1.9584863033041513, "grad_norm": 3.525822698580449, "learning_rate": 5.683151248090455e-06, "loss": 0.484, "step": 13870 }, { "epoch": 1.9586275063541372, "grad_norm": 4.3823293831094166, "learning_rate": 5.681776204365814e-06, "loss": 0.7148, "step": 13871 }, { "epoch": 1.958768709404123, "grad_norm": 4.239649864259085, "learning_rate": 5.68040126099483e-06, "loss": 0.8033, "step": 13872 }, { "epoch": 1.958909912454109, "grad_norm": 3.7789079900543023, "learning_rate": 5.679026418009455e-06, "loss": 0.6037, "step": 13873 }, { "epoch": 1.9590511155040948, "grad_norm": 3.4733570263843503, "learning_rate": 5.67765167544164e-06, "loss": 0.5751, "step": 13874 }, { "epoch": 1.9591923185540807, "grad_norm": 3.2631223086709347, "learning_rate": 5.676277033323333e-06, "loss": 0.462, "step": 13875 }, { "epoch": 1.9593335216040666, "grad_norm": 4.229301642082372, "learning_rate": 5.674902491686477e-06, "loss": 0.5815, "step": 13876 }, { "epoch": 1.9594747246540525, "grad_norm": 4.868895194444744, "learning_rate": 5.673528050563023e-06, "loss": 0.5287, "step": 13877 }, { "epoch": 1.9596159277040384, "grad_norm": 4.160999733429085, "learning_rate": 5.672153709984909e-06, "loss": 0.5596, "step": 13878 }, { "epoch": 1.9597571307540242, "grad_norm": 3.0742995196513743, "learning_rate": 5.670779469984077e-06, "loss": 0.4386, "step": 13879 }, { "epoch": 1.9598983338040101, "grad_norm": 3.754947539688105, "learning_rate": 5.669405330592457e-06, "loss": 0.5171, "step": 13880 }, { "epoch": 1.960039536853996, "grad_norm": 3.681520220121038, "learning_rate": 5.668031291841982e-06, "loss": 0.5486, "step": 13881 }, { "epoch": 1.960180739903982, "grad_norm": 4.254995752281185, "learning_rate": 5.666657353764594e-06, "loss": 0.7035, "step": 13882 }, { "epoch": 1.9603219429539678, "grad_norm": 3.3751069009941186, "learning_rate": 5.6652835163922175e-06, "loss": 0.5481, "step": 13883 }, { "epoch": 1.9604631460039537, "grad_norm": 3.0393066593083886, "learning_rate": 5.66390977975678e-06, "loss": 0.4687, "step": 13884 }, { "epoch": 1.9606043490539395, "grad_norm": 3.4159779750162085, "learning_rate": 5.662536143890207e-06, "loss": 0.4796, "step": 13885 }, { "epoch": 1.9607455521039254, "grad_norm": 4.033926258600158, "learning_rate": 5.66116260882442e-06, "loss": 0.5418, "step": 13886 }, { "epoch": 1.9608867551539113, "grad_norm": 3.2682517018447785, "learning_rate": 5.65978917459134e-06, "loss": 0.503, "step": 13887 }, { "epoch": 1.9610279582038972, "grad_norm": 4.835351631156877, "learning_rate": 5.658415841222887e-06, "loss": 0.7347, "step": 13888 }, { "epoch": 1.961169161253883, "grad_norm": 3.618412470826112, "learning_rate": 5.657042608750973e-06, "loss": 0.5863, "step": 13889 }, { "epoch": 1.961310364303869, "grad_norm": 3.7088054160165806, "learning_rate": 5.655669477207513e-06, "loss": 0.4978, "step": 13890 }, { "epoch": 1.9614515673538548, "grad_norm": 3.1364006258765063, "learning_rate": 5.6542964466244185e-06, "loss": 0.4523, "step": 13891 }, { "epoch": 1.9615927704038407, "grad_norm": 3.764609404713001, "learning_rate": 5.6529235170335995e-06, "loss": 0.639, "step": 13892 }, { "epoch": 1.9617339734538266, "grad_norm": 3.1898886127301855, "learning_rate": 5.651550688466956e-06, "loss": 0.5032, "step": 13893 }, { "epoch": 1.9618751765038125, "grad_norm": 4.015065969944727, "learning_rate": 5.650177960956399e-06, "loss": 0.5573, "step": 13894 }, { "epoch": 1.9620163795537984, "grad_norm": 2.809500306937714, "learning_rate": 5.648805334533829e-06, "loss": 0.3814, "step": 13895 }, { "epoch": 1.9621575826037843, "grad_norm": 2.877753316208816, "learning_rate": 5.647432809231147e-06, "loss": 0.4398, "step": 13896 }, { "epoch": 1.9622987856537701, "grad_norm": 3.7623274377365536, "learning_rate": 5.6460603850802435e-06, "loss": 0.5072, "step": 13897 }, { "epoch": 1.962439988703756, "grad_norm": 3.3750408930654276, "learning_rate": 5.644688062113012e-06, "loss": 0.4831, "step": 13898 }, { "epoch": 1.962581191753742, "grad_norm": 3.9120817849490495, "learning_rate": 5.6433158403613516e-06, "loss": 0.6725, "step": 13899 }, { "epoch": 1.9627223948037278, "grad_norm": 3.421279744670933, "learning_rate": 5.6419437198571525e-06, "loss": 0.5124, "step": 13900 }, { "epoch": 1.9628635978537137, "grad_norm": 3.1077835342019053, "learning_rate": 5.640571700632298e-06, "loss": 0.389, "step": 13901 }, { "epoch": 1.9630048009036996, "grad_norm": 3.5840775895832673, "learning_rate": 5.639199782718674e-06, "loss": 0.5772, "step": 13902 }, { "epoch": 1.9631460039536854, "grad_norm": 4.651180117211612, "learning_rate": 5.6378279661481635e-06, "loss": 0.7408, "step": 13903 }, { "epoch": 1.9632872070036713, "grad_norm": 4.408671616319314, "learning_rate": 5.6364562509526474e-06, "loss": 0.7948, "step": 13904 }, { "epoch": 1.9634284100536572, "grad_norm": 4.1238726425020324, "learning_rate": 5.635084637164005e-06, "loss": 0.4954, "step": 13905 }, { "epoch": 1.963569613103643, "grad_norm": 2.8992526370140292, "learning_rate": 5.6337131248141085e-06, "loss": 0.4686, "step": 13906 }, { "epoch": 1.963710816153629, "grad_norm": 3.407495166592999, "learning_rate": 5.632341713934835e-06, "loss": 0.4823, "step": 13907 }, { "epoch": 1.9638520192036149, "grad_norm": 3.595777180267852, "learning_rate": 5.630970404558053e-06, "loss": 0.503, "step": 13908 }, { "epoch": 1.9639932222536007, "grad_norm": 4.500689128465977, "learning_rate": 5.629599196715631e-06, "loss": 0.7194, "step": 13909 }, { "epoch": 1.9641344253035866, "grad_norm": 3.8618813448864673, "learning_rate": 5.628228090439434e-06, "loss": 0.5356, "step": 13910 }, { "epoch": 1.9642756283535725, "grad_norm": 2.9372484931710106, "learning_rate": 5.626857085761331e-06, "loss": 0.4678, "step": 13911 }, { "epoch": 1.9644168314035584, "grad_norm": 3.9847835672732965, "learning_rate": 5.625486182713181e-06, "loss": 0.6024, "step": 13912 }, { "epoch": 1.9645580344535443, "grad_norm": 3.7275966429078693, "learning_rate": 5.624115381326847e-06, "loss": 0.5785, "step": 13913 }, { "epoch": 1.9646992375035301, "grad_norm": 4.195000586687316, "learning_rate": 5.622744681634178e-06, "loss": 0.6171, "step": 13914 }, { "epoch": 1.964840440553516, "grad_norm": 3.992996347644937, "learning_rate": 5.6213740836670285e-06, "loss": 0.6246, "step": 13915 }, { "epoch": 1.964981643603502, "grad_norm": 3.8691525332238594, "learning_rate": 5.620003587457258e-06, "loss": 0.604, "step": 13916 }, { "epoch": 1.9651228466534878, "grad_norm": 3.208862281212403, "learning_rate": 5.618633193036712e-06, "loss": 0.4634, "step": 13917 }, { "epoch": 1.9652640497034737, "grad_norm": 3.3222301186778687, "learning_rate": 5.617262900437239e-06, "loss": 0.4698, "step": 13918 }, { "epoch": 1.9654052527534596, "grad_norm": 3.166429665404633, "learning_rate": 5.615892709690682e-06, "loss": 0.4683, "step": 13919 }, { "epoch": 1.9655464558034454, "grad_norm": 2.863714359211657, "learning_rate": 5.6145226208288875e-06, "loss": 0.4232, "step": 13920 }, { "epoch": 1.9656876588534313, "grad_norm": 4.100573021444812, "learning_rate": 5.613152633883692e-06, "loss": 0.6357, "step": 13921 }, { "epoch": 1.9658288619034172, "grad_norm": 3.5722495907989646, "learning_rate": 5.611782748886935e-06, "loss": 0.502, "step": 13922 }, { "epoch": 1.965970064953403, "grad_norm": 3.3816632883810063, "learning_rate": 5.610412965870451e-06, "loss": 0.4894, "step": 13923 }, { "epoch": 1.966111268003389, "grad_norm": 2.8731571946390018, "learning_rate": 5.609043284866076e-06, "loss": 0.3761, "step": 13924 }, { "epoch": 1.9662524710533749, "grad_norm": 4.693626033876007, "learning_rate": 5.607673705905636e-06, "loss": 0.7438, "step": 13925 }, { "epoch": 1.9663936741033607, "grad_norm": 3.3760792116896168, "learning_rate": 5.606304229020965e-06, "loss": 0.5802, "step": 13926 }, { "epoch": 1.9665348771533466, "grad_norm": 2.86667764898897, "learning_rate": 5.604934854243879e-06, "loss": 0.4117, "step": 13927 }, { "epoch": 1.9666760802033325, "grad_norm": 4.072060239343097, "learning_rate": 5.603565581606216e-06, "loss": 0.5244, "step": 13928 }, { "epoch": 1.9668172832533184, "grad_norm": 3.449454347487301, "learning_rate": 5.602196411139793e-06, "loss": 0.4985, "step": 13929 }, { "epoch": 1.9669584863033043, "grad_norm": 3.415834885954082, "learning_rate": 5.600827342876422e-06, "loss": 0.5998, "step": 13930 }, { "epoch": 1.9670996893532902, "grad_norm": 3.528506396413141, "learning_rate": 5.599458376847924e-06, "loss": 0.4795, "step": 13931 }, { "epoch": 1.967240892403276, "grad_norm": 4.002089303746843, "learning_rate": 5.598089513086108e-06, "loss": 0.6146, "step": 13932 }, { "epoch": 1.967382095453262, "grad_norm": 3.0284453771532127, "learning_rate": 5.596720751622795e-06, "loss": 0.5652, "step": 13933 }, { "epoch": 1.9675232985032478, "grad_norm": 3.3330797818966245, "learning_rate": 5.595352092489791e-06, "loss": 0.473, "step": 13934 }, { "epoch": 1.9676645015532337, "grad_norm": 4.421924599067661, "learning_rate": 5.593983535718901e-06, "loss": 0.6596, "step": 13935 }, { "epoch": 1.9678057046032196, "grad_norm": 2.9881172561662677, "learning_rate": 5.592615081341932e-06, "loss": 0.4221, "step": 13936 }, { "epoch": 1.9679469076532055, "grad_norm": 3.2234892751422155, "learning_rate": 5.591246729390685e-06, "loss": 0.5177, "step": 13937 }, { "epoch": 1.9680881107031913, "grad_norm": 3.3884810705597057, "learning_rate": 5.589878479896959e-06, "loss": 0.5358, "step": 13938 }, { "epoch": 1.9682293137531772, "grad_norm": 3.0319844307912023, "learning_rate": 5.588510332892553e-06, "loss": 0.4164, "step": 13939 }, { "epoch": 1.968370516803163, "grad_norm": 4.580975329435757, "learning_rate": 5.587142288409262e-06, "loss": 0.739, "step": 13940 }, { "epoch": 1.968511719853149, "grad_norm": 3.5589939560250414, "learning_rate": 5.585774346478878e-06, "loss": 0.4759, "step": 13941 }, { "epoch": 1.9686529229031349, "grad_norm": 3.354972960077759, "learning_rate": 5.584406507133191e-06, "loss": 0.4603, "step": 13942 }, { "epoch": 1.9687941259531205, "grad_norm": 4.237736647935482, "learning_rate": 5.583038770403991e-06, "loss": 0.6349, "step": 13943 }, { "epoch": 1.9689353290031064, "grad_norm": 3.055197444779421, "learning_rate": 5.581671136323059e-06, "loss": 0.4982, "step": 13944 }, { "epoch": 1.9690765320530923, "grad_norm": 4.055883558537785, "learning_rate": 5.580303604922185e-06, "loss": 0.603, "step": 13945 }, { "epoch": 1.9692177351030782, "grad_norm": 3.8350020412000654, "learning_rate": 5.578936176233149e-06, "loss": 0.5332, "step": 13946 }, { "epoch": 1.969358938153064, "grad_norm": 4.16051544313356, "learning_rate": 5.577568850287725e-06, "loss": 0.5598, "step": 13947 }, { "epoch": 1.96950014120305, "grad_norm": 2.9042158298248864, "learning_rate": 5.576201627117691e-06, "loss": 0.416, "step": 13948 }, { "epoch": 1.9696413442530358, "grad_norm": 4.018647805313028, "learning_rate": 5.574834506754821e-06, "loss": 0.7465, "step": 13949 }, { "epoch": 1.9697825473030217, "grad_norm": 4.392224314694918, "learning_rate": 5.573467489230879e-06, "loss": 0.6769, "step": 13950 }, { "epoch": 1.9699237503530076, "grad_norm": 3.3874195985228392, "learning_rate": 5.572100574577648e-06, "loss": 0.5016, "step": 13951 }, { "epoch": 1.9700649534029935, "grad_norm": 3.3222848503476783, "learning_rate": 5.5707337628268864e-06, "loss": 0.431, "step": 13952 }, { "epoch": 1.9702061564529794, "grad_norm": 3.174616566109569, "learning_rate": 5.56936705401036e-06, "loss": 0.4875, "step": 13953 }, { "epoch": 1.9703473595029652, "grad_norm": 3.0603360462735836, "learning_rate": 5.568000448159828e-06, "loss": 0.4486, "step": 13954 }, { "epoch": 1.9704885625529511, "grad_norm": 3.299887407522103, "learning_rate": 5.566633945307053e-06, "loss": 0.5137, "step": 13955 }, { "epoch": 1.970629765602937, "grad_norm": 3.450401027187521, "learning_rate": 5.56526754548379e-06, "loss": 0.419, "step": 13956 }, { "epoch": 1.970770968652923, "grad_norm": 3.2015975919602893, "learning_rate": 5.5639012487217945e-06, "loss": 0.4849, "step": 13957 }, { "epoch": 1.9709121717029088, "grad_norm": 3.527122845361631, "learning_rate": 5.562535055052818e-06, "loss": 0.4604, "step": 13958 }, { "epoch": 1.9710533747528947, "grad_norm": 3.054213717330431, "learning_rate": 5.56116896450861e-06, "loss": 0.3933, "step": 13959 }, { "epoch": 1.9711945778028805, "grad_norm": 3.7400792040283166, "learning_rate": 5.559802977120918e-06, "loss": 0.535, "step": 13960 }, { "epoch": 1.9713357808528664, "grad_norm": 4.0698205520909925, "learning_rate": 5.5584370929214874e-06, "loss": 0.6218, "step": 13961 }, { "epoch": 1.9714769839028523, "grad_norm": 3.94278740280085, "learning_rate": 5.557071311942057e-06, "loss": 0.5543, "step": 13962 }, { "epoch": 1.9716181869528382, "grad_norm": 3.9508432938343194, "learning_rate": 5.555705634214379e-06, "loss": 0.5875, "step": 13963 }, { "epoch": 1.971759390002824, "grad_norm": 3.2862039895587145, "learning_rate": 5.554340059770178e-06, "loss": 0.492, "step": 13964 }, { "epoch": 1.97190059305281, "grad_norm": 3.2279175421057773, "learning_rate": 5.552974588641194e-06, "loss": 0.5059, "step": 13965 }, { "epoch": 1.9720417961027958, "grad_norm": 4.09232669890172, "learning_rate": 5.55160922085916e-06, "loss": 0.6931, "step": 13966 }, { "epoch": 1.9721829991527817, "grad_norm": 3.1460761943533098, "learning_rate": 5.550243956455803e-06, "loss": 0.5708, "step": 13967 }, { "epoch": 1.9723242022027676, "grad_norm": 4.265408895571942, "learning_rate": 5.54887879546286e-06, "loss": 0.6534, "step": 13968 }, { "epoch": 1.9724654052527535, "grad_norm": 4.556159982190725, "learning_rate": 5.5475137379120495e-06, "loss": 0.6344, "step": 13969 }, { "epoch": 1.9726066083027394, "grad_norm": 4.348446896672211, "learning_rate": 5.546148783835097e-06, "loss": 0.5987, "step": 13970 }, { "epoch": 1.9727478113527253, "grad_norm": 4.257840678802924, "learning_rate": 5.5447839332637244e-06, "loss": 0.6468, "step": 13971 }, { "epoch": 1.9728890144027111, "grad_norm": 3.9390308186014926, "learning_rate": 5.5434191862296485e-06, "loss": 0.5694, "step": 13972 }, { "epoch": 1.973030217452697, "grad_norm": 3.3903924051176504, "learning_rate": 5.542054542764586e-06, "loss": 0.5587, "step": 13973 }, { "epoch": 1.973171420502683, "grad_norm": 3.7751479427479837, "learning_rate": 5.5406900029002504e-06, "loss": 0.6604, "step": 13974 }, { "epoch": 1.9733126235526688, "grad_norm": 3.6552286972476113, "learning_rate": 5.539325566668354e-06, "loss": 0.5074, "step": 13975 }, { "epoch": 1.9734538266026547, "grad_norm": 3.1378279759055383, "learning_rate": 5.5379612341006054e-06, "loss": 0.4482, "step": 13976 }, { "epoch": 1.9735950296526403, "grad_norm": 3.345437192555646, "learning_rate": 5.53659700522871e-06, "loss": 0.4989, "step": 13977 }, { "epoch": 1.9737362327026262, "grad_norm": 2.8573092317516964, "learning_rate": 5.535232880084373e-06, "loss": 0.3755, "step": 13978 }, { "epoch": 1.973877435752612, "grad_norm": 3.9713853927219116, "learning_rate": 5.533868858699291e-06, "loss": 0.5883, "step": 13979 }, { "epoch": 1.974018638802598, "grad_norm": 3.3196485438358536, "learning_rate": 5.532504941105176e-06, "loss": 0.5752, "step": 13980 }, { "epoch": 1.9741598418525839, "grad_norm": 3.1296260513888785, "learning_rate": 5.5311411273337124e-06, "loss": 0.4651, "step": 13981 }, { "epoch": 1.9743010449025697, "grad_norm": 3.323207368866015, "learning_rate": 5.529777417416599e-06, "loss": 0.5259, "step": 13982 }, { "epoch": 1.9744422479525556, "grad_norm": 4.118726109045666, "learning_rate": 5.528413811385529e-06, "loss": 0.6308, "step": 13983 }, { "epoch": 1.9745834510025415, "grad_norm": 3.926211046539317, "learning_rate": 5.527050309272187e-06, "loss": 0.696, "step": 13984 }, { "epoch": 1.9747246540525274, "grad_norm": 3.35776541472489, "learning_rate": 5.525686911108268e-06, "loss": 0.5293, "step": 13985 }, { "epoch": 1.9748658571025133, "grad_norm": 4.160218210045014, "learning_rate": 5.524323616925452e-06, "loss": 0.7608, "step": 13986 }, { "epoch": 1.9750070601524992, "grad_norm": 4.087195533204512, "learning_rate": 5.5229604267554235e-06, "loss": 0.5117, "step": 13987 }, { "epoch": 1.975148263202485, "grad_norm": 3.1495730488383806, "learning_rate": 5.52159734062986e-06, "loss": 0.4888, "step": 13988 }, { "epoch": 1.975289466252471, "grad_norm": 3.1699354246854483, "learning_rate": 5.520234358580441e-06, "loss": 0.4805, "step": 13989 }, { "epoch": 1.9754306693024568, "grad_norm": 3.295478958188065, "learning_rate": 5.5188714806388395e-06, "loss": 0.4913, "step": 13990 }, { "epoch": 1.9755718723524427, "grad_norm": 3.9948356257364153, "learning_rate": 5.5175087068367315e-06, "loss": 0.5636, "step": 13991 }, { "epoch": 1.9757130754024286, "grad_norm": 3.379309028112032, "learning_rate": 5.516146037205784e-06, "loss": 0.4732, "step": 13992 }, { "epoch": 1.9758542784524145, "grad_norm": 3.6819397293516825, "learning_rate": 5.514783471777667e-06, "loss": 0.5421, "step": 13993 }, { "epoch": 1.9759954815024003, "grad_norm": 4.014609068069663, "learning_rate": 5.513421010584044e-06, "loss": 0.6035, "step": 13994 }, { "epoch": 1.9761366845523862, "grad_norm": 4.125623895962094, "learning_rate": 5.51205865365658e-06, "loss": 0.6262, "step": 13995 }, { "epoch": 1.976277887602372, "grad_norm": 3.001239070149158, "learning_rate": 5.51069640102693e-06, "loss": 0.4428, "step": 13996 }, { "epoch": 1.976419090652358, "grad_norm": 4.119641359494949, "learning_rate": 5.509334252726765e-06, "loss": 0.5928, "step": 13997 }, { "epoch": 1.9765602937023439, "grad_norm": 3.021756214654428, "learning_rate": 5.507972208787728e-06, "loss": 0.4938, "step": 13998 }, { "epoch": 1.9767014967523298, "grad_norm": 3.3080244667737237, "learning_rate": 5.506610269241479e-06, "loss": 0.4522, "step": 13999 }, { "epoch": 1.9768426998023156, "grad_norm": 4.165893007373089, "learning_rate": 5.505248434119666e-06, "loss": 0.6541, "step": 14000 }, { "epoch": 1.9769839028523015, "grad_norm": 4.242147836666587, "learning_rate": 5.503886703453933e-06, "loss": 0.6373, "step": 14001 }, { "epoch": 1.9771251059022874, "grad_norm": 3.7347190776120067, "learning_rate": 5.502525077275937e-06, "loss": 0.4888, "step": 14002 }, { "epoch": 1.9772663089522733, "grad_norm": 3.6385314055490747, "learning_rate": 5.501163555617315e-06, "loss": 0.5523, "step": 14003 }, { "epoch": 1.9774075120022592, "grad_norm": 3.533821249691839, "learning_rate": 5.499802138509709e-06, "loss": 0.5368, "step": 14004 }, { "epoch": 1.977548715052245, "grad_norm": 3.2414158760505045, "learning_rate": 5.498440825984758e-06, "loss": 0.4289, "step": 14005 }, { "epoch": 1.977689918102231, "grad_norm": 3.3532772461544567, "learning_rate": 5.497079618074097e-06, "loss": 0.5046, "step": 14006 }, { "epoch": 1.9778311211522168, "grad_norm": 3.738283293758161, "learning_rate": 5.495718514809363e-06, "loss": 0.532, "step": 14007 }, { "epoch": 1.9779723242022027, "grad_norm": 3.3230503998844543, "learning_rate": 5.494357516222184e-06, "loss": 0.5178, "step": 14008 }, { "epoch": 1.9781135272521886, "grad_norm": 3.81292161830942, "learning_rate": 5.492996622344191e-06, "loss": 0.5485, "step": 14009 }, { "epoch": 1.9782547303021745, "grad_norm": 3.3562904374958493, "learning_rate": 5.49163583320701e-06, "loss": 0.5002, "step": 14010 }, { "epoch": 1.9783959333521604, "grad_norm": 3.4987435860341902, "learning_rate": 5.490275148842266e-06, "loss": 0.542, "step": 14011 }, { "epoch": 1.9785371364021462, "grad_norm": 3.6270815904742557, "learning_rate": 5.488914569281578e-06, "loss": 0.4715, "step": 14012 }, { "epoch": 1.9786783394521321, "grad_norm": 4.730952537891852, "learning_rate": 5.487554094556565e-06, "loss": 0.6648, "step": 14013 }, { "epoch": 1.978819542502118, "grad_norm": 3.7968177603514417, "learning_rate": 5.486193724698854e-06, "loss": 0.5803, "step": 14014 }, { "epoch": 1.9789607455521039, "grad_norm": 3.6202468321052903, "learning_rate": 5.484833459740047e-06, "loss": 0.6018, "step": 14015 }, { "epoch": 1.9791019486020898, "grad_norm": 3.914613135146988, "learning_rate": 5.48347329971176e-06, "loss": 0.5756, "step": 14016 }, { "epoch": 1.9792431516520756, "grad_norm": 3.9688751436293495, "learning_rate": 5.482113244645604e-06, "loss": 0.5549, "step": 14017 }, { "epoch": 1.9793843547020615, "grad_norm": 3.547649666202111, "learning_rate": 5.48075329457318e-06, "loss": 0.507, "step": 14018 }, { "epoch": 1.9795255577520474, "grad_norm": 4.173737951982291, "learning_rate": 5.479393449526102e-06, "loss": 0.6814, "step": 14019 }, { "epoch": 1.9796667608020333, "grad_norm": 3.5105366007060823, "learning_rate": 5.478033709535968e-06, "loss": 0.4048, "step": 14020 }, { "epoch": 1.9798079638520192, "grad_norm": 3.278474580171805, "learning_rate": 5.476674074634378e-06, "loss": 0.5636, "step": 14021 }, { "epoch": 1.979949166902005, "grad_norm": 3.66360812446567, "learning_rate": 5.4753145448529284e-06, "loss": 0.5968, "step": 14022 }, { "epoch": 1.980090369951991, "grad_norm": 3.1068984240121607, "learning_rate": 5.4739551202232155e-06, "loss": 0.4718, "step": 14023 }, { "epoch": 1.9802315730019768, "grad_norm": 3.529896546528489, "learning_rate": 5.472595800776831e-06, "loss": 0.5054, "step": 14024 }, { "epoch": 1.9803727760519627, "grad_norm": 3.0773333615397527, "learning_rate": 5.4712365865453645e-06, "loss": 0.4459, "step": 14025 }, { "epoch": 1.9805139791019486, "grad_norm": 3.7622421946209057, "learning_rate": 5.469877477560403e-06, "loss": 0.5472, "step": 14026 }, { "epoch": 1.9806551821519345, "grad_norm": 3.5612871264039563, "learning_rate": 5.468518473853535e-06, "loss": 0.5836, "step": 14027 }, { "epoch": 1.9807963852019204, "grad_norm": 3.8982104912496576, "learning_rate": 5.467159575456338e-06, "loss": 0.5565, "step": 14028 }, { "epoch": 1.9809375882519062, "grad_norm": 3.4771847286658883, "learning_rate": 5.465800782400397e-06, "loss": 0.5007, "step": 14029 }, { "epoch": 1.9810787913018921, "grad_norm": 3.8599443277703513, "learning_rate": 5.464442094717281e-06, "loss": 0.6166, "step": 14030 }, { "epoch": 1.981219994351878, "grad_norm": 3.55249159001819, "learning_rate": 5.463083512438582e-06, "loss": 0.5029, "step": 14031 }, { "epoch": 1.981361197401864, "grad_norm": 3.7410048001243377, "learning_rate": 5.46172503559586e-06, "loss": 0.5911, "step": 14032 }, { "epoch": 1.9815024004518498, "grad_norm": 3.329967938711971, "learning_rate": 5.460366664220686e-06, "loss": 0.4355, "step": 14033 }, { "epoch": 1.9816436035018357, "grad_norm": 3.2978784073699976, "learning_rate": 5.45900839834463e-06, "loss": 0.463, "step": 14034 }, { "epoch": 1.9817848065518215, "grad_norm": 2.7781668243490087, "learning_rate": 5.457650237999255e-06, "loss": 0.4184, "step": 14035 }, { "epoch": 1.9819260096018074, "grad_norm": 3.1362133022042977, "learning_rate": 5.45629218321613e-06, "loss": 0.4867, "step": 14036 }, { "epoch": 1.9820672126517933, "grad_norm": 3.874773400876753, "learning_rate": 5.454934234026813e-06, "loss": 0.5584, "step": 14037 }, { "epoch": 1.9822084157017792, "grad_norm": 3.305887450305212, "learning_rate": 5.453576390462861e-06, "loss": 0.531, "step": 14038 }, { "epoch": 1.982349618751765, "grad_norm": 3.8341483263123646, "learning_rate": 5.452218652555831e-06, "loss": 0.5236, "step": 14039 }, { "epoch": 1.982490821801751, "grad_norm": 4.18518586779701, "learning_rate": 5.4508610203372794e-06, "loss": 0.5643, "step": 14040 }, { "epoch": 1.9826320248517368, "grad_norm": 3.666619966205456, "learning_rate": 5.449503493838744e-06, "loss": 0.5491, "step": 14041 }, { "epoch": 1.9827732279017227, "grad_norm": 3.3240560366357976, "learning_rate": 5.448146073091787e-06, "loss": 0.4848, "step": 14042 }, { "epoch": 1.9829144309517086, "grad_norm": 3.1035495248420335, "learning_rate": 5.44678875812795e-06, "loss": 0.5086, "step": 14043 }, { "epoch": 1.9830556340016945, "grad_norm": 3.534598092378652, "learning_rate": 5.445431548978775e-06, "loss": 0.466, "step": 14044 }, { "epoch": 1.9831968370516804, "grad_norm": 3.3719351643908397, "learning_rate": 5.444074445675803e-06, "loss": 0.4456, "step": 14045 }, { "epoch": 1.9833380401016663, "grad_norm": 3.4647707238229177, "learning_rate": 5.442717448250574e-06, "loss": 0.4944, "step": 14046 }, { "epoch": 1.9834792431516521, "grad_norm": 3.1201164964440826, "learning_rate": 5.44136055673462e-06, "loss": 0.4482, "step": 14047 }, { "epoch": 1.983620446201638, "grad_norm": 3.766005685384043, "learning_rate": 5.440003771159485e-06, "loss": 0.5591, "step": 14048 }, { "epoch": 1.983761649251624, "grad_norm": 3.762544476774224, "learning_rate": 5.43864709155669e-06, "loss": 0.5945, "step": 14049 }, { "epoch": 1.9839028523016098, "grad_norm": 4.008481457946485, "learning_rate": 5.437290517957767e-06, "loss": 0.5293, "step": 14050 }, { "epoch": 1.9840440553515957, "grad_norm": 3.8424270293349254, "learning_rate": 5.435934050394242e-06, "loss": 0.596, "step": 14051 }, { "epoch": 1.9841852584015816, "grad_norm": 4.043495423807216, "learning_rate": 5.434577688897637e-06, "loss": 0.6161, "step": 14052 }, { "epoch": 1.9843264614515674, "grad_norm": 3.40787160400942, "learning_rate": 5.433221433499472e-06, "loss": 0.4502, "step": 14053 }, { "epoch": 1.9844676645015533, "grad_norm": 3.720288836568041, "learning_rate": 5.4318652842312725e-06, "loss": 0.5279, "step": 14054 }, { "epoch": 1.9846088675515392, "grad_norm": 3.9433751948255775, "learning_rate": 5.430509241124551e-06, "loss": 0.5263, "step": 14055 }, { "epoch": 1.984750070601525, "grad_norm": 3.0761589086409225, "learning_rate": 5.429153304210821e-06, "loss": 0.4472, "step": 14056 }, { "epoch": 1.984891273651511, "grad_norm": 4.264568296097902, "learning_rate": 5.427797473521599e-06, "loss": 0.5856, "step": 14057 }, { "epoch": 1.9850324767014969, "grad_norm": 3.2570349594000865, "learning_rate": 5.42644174908838e-06, "loss": 0.5996, "step": 14058 }, { "epoch": 1.9851736797514827, "grad_norm": 4.745779234966659, "learning_rate": 5.4250861309426835e-06, "loss": 0.5911, "step": 14059 }, { "epoch": 1.9853148828014686, "grad_norm": 4.143206267300422, "learning_rate": 5.42373061911601e-06, "loss": 0.6037, "step": 14060 }, { "epoch": 1.9854560858514545, "grad_norm": 3.5195158044242754, "learning_rate": 5.422375213639861e-06, "loss": 0.4757, "step": 14061 }, { "epoch": 1.9855972889014404, "grad_norm": 4.538327683816909, "learning_rate": 5.421019914545735e-06, "loss": 0.6427, "step": 14062 }, { "epoch": 1.9857384919514263, "grad_norm": 3.672037315512907, "learning_rate": 5.419664721865129e-06, "loss": 0.4594, "step": 14063 }, { "epoch": 1.9858796950014121, "grad_norm": 4.043021930528464, "learning_rate": 5.418309635629536e-06, "loss": 0.5672, "step": 14064 }, { "epoch": 1.986020898051398, "grad_norm": 3.6387995293735744, "learning_rate": 5.416954655870449e-06, "loss": 0.5245, "step": 14065 }, { "epoch": 1.986162101101384, "grad_norm": 4.0935576725852085, "learning_rate": 5.415599782619356e-06, "loss": 0.5474, "step": 14066 }, { "epoch": 1.9863033041513698, "grad_norm": 4.945419850549816, "learning_rate": 5.414245015907744e-06, "loss": 0.6251, "step": 14067 }, { "epoch": 1.9864445072013557, "grad_norm": 4.378137502899994, "learning_rate": 5.412890355767098e-06, "loss": 0.7481, "step": 14068 }, { "epoch": 1.9865857102513416, "grad_norm": 4.114005344599044, "learning_rate": 5.411535802228899e-06, "loss": 0.6113, "step": 14069 }, { "epoch": 1.9867269133013274, "grad_norm": 3.799682313176289, "learning_rate": 5.410181355324622e-06, "loss": 0.5621, "step": 14070 }, { "epoch": 1.9868681163513133, "grad_norm": 3.305948947377797, "learning_rate": 5.408827015085753e-06, "loss": 0.4899, "step": 14071 }, { "epoch": 1.9870093194012992, "grad_norm": 3.178203680661457, "learning_rate": 5.407472781543761e-06, "loss": 0.4897, "step": 14072 }, { "epoch": 1.987150522451285, "grad_norm": 3.469910386822805, "learning_rate": 5.406118654730117e-06, "loss": 0.4612, "step": 14073 }, { "epoch": 1.987291725501271, "grad_norm": 3.5526577549284584, "learning_rate": 5.404764634676297e-06, "loss": 0.5904, "step": 14074 }, { "epoch": 1.9874329285512569, "grad_norm": 3.636965306656946, "learning_rate": 5.403410721413754e-06, "loss": 0.5814, "step": 14075 }, { "epoch": 1.9875741316012427, "grad_norm": 3.9798626218358555, "learning_rate": 5.402056914973965e-06, "loss": 0.5199, "step": 14076 }, { "epoch": 1.9877153346512286, "grad_norm": 3.1541254382885207, "learning_rate": 5.400703215388387e-06, "loss": 0.4532, "step": 14077 }, { "epoch": 1.9878565377012145, "grad_norm": 4.103584707812915, "learning_rate": 5.399349622688479e-06, "loss": 0.6473, "step": 14078 }, { "epoch": 1.9879977407512002, "grad_norm": 3.6446340827844828, "learning_rate": 5.397996136905701e-06, "loss": 0.4868, "step": 14079 }, { "epoch": 1.988138943801186, "grad_norm": 3.6568943418663924, "learning_rate": 5.3966427580715044e-06, "loss": 0.5624, "step": 14080 }, { "epoch": 1.988280146851172, "grad_norm": 3.0657729728592136, "learning_rate": 5.395289486217341e-06, "loss": 0.4499, "step": 14081 }, { "epoch": 1.9884213499011578, "grad_norm": 3.6040588953093002, "learning_rate": 5.393936321374663e-06, "loss": 0.5029, "step": 14082 }, { "epoch": 1.9885625529511437, "grad_norm": 3.176063999664221, "learning_rate": 5.392583263574915e-06, "loss": 0.439, "step": 14083 }, { "epoch": 1.9887037560011296, "grad_norm": 3.7458423890007975, "learning_rate": 5.391230312849543e-06, "loss": 0.5877, "step": 14084 }, { "epoch": 1.9888449590511155, "grad_norm": 4.4646967106741755, "learning_rate": 5.389877469229987e-06, "loss": 0.6864, "step": 14085 }, { "epoch": 1.9889861621011014, "grad_norm": 4.578808672263429, "learning_rate": 5.388524732747688e-06, "loss": 0.5813, "step": 14086 }, { "epoch": 1.9891273651510872, "grad_norm": 3.243696386494515, "learning_rate": 5.387172103434078e-06, "loss": 0.5088, "step": 14087 }, { "epoch": 1.9892685682010731, "grad_norm": 4.345284359394688, "learning_rate": 5.385819581320602e-06, "loss": 0.6071, "step": 14088 }, { "epoch": 1.989409771251059, "grad_norm": 4.463065808901525, "learning_rate": 5.384467166438685e-06, "loss": 0.553, "step": 14089 }, { "epoch": 1.9895509743010449, "grad_norm": 4.663979135640216, "learning_rate": 5.3831148588197615e-06, "loss": 0.827, "step": 14090 }, { "epoch": 1.9896921773510308, "grad_norm": 3.430253573371457, "learning_rate": 5.3817626584952506e-06, "loss": 0.532, "step": 14091 }, { "epoch": 1.9898333804010166, "grad_norm": 3.8808844403018106, "learning_rate": 5.3804105654965784e-06, "loss": 0.5927, "step": 14092 }, { "epoch": 1.9899745834510025, "grad_norm": 3.4537343764441726, "learning_rate": 5.379058579855172e-06, "loss": 0.473, "step": 14093 }, { "epoch": 1.9901157865009884, "grad_norm": 3.734558252415471, "learning_rate": 5.3777067016024495e-06, "loss": 0.5917, "step": 14094 }, { "epoch": 1.9902569895509743, "grad_norm": 3.51712448839059, "learning_rate": 5.376354930769827e-06, "loss": 0.612, "step": 14095 }, { "epoch": 1.9903981926009602, "grad_norm": 3.6222833097605363, "learning_rate": 5.375003267388719e-06, "loss": 0.5816, "step": 14096 }, { "epoch": 1.990539395650946, "grad_norm": 3.331599129932487, "learning_rate": 5.373651711490538e-06, "loss": 0.5331, "step": 14097 }, { "epoch": 1.990680598700932, "grad_norm": 3.5198798999637924, "learning_rate": 5.372300263106693e-06, "loss": 0.545, "step": 14098 }, { "epoch": 1.9908218017509178, "grad_norm": 4.05490579461452, "learning_rate": 5.370948922268591e-06, "loss": 0.5371, "step": 14099 }, { "epoch": 1.9909630048009037, "grad_norm": 4.003866362314488, "learning_rate": 5.3695976890076375e-06, "loss": 0.6583, "step": 14100 }, { "epoch": 1.9911042078508896, "grad_norm": 5.208400190194892, "learning_rate": 5.368246563355234e-06, "loss": 0.5748, "step": 14101 }, { "epoch": 1.9912454109008755, "grad_norm": 3.0637301416234424, "learning_rate": 5.3668955453427795e-06, "loss": 0.3986, "step": 14102 }, { "epoch": 1.9913866139508614, "grad_norm": 3.2876650918687678, "learning_rate": 5.365544635001671e-06, "loss": 0.5307, "step": 14103 }, { "epoch": 1.9915278170008472, "grad_norm": 3.574000436075264, "learning_rate": 5.3641938323633025e-06, "loss": 0.4753, "step": 14104 }, { "epoch": 1.9916690200508331, "grad_norm": 4.096727148201548, "learning_rate": 5.362843137459068e-06, "loss": 0.6974, "step": 14105 }, { "epoch": 1.991810223100819, "grad_norm": 3.209796608290135, "learning_rate": 5.3614925503203586e-06, "loss": 0.4361, "step": 14106 }, { "epoch": 1.991951426150805, "grad_norm": 3.230951473200884, "learning_rate": 5.360142070978562e-06, "loss": 0.4297, "step": 14107 }, { "epoch": 1.9920926292007908, "grad_norm": 3.5667161632738047, "learning_rate": 5.358791699465056e-06, "loss": 0.4929, "step": 14108 }, { "epoch": 1.9922338322507767, "grad_norm": 3.838898420857128, "learning_rate": 5.357441435811224e-06, "loss": 0.5739, "step": 14109 }, { "epoch": 1.9923750353007625, "grad_norm": 3.5813676547244966, "learning_rate": 5.35609128004845e-06, "loss": 0.5014, "step": 14110 }, { "epoch": 1.9925162383507484, "grad_norm": 3.815652857806033, "learning_rate": 5.3547412322081095e-06, "loss": 0.5552, "step": 14111 }, { "epoch": 1.9926574414007343, "grad_norm": 3.974073335817395, "learning_rate": 5.353391292321577e-06, "loss": 0.52, "step": 14112 }, { "epoch": 1.99279864445072, "grad_norm": 3.1315195713257484, "learning_rate": 5.352041460420224e-06, "loss": 0.4843, "step": 14113 }, { "epoch": 1.9929398475007059, "grad_norm": 3.3439127415465673, "learning_rate": 5.3506917365354205e-06, "loss": 0.4724, "step": 14114 }, { "epoch": 1.9930810505506917, "grad_norm": 3.0946264095713163, "learning_rate": 5.349342120698533e-06, "loss": 0.4855, "step": 14115 }, { "epoch": 1.9932222536006776, "grad_norm": 3.4416027525340116, "learning_rate": 5.347992612940927e-06, "loss": 0.4581, "step": 14116 }, { "epoch": 1.9933634566506635, "grad_norm": 2.780677412453508, "learning_rate": 5.346643213293962e-06, "loss": 0.3918, "step": 14117 }, { "epoch": 1.9935046597006494, "grad_norm": 3.1745876731408673, "learning_rate": 5.345293921789e-06, "loss": 0.4443, "step": 14118 }, { "epoch": 1.9936458627506353, "grad_norm": 3.7227731812250924, "learning_rate": 5.343944738457396e-06, "loss": 0.6905, "step": 14119 }, { "epoch": 1.9937870658006211, "grad_norm": 3.821771934729362, "learning_rate": 5.3425956633305075e-06, "loss": 0.5668, "step": 14120 }, { "epoch": 1.993928268850607, "grad_norm": 3.3362924667742666, "learning_rate": 5.341246696439679e-06, "loss": 0.429, "step": 14121 }, { "epoch": 1.994069471900593, "grad_norm": 3.7720870163213176, "learning_rate": 5.3398978378162696e-06, "loss": 0.5339, "step": 14122 }, { "epoch": 1.9942106749505788, "grad_norm": 4.021627448380206, "learning_rate": 5.338549087491623e-06, "loss": 0.6265, "step": 14123 }, { "epoch": 1.9943518780005647, "grad_norm": 3.269514706156732, "learning_rate": 5.337200445497084e-06, "loss": 0.4721, "step": 14124 }, { "epoch": 1.9944930810505506, "grad_norm": 3.663667577988536, "learning_rate": 5.3358519118639895e-06, "loss": 0.5575, "step": 14125 }, { "epoch": 1.9946342841005364, "grad_norm": 4.493244696682536, "learning_rate": 5.33450348662368e-06, "loss": 0.655, "step": 14126 }, { "epoch": 1.9947754871505223, "grad_norm": 3.494900174633416, "learning_rate": 5.333155169807498e-06, "loss": 0.5459, "step": 14127 }, { "epoch": 1.9949166902005082, "grad_norm": 3.3629943872708643, "learning_rate": 5.331806961446774e-06, "loss": 0.4569, "step": 14128 }, { "epoch": 1.995057893250494, "grad_norm": 3.776577727677247, "learning_rate": 5.3304588615728405e-06, "loss": 0.4909, "step": 14129 }, { "epoch": 1.99519909630048, "grad_norm": 3.2977141035818014, "learning_rate": 5.3291108702170244e-06, "loss": 0.535, "step": 14130 }, { "epoch": 1.9953402993504659, "grad_norm": 4.761668998421619, "learning_rate": 5.327762987410657e-06, "loss": 0.6586, "step": 14131 }, { "epoch": 1.9954815024004517, "grad_norm": 3.772342215839018, "learning_rate": 5.3264152131850585e-06, "loss": 0.5715, "step": 14132 }, { "epoch": 1.9956227054504376, "grad_norm": 4.049886911514595, "learning_rate": 5.325067547571554e-06, "loss": 0.6804, "step": 14133 }, { "epoch": 1.9957639085004235, "grad_norm": 3.6322310435181837, "learning_rate": 5.323719990601459e-06, "loss": 0.4623, "step": 14134 }, { "epoch": 1.9959051115504094, "grad_norm": 3.7530447264745255, "learning_rate": 5.322372542306094e-06, "loss": 0.674, "step": 14135 }, { "epoch": 1.9960463146003953, "grad_norm": 4.069142408141527, "learning_rate": 5.321025202716769e-06, "loss": 0.6105, "step": 14136 }, { "epoch": 1.9961875176503812, "grad_norm": 3.4630871699004815, "learning_rate": 5.319677971864798e-06, "loss": 0.5535, "step": 14137 }, { "epoch": 1.996328720700367, "grad_norm": 4.394876662720301, "learning_rate": 5.318330849781485e-06, "loss": 0.6236, "step": 14138 }, { "epoch": 1.996469923750353, "grad_norm": 3.8526647510159124, "learning_rate": 5.316983836498146e-06, "loss": 0.5277, "step": 14139 }, { "epoch": 1.9966111268003388, "grad_norm": 2.8911609682361368, "learning_rate": 5.3156369320460796e-06, "loss": 0.3831, "step": 14140 }, { "epoch": 1.9967523298503247, "grad_norm": 2.9663938944191317, "learning_rate": 5.314290136456592e-06, "loss": 0.3636, "step": 14141 }, { "epoch": 1.9968935329003106, "grad_norm": 4.7647975140723675, "learning_rate": 5.312943449760975e-06, "loss": 0.6218, "step": 14142 }, { "epoch": 1.9970347359502965, "grad_norm": 3.236661396881344, "learning_rate": 5.311596871990527e-06, "loss": 0.4487, "step": 14143 }, { "epoch": 1.9971759390002823, "grad_norm": 3.826382081715904, "learning_rate": 5.31025040317654e-06, "loss": 0.5552, "step": 14144 }, { "epoch": 1.9973171420502682, "grad_norm": 3.0129649704183246, "learning_rate": 5.308904043350311e-06, "loss": 0.3987, "step": 14145 }, { "epoch": 1.997458345100254, "grad_norm": 4.04305886263828, "learning_rate": 5.307557792543128e-06, "loss": 0.584, "step": 14146 }, { "epoch": 1.99759954815024, "grad_norm": 3.882241816920631, "learning_rate": 5.306211650786273e-06, "loss": 0.5408, "step": 14147 }, { "epoch": 1.9977407512002259, "grad_norm": 3.7156242297894924, "learning_rate": 5.304865618111034e-06, "loss": 0.5256, "step": 14148 }, { "epoch": 1.9978819542502118, "grad_norm": 3.347998948374895, "learning_rate": 5.30351969454869e-06, "loss": 0.4801, "step": 14149 }, { "epoch": 1.9980231573001976, "grad_norm": 4.0121535600349425, "learning_rate": 5.302173880130519e-06, "loss": 0.5453, "step": 14150 }, { "epoch": 1.9981643603501835, "grad_norm": 3.525963863183588, "learning_rate": 5.3008281748878e-06, "loss": 0.4893, "step": 14151 }, { "epoch": 1.9983055634001694, "grad_norm": 3.614797379567509, "learning_rate": 5.299482578851803e-06, "loss": 0.5181, "step": 14152 }, { "epoch": 1.9984467664501553, "grad_norm": 4.034396814009825, "learning_rate": 5.2981370920538014e-06, "loss": 0.6122, "step": 14153 }, { "epoch": 1.9985879695001412, "grad_norm": 3.9394532807123497, "learning_rate": 5.296791714525064e-06, "loss": 0.5679, "step": 14154 }, { "epoch": 1.998729172550127, "grad_norm": 3.3692250589801445, "learning_rate": 5.295446446296855e-06, "loss": 0.5045, "step": 14155 }, { "epoch": 1.998870375600113, "grad_norm": 3.7991754933944493, "learning_rate": 5.294101287400435e-06, "loss": 0.59, "step": 14156 }, { "epoch": 1.9990115786500988, "grad_norm": 3.362914024480601, "learning_rate": 5.292756237867074e-06, "loss": 0.4627, "step": 14157 }, { "epoch": 1.9991527817000847, "grad_norm": 3.1203553867958935, "learning_rate": 5.291411297728027e-06, "loss": 0.4926, "step": 14158 }, { "epoch": 1.9992939847500706, "grad_norm": 3.6160683926808073, "learning_rate": 5.290066467014545e-06, "loss": 0.5583, "step": 14159 }, { "epoch": 1.9994351878000565, "grad_norm": 3.450972404441004, "learning_rate": 5.2887217457578856e-06, "loss": 0.4696, "step": 14160 }, { "epoch": 1.9995763908500424, "grad_norm": 2.8505279836101414, "learning_rate": 5.2873771339892925e-06, "loss": 0.4525, "step": 14161 }, { "epoch": 1.9997175939000282, "grad_norm": 3.721099729355444, "learning_rate": 5.286032631740023e-06, "loss": 0.533, "step": 14162 }, { "epoch": 1.9998587969500141, "grad_norm": 2.946940992759772, "learning_rate": 5.2846882390413214e-06, "loss": 0.4195, "step": 14163 }, { "epoch": 2.0, "grad_norm": 2.925316863283814, "learning_rate": 5.283343955924427e-06, "loss": 0.3603, "step": 14164 }, { "epoch": 2.000141203049986, "grad_norm": 2.764313582256329, "learning_rate": 5.281999782420584e-06, "loss": 0.2957, "step": 14165 }, { "epoch": 2.0002824060999718, "grad_norm": 2.0027093765135153, "learning_rate": 5.280655718561026e-06, "loss": 0.2162, "step": 14166 }, { "epoch": 2.0004236091499576, "grad_norm": 2.718105164338127, "learning_rate": 5.2793117643769935e-06, "loss": 0.2292, "step": 14167 }, { "epoch": 2.0005648121999435, "grad_norm": 2.3104670680762873, "learning_rate": 5.2779679198997145e-06, "loss": 0.1983, "step": 14168 }, { "epoch": 2.0007060152499294, "grad_norm": 2.426336070834441, "learning_rate": 5.2766241851604225e-06, "loss": 0.2435, "step": 14169 }, { "epoch": 2.0008472182999153, "grad_norm": 2.547201164183543, "learning_rate": 5.275280560190346e-06, "loss": 0.2368, "step": 14170 }, { "epoch": 2.000988421349901, "grad_norm": 2.7535339666019767, "learning_rate": 5.2739370450207075e-06, "loss": 0.2264, "step": 14171 }, { "epoch": 2.001129624399887, "grad_norm": 1.9715003169700214, "learning_rate": 5.27259363968273e-06, "loss": 0.201, "step": 14172 }, { "epoch": 2.001270827449873, "grad_norm": 2.1153406530200263, "learning_rate": 5.2712503442076325e-06, "loss": 0.1899, "step": 14173 }, { "epoch": 2.001412030499859, "grad_norm": 1.8926984858194746, "learning_rate": 5.269907158626639e-06, "loss": 0.2006, "step": 14174 }, { "epoch": 2.0015532335498447, "grad_norm": 2.5361594730093224, "learning_rate": 5.268564082970964e-06, "loss": 0.2619, "step": 14175 }, { "epoch": 2.0016944365998306, "grad_norm": 2.4405677275249693, "learning_rate": 5.267221117271812e-06, "loss": 0.1937, "step": 14176 }, { "epoch": 2.0018356396498165, "grad_norm": 2.0369341148734055, "learning_rate": 5.265878261560397e-06, "loss": 0.1733, "step": 14177 }, { "epoch": 2.0019768426998024, "grad_norm": 2.4771319069289834, "learning_rate": 5.264535515867924e-06, "loss": 0.1934, "step": 14178 }, { "epoch": 2.0021180457497882, "grad_norm": 2.494853993603759, "learning_rate": 5.2631928802256024e-06, "loss": 0.2219, "step": 14179 }, { "epoch": 2.002259248799774, "grad_norm": 2.2666890200972833, "learning_rate": 5.261850354664633e-06, "loss": 0.172, "step": 14180 }, { "epoch": 2.00240045184976, "grad_norm": 1.9982746863492475, "learning_rate": 5.260507939216217e-06, "loss": 0.1841, "step": 14181 }, { "epoch": 2.002541654899746, "grad_norm": 2.1616349671330943, "learning_rate": 5.259165633911549e-06, "loss": 0.1363, "step": 14182 }, { "epoch": 2.0026828579497318, "grad_norm": 2.2446598198390384, "learning_rate": 5.257823438781823e-06, "loss": 0.14, "step": 14183 }, { "epoch": 2.0028240609997177, "grad_norm": 1.8597113972274455, "learning_rate": 5.256481353858234e-06, "loss": 0.1464, "step": 14184 }, { "epoch": 2.0029652640497035, "grad_norm": 2.795630820238163, "learning_rate": 5.255139379171968e-06, "loss": 0.1914, "step": 14185 }, { "epoch": 2.0031064670996894, "grad_norm": 2.0371779665190366, "learning_rate": 5.253797514754214e-06, "loss": 0.1248, "step": 14186 }, { "epoch": 2.0032476701496753, "grad_norm": 3.0332761946912634, "learning_rate": 5.252455760636157e-06, "loss": 0.166, "step": 14187 }, { "epoch": 2.003388873199661, "grad_norm": 2.485954602177113, "learning_rate": 5.2511141168489764e-06, "loss": 0.1889, "step": 14188 }, { "epoch": 2.003530076249647, "grad_norm": 3.085159638632945, "learning_rate": 5.249772583423855e-06, "loss": 0.1583, "step": 14189 }, { "epoch": 2.003671279299633, "grad_norm": 3.337559871531928, "learning_rate": 5.248431160391963e-06, "loss": 0.1878, "step": 14190 }, { "epoch": 2.003812482349619, "grad_norm": 2.8635680750214925, "learning_rate": 5.2470898477844815e-06, "loss": 0.1679, "step": 14191 }, { "epoch": 2.0039536853996047, "grad_norm": 2.98437199484801, "learning_rate": 5.245748645632583e-06, "loss": 0.1937, "step": 14192 }, { "epoch": 2.0040948884495906, "grad_norm": 2.384055370766391, "learning_rate": 5.24440755396743e-06, "loss": 0.1667, "step": 14193 }, { "epoch": 2.0042360914995765, "grad_norm": 3.3477950157204623, "learning_rate": 5.24306657282019e-06, "loss": 0.224, "step": 14194 }, { "epoch": 2.0043772945495624, "grad_norm": 3.1348614932761243, "learning_rate": 5.241725702222027e-06, "loss": 0.1678, "step": 14195 }, { "epoch": 2.0045184975995483, "grad_norm": 2.5593119380690936, "learning_rate": 5.240384942204105e-06, "loss": 0.127, "step": 14196 }, { "epoch": 2.004659700649534, "grad_norm": 3.4366944809424167, "learning_rate": 5.2390442927975835e-06, "loss": 0.2114, "step": 14197 }, { "epoch": 2.00480090369952, "grad_norm": 2.981538377243886, "learning_rate": 5.237703754033616e-06, "loss": 0.1667, "step": 14198 }, { "epoch": 2.004942106749506, "grad_norm": 2.7998566495326433, "learning_rate": 5.236363325943357e-06, "loss": 0.1405, "step": 14199 }, { "epoch": 2.005083309799492, "grad_norm": 2.879230384218745, "learning_rate": 5.235023008557955e-06, "loss": 0.2018, "step": 14200 }, { "epoch": 2.0052245128494777, "grad_norm": 2.904074177485924, "learning_rate": 5.2336828019085616e-06, "loss": 0.2007, "step": 14201 }, { "epoch": 2.0053657158994636, "grad_norm": 3.241188879297599, "learning_rate": 5.232342706026323e-06, "loss": 0.2035, "step": 14202 }, { "epoch": 2.0055069189494494, "grad_norm": 3.213711204145903, "learning_rate": 5.231002720942379e-06, "loss": 0.1945, "step": 14203 }, { "epoch": 2.0056481219994353, "grad_norm": 2.8725007667883475, "learning_rate": 5.229662846687873e-06, "loss": 0.1774, "step": 14204 }, { "epoch": 2.005789325049421, "grad_norm": 3.027188981647017, "learning_rate": 5.228323083293943e-06, "loss": 0.1684, "step": 14205 }, { "epoch": 2.005930528099407, "grad_norm": 3.487025672725611, "learning_rate": 5.226983430791722e-06, "loss": 0.1932, "step": 14206 }, { "epoch": 2.006071731149393, "grad_norm": 2.5140653643065196, "learning_rate": 5.225643889212342e-06, "loss": 0.1697, "step": 14207 }, { "epoch": 2.006212934199379, "grad_norm": 2.9079514481862323, "learning_rate": 5.22430445858694e-06, "loss": 0.17, "step": 14208 }, { "epoch": 2.0063541372493647, "grad_norm": 3.092545711536245, "learning_rate": 5.2229651389466425e-06, "loss": 0.1615, "step": 14209 }, { "epoch": 2.0064953402993506, "grad_norm": 2.358912403650366, "learning_rate": 5.221625930322571e-06, "loss": 0.1576, "step": 14210 }, { "epoch": 2.0066365433493365, "grad_norm": 3.230862535969985, "learning_rate": 5.220286832745847e-06, "loss": 0.2036, "step": 14211 }, { "epoch": 2.0067777463993224, "grad_norm": 3.2766740755668966, "learning_rate": 5.2189478462475905e-06, "loss": 0.1743, "step": 14212 }, { "epoch": 2.0069189494493083, "grad_norm": 2.576363529450593, "learning_rate": 5.217608970858923e-06, "loss": 0.1315, "step": 14213 }, { "epoch": 2.007060152499294, "grad_norm": 3.175153181539466, "learning_rate": 5.216270206610959e-06, "loss": 0.18, "step": 14214 }, { "epoch": 2.00720135554928, "grad_norm": 3.011573116576983, "learning_rate": 5.21493155353481e-06, "loss": 0.2171, "step": 14215 }, { "epoch": 2.007342558599266, "grad_norm": 3.943489728370928, "learning_rate": 5.213593011661584e-06, "loss": 0.1617, "step": 14216 }, { "epoch": 2.007483761649252, "grad_norm": 3.1586607506246343, "learning_rate": 5.21225458102239e-06, "loss": 0.2609, "step": 14217 }, { "epoch": 2.0076249646992377, "grad_norm": 2.7151740132946847, "learning_rate": 5.2109162616483325e-06, "loss": 0.1475, "step": 14218 }, { "epoch": 2.0077661677492236, "grad_norm": 2.938301378282818, "learning_rate": 5.209578053570512e-06, "loss": 0.142, "step": 14219 }, { "epoch": 2.0079073707992094, "grad_norm": 2.828506970847201, "learning_rate": 5.20823995682003e-06, "loss": 0.1729, "step": 14220 }, { "epoch": 2.0080485738491953, "grad_norm": 2.825051815880469, "learning_rate": 5.20690197142798e-06, "loss": 0.1851, "step": 14221 }, { "epoch": 2.008189776899181, "grad_norm": 5.26932705193779, "learning_rate": 5.205564097425458e-06, "loss": 0.2492, "step": 14222 }, { "epoch": 2.008330979949167, "grad_norm": 2.572939951277935, "learning_rate": 5.204226334843558e-06, "loss": 0.1443, "step": 14223 }, { "epoch": 2.008472182999153, "grad_norm": 2.377493368791424, "learning_rate": 5.2028886837133605e-06, "loss": 0.1729, "step": 14224 }, { "epoch": 2.008613386049139, "grad_norm": 3.0403590517460297, "learning_rate": 5.201551144065964e-06, "loss": 0.1894, "step": 14225 }, { "epoch": 2.0087545890991247, "grad_norm": 2.9680354506223585, "learning_rate": 5.200213715932449e-06, "loss": 0.1945, "step": 14226 }, { "epoch": 2.0088957921491106, "grad_norm": 2.776244738273056, "learning_rate": 5.198876399343893e-06, "loss": 0.1701, "step": 14227 }, { "epoch": 2.0090369951990965, "grad_norm": 3.739130407624207, "learning_rate": 5.197539194331373e-06, "loss": 0.1941, "step": 14228 }, { "epoch": 2.0091781982490824, "grad_norm": 2.863443668479093, "learning_rate": 5.196202100925967e-06, "loss": 0.1955, "step": 14229 }, { "epoch": 2.0093194012990683, "grad_norm": 2.2225374371218947, "learning_rate": 5.194865119158752e-06, "loss": 0.1299, "step": 14230 }, { "epoch": 2.009460604349054, "grad_norm": 3.004502551734456, "learning_rate": 5.1935282490607964e-06, "loss": 0.1695, "step": 14231 }, { "epoch": 2.00960180739904, "grad_norm": 2.591361505249893, "learning_rate": 5.192191490663168e-06, "loss": 0.1569, "step": 14232 }, { "epoch": 2.0097430104490255, "grad_norm": 2.523837340637139, "learning_rate": 5.190854843996934e-06, "loss": 0.189, "step": 14233 }, { "epoch": 2.0098842134990114, "grad_norm": 3.2449230023383038, "learning_rate": 5.189518309093157e-06, "loss": 0.1788, "step": 14234 }, { "epoch": 2.0100254165489972, "grad_norm": 2.792490598095333, "learning_rate": 5.188181885982896e-06, "loss": 0.1572, "step": 14235 }, { "epoch": 2.010166619598983, "grad_norm": 2.957272892195983, "learning_rate": 5.18684557469721e-06, "loss": 0.1689, "step": 14236 }, { "epoch": 2.010307822648969, "grad_norm": 3.074026717603523, "learning_rate": 5.185509375267155e-06, "loss": 0.2192, "step": 14237 }, { "epoch": 2.010449025698955, "grad_norm": 2.9301614419600104, "learning_rate": 5.184173287723782e-06, "loss": 0.1761, "step": 14238 }, { "epoch": 2.0105902287489408, "grad_norm": 3.2150119772573413, "learning_rate": 5.182837312098141e-06, "loss": 0.1863, "step": 14239 }, { "epoch": 2.0107314317989267, "grad_norm": 2.674874134693218, "learning_rate": 5.1815014484212825e-06, "loss": 0.15, "step": 14240 }, { "epoch": 2.0108726348489125, "grad_norm": 2.730382139777421, "learning_rate": 5.1801656967242446e-06, "loss": 0.1965, "step": 14241 }, { "epoch": 2.0110138378988984, "grad_norm": 3.4104860281630245, "learning_rate": 5.178830057038079e-06, "loss": 0.2252, "step": 14242 }, { "epoch": 2.0111550409488843, "grad_norm": 2.8803258837059036, "learning_rate": 5.177494529393824e-06, "loss": 0.1883, "step": 14243 }, { "epoch": 2.01129624399887, "grad_norm": 3.8409567415236, "learning_rate": 5.1761591138225096e-06, "loss": 0.164, "step": 14244 }, { "epoch": 2.011437447048856, "grad_norm": 2.963409400142365, "learning_rate": 5.174823810355174e-06, "loss": 0.1798, "step": 14245 }, { "epoch": 2.011578650098842, "grad_norm": 3.9543857642348845, "learning_rate": 5.1734886190228496e-06, "loss": 0.2037, "step": 14246 }, { "epoch": 2.011719853148828, "grad_norm": 2.60517873264044, "learning_rate": 5.1721535398565616e-06, "loss": 0.1424, "step": 14247 }, { "epoch": 2.0118610561988137, "grad_norm": 6.411939889063179, "learning_rate": 5.170818572887344e-06, "loss": 0.1927, "step": 14248 }, { "epoch": 2.0120022592487996, "grad_norm": 3.2834816983888095, "learning_rate": 5.169483718146216e-06, "loss": 0.1621, "step": 14249 }, { "epoch": 2.0121434622987855, "grad_norm": 3.5214453367225875, "learning_rate": 5.168148975664203e-06, "loss": 0.1794, "step": 14250 }, { "epoch": 2.0122846653487714, "grad_norm": 3.499273405943138, "learning_rate": 5.166814345472322e-06, "loss": 0.2308, "step": 14251 }, { "epoch": 2.0124258683987573, "grad_norm": 2.922330510282529, "learning_rate": 5.165479827601583e-06, "loss": 0.1481, "step": 14252 }, { "epoch": 2.012567071448743, "grad_norm": 3.903078103500291, "learning_rate": 5.164145422083007e-06, "loss": 0.2282, "step": 14253 }, { "epoch": 2.012708274498729, "grad_norm": 2.5586107285867854, "learning_rate": 5.1628111289476025e-06, "loss": 0.1487, "step": 14254 }, { "epoch": 2.012849477548715, "grad_norm": 2.4014701856486385, "learning_rate": 5.161476948226381e-06, "loss": 0.157, "step": 14255 }, { "epoch": 2.012990680598701, "grad_norm": 3.03918997455723, "learning_rate": 5.160142879950343e-06, "loss": 0.185, "step": 14256 }, { "epoch": 2.0131318836486867, "grad_norm": 3.93537986322291, "learning_rate": 5.158808924150496e-06, "loss": 0.2293, "step": 14257 }, { "epoch": 2.0132730866986726, "grad_norm": 4.104975124740706, "learning_rate": 5.157475080857838e-06, "loss": 0.1952, "step": 14258 }, { "epoch": 2.0134142897486584, "grad_norm": 3.0242805165582864, "learning_rate": 5.156141350103364e-06, "loss": 0.1816, "step": 14259 }, { "epoch": 2.0135554927986443, "grad_norm": 2.7964190397144124, "learning_rate": 5.154807731918081e-06, "loss": 0.1841, "step": 14260 }, { "epoch": 2.01369669584863, "grad_norm": 2.8153029263490117, "learning_rate": 5.1534742263329705e-06, "loss": 0.1457, "step": 14261 }, { "epoch": 2.013837898898616, "grad_norm": 4.333958228662656, "learning_rate": 5.152140833379025e-06, "loss": 0.2535, "step": 14262 }, { "epoch": 2.013979101948602, "grad_norm": 2.8187272028955217, "learning_rate": 5.150807553087234e-06, "loss": 0.1274, "step": 14263 }, { "epoch": 2.014120304998588, "grad_norm": 3.7289097788501087, "learning_rate": 5.1494743854885755e-06, "loss": 0.1642, "step": 14264 }, { "epoch": 2.0142615080485737, "grad_norm": 2.9142387587933034, "learning_rate": 5.148141330614043e-06, "loss": 0.1559, "step": 14265 }, { "epoch": 2.0144027110985596, "grad_norm": 3.424627146168462, "learning_rate": 5.1468083884946104e-06, "loss": 0.2203, "step": 14266 }, { "epoch": 2.0145439141485455, "grad_norm": 2.8524654714765565, "learning_rate": 5.145475559161255e-06, "loss": 0.1395, "step": 14267 }, { "epoch": 2.0146851171985314, "grad_norm": 3.356551840326665, "learning_rate": 5.144142842644954e-06, "loss": 0.1992, "step": 14268 }, { "epoch": 2.0148263202485173, "grad_norm": 2.9127948550316467, "learning_rate": 5.14281023897667e-06, "loss": 0.1724, "step": 14269 }, { "epoch": 2.014967523298503, "grad_norm": 2.9915798834247758, "learning_rate": 5.14147774818738e-06, "loss": 0.1616, "step": 14270 }, { "epoch": 2.015108726348489, "grad_norm": 2.754564000908517, "learning_rate": 5.140145370308052e-06, "loss": 0.1849, "step": 14271 }, { "epoch": 2.015249929398475, "grad_norm": 2.7499737192811278, "learning_rate": 5.138813105369645e-06, "loss": 0.1643, "step": 14272 }, { "epoch": 2.015391132448461, "grad_norm": 2.702267368635129, "learning_rate": 5.137480953403121e-06, "loss": 0.1241, "step": 14273 }, { "epoch": 2.0155323354984467, "grad_norm": 3.2157430062993133, "learning_rate": 5.136148914439441e-06, "loss": 0.2016, "step": 14274 }, { "epoch": 2.0156735385484326, "grad_norm": 3.7214830687940554, "learning_rate": 5.134816988509559e-06, "loss": 0.2341, "step": 14275 }, { "epoch": 2.0158147415984184, "grad_norm": 2.680550947567722, "learning_rate": 5.133485175644426e-06, "loss": 0.1556, "step": 14276 }, { "epoch": 2.0159559446484043, "grad_norm": 3.493604159202271, "learning_rate": 5.132153475875003e-06, "loss": 0.1795, "step": 14277 }, { "epoch": 2.01609714769839, "grad_norm": 3.5515040373096523, "learning_rate": 5.130821889232228e-06, "loss": 0.2041, "step": 14278 }, { "epoch": 2.016238350748376, "grad_norm": 3.3819112021125464, "learning_rate": 5.1294904157470494e-06, "loss": 0.1543, "step": 14279 }, { "epoch": 2.016379553798362, "grad_norm": 2.9024671571010217, "learning_rate": 5.1281590554504095e-06, "loss": 0.1462, "step": 14280 }, { "epoch": 2.016520756848348, "grad_norm": 2.7384774592747374, "learning_rate": 5.126827808373245e-06, "loss": 0.1393, "step": 14281 }, { "epoch": 2.0166619598983337, "grad_norm": 2.780977016349886, "learning_rate": 5.125496674546502e-06, "loss": 0.1818, "step": 14282 }, { "epoch": 2.0168031629483196, "grad_norm": 3.0958753675088997, "learning_rate": 5.124165654001111e-06, "loss": 0.1723, "step": 14283 }, { "epoch": 2.0169443659983055, "grad_norm": 3.0788198776394315, "learning_rate": 5.122834746768004e-06, "loss": 0.2036, "step": 14284 }, { "epoch": 2.0170855690482914, "grad_norm": 2.7302967453617564, "learning_rate": 5.1215039528781165e-06, "loss": 0.1617, "step": 14285 }, { "epoch": 2.0172267720982773, "grad_norm": 2.9371680412750236, "learning_rate": 5.120173272362361e-06, "loss": 0.158, "step": 14286 }, { "epoch": 2.017367975148263, "grad_norm": 2.1490827787917004, "learning_rate": 5.118842705251677e-06, "loss": 0.1217, "step": 14287 }, { "epoch": 2.017509178198249, "grad_norm": 2.535003464375976, "learning_rate": 5.117512251576978e-06, "loss": 0.1514, "step": 14288 }, { "epoch": 2.017650381248235, "grad_norm": 2.7603601249819008, "learning_rate": 5.1161819113691865e-06, "loss": 0.2034, "step": 14289 }, { "epoch": 2.017791584298221, "grad_norm": 2.8555176317647097, "learning_rate": 5.114851684659219e-06, "loss": 0.1609, "step": 14290 }, { "epoch": 2.0179327873482067, "grad_norm": 3.2143877287287426, "learning_rate": 5.113521571477988e-06, "loss": 0.1343, "step": 14291 }, { "epoch": 2.0180739903981926, "grad_norm": 3.4625203700038596, "learning_rate": 5.112191571856406e-06, "loss": 0.1658, "step": 14292 }, { "epoch": 2.0182151934481785, "grad_norm": 3.026944741739093, "learning_rate": 5.110861685825377e-06, "loss": 0.2089, "step": 14293 }, { "epoch": 2.0183563964981643, "grad_norm": 2.158522000218707, "learning_rate": 5.109531913415819e-06, "loss": 0.1621, "step": 14294 }, { "epoch": 2.0184975995481502, "grad_norm": 2.6541316140519777, "learning_rate": 5.108202254658623e-06, "loss": 0.1305, "step": 14295 }, { "epoch": 2.018638802598136, "grad_norm": 2.9064285720761682, "learning_rate": 5.1068727095846935e-06, "loss": 0.1067, "step": 14296 }, { "epoch": 2.018780005648122, "grad_norm": 3.495979050414805, "learning_rate": 5.105543278224929e-06, "loss": 0.2024, "step": 14297 }, { "epoch": 2.018921208698108, "grad_norm": 3.284494536045945, "learning_rate": 5.104213960610223e-06, "loss": 0.1805, "step": 14298 }, { "epoch": 2.0190624117480938, "grad_norm": 3.2853795213068184, "learning_rate": 5.102884756771471e-06, "loss": 0.1939, "step": 14299 }, { "epoch": 2.0192036147980796, "grad_norm": 3.702290032946965, "learning_rate": 5.101555666739563e-06, "loss": 0.2033, "step": 14300 }, { "epoch": 2.0193448178480655, "grad_norm": 3.445254241275055, "learning_rate": 5.100226690545389e-06, "loss": 0.1478, "step": 14301 }, { "epoch": 2.0194860208980514, "grad_norm": 3.216175143802346, "learning_rate": 5.098897828219831e-06, "loss": 0.208, "step": 14302 }, { "epoch": 2.0196272239480373, "grad_norm": 3.4322661040218607, "learning_rate": 5.097569079793765e-06, "loss": 0.2012, "step": 14303 }, { "epoch": 2.019768426998023, "grad_norm": 3.287857603936467, "learning_rate": 5.09624044529808e-06, "loss": 0.189, "step": 14304 }, { "epoch": 2.019909630048009, "grad_norm": 3.962516942484197, "learning_rate": 5.094911924763649e-06, "loss": 0.1865, "step": 14305 }, { "epoch": 2.020050833097995, "grad_norm": 3.0687525123911725, "learning_rate": 5.093583518221347e-06, "loss": 0.1267, "step": 14306 }, { "epoch": 2.020192036147981, "grad_norm": 2.69741910815603, "learning_rate": 5.092255225702044e-06, "loss": 0.1479, "step": 14307 }, { "epoch": 2.0203332391979667, "grad_norm": 2.874969017459384, "learning_rate": 5.090927047236611e-06, "loss": 0.1617, "step": 14308 }, { "epoch": 2.0204744422479526, "grad_norm": 3.1260285356173965, "learning_rate": 5.089598982855913e-06, "loss": 0.162, "step": 14309 }, { "epoch": 2.0206156452979385, "grad_norm": 3.4280457272706006, "learning_rate": 5.0882710325908106e-06, "loss": 0.1893, "step": 14310 }, { "epoch": 2.0207568483479244, "grad_norm": 2.8575679563518412, "learning_rate": 5.086943196472175e-06, "loss": 0.1078, "step": 14311 }, { "epoch": 2.0208980513979102, "grad_norm": 2.837968988277867, "learning_rate": 5.085615474530854e-06, "loss": 0.1511, "step": 14312 }, { "epoch": 2.021039254447896, "grad_norm": 2.5132387422991886, "learning_rate": 5.084287866797708e-06, "loss": 0.116, "step": 14313 }, { "epoch": 2.021180457497882, "grad_norm": 2.997667691888442, "learning_rate": 5.082960373303588e-06, "loss": 0.1455, "step": 14314 }, { "epoch": 2.021321660547868, "grad_norm": 4.083504383571186, "learning_rate": 5.081632994079342e-06, "loss": 0.2181, "step": 14315 }, { "epoch": 2.0214628635978538, "grad_norm": 3.160523080242641, "learning_rate": 5.0803057291558255e-06, "loss": 0.1664, "step": 14316 }, { "epoch": 2.0216040666478396, "grad_norm": 3.151836517720333, "learning_rate": 5.078978578563878e-06, "loss": 0.1534, "step": 14317 }, { "epoch": 2.0217452696978255, "grad_norm": 2.8664655774830643, "learning_rate": 5.0776515423343445e-06, "loss": 0.1481, "step": 14318 }, { "epoch": 2.0218864727478114, "grad_norm": 3.377888578270953, "learning_rate": 5.076324620498066e-06, "loss": 0.227, "step": 14319 }, { "epoch": 2.0220276757977973, "grad_norm": 3.1228143672231394, "learning_rate": 5.074997813085873e-06, "loss": 0.1624, "step": 14320 }, { "epoch": 2.022168878847783, "grad_norm": 3.2807220001996455, "learning_rate": 5.073671120128601e-06, "loss": 0.196, "step": 14321 }, { "epoch": 2.022310081897769, "grad_norm": 3.1692646215771787, "learning_rate": 5.072344541657089e-06, "loss": 0.155, "step": 14322 }, { "epoch": 2.022451284947755, "grad_norm": 3.0084076323219295, "learning_rate": 5.071018077702161e-06, "loss": 0.1708, "step": 14323 }, { "epoch": 2.022592487997741, "grad_norm": 2.7475542580607564, "learning_rate": 5.069691728294643e-06, "loss": 0.175, "step": 14324 }, { "epoch": 2.0227336910477267, "grad_norm": 3.391186321616539, "learning_rate": 5.068365493465361e-06, "loss": 0.1412, "step": 14325 }, { "epoch": 2.0228748940977126, "grad_norm": 3.651232135350546, "learning_rate": 5.067039373245136e-06, "loss": 0.2176, "step": 14326 }, { "epoch": 2.0230160971476985, "grad_norm": 3.2651476850777144, "learning_rate": 5.065713367664781e-06, "loss": 0.1621, "step": 14327 }, { "epoch": 2.0231573001976844, "grad_norm": 2.2954667081660163, "learning_rate": 5.064387476755124e-06, "loss": 0.1325, "step": 14328 }, { "epoch": 2.0232985032476702, "grad_norm": 2.8573364849337124, "learning_rate": 5.0630617005469676e-06, "loss": 0.1454, "step": 14329 }, { "epoch": 2.023439706297656, "grad_norm": 3.1370163645774825, "learning_rate": 5.061736039071124e-06, "loss": 0.162, "step": 14330 }, { "epoch": 2.023580909347642, "grad_norm": 2.9252788074065603, "learning_rate": 5.060410492358402e-06, "loss": 0.1662, "step": 14331 }, { "epoch": 2.023722112397628, "grad_norm": 4.104286827489718, "learning_rate": 5.059085060439608e-06, "loss": 0.2688, "step": 14332 }, { "epoch": 2.0238633154476138, "grad_norm": 2.68288795901017, "learning_rate": 5.057759743345538e-06, "loss": 0.1552, "step": 14333 }, { "epoch": 2.0240045184975997, "grad_norm": 2.749144272339022, "learning_rate": 5.0564345411070025e-06, "loss": 0.1302, "step": 14334 }, { "epoch": 2.0241457215475855, "grad_norm": 2.9055213748664905, "learning_rate": 5.0551094537547915e-06, "loss": 0.1231, "step": 14335 }, { "epoch": 2.0242869245975714, "grad_norm": 2.6650302774262244, "learning_rate": 5.053784481319708e-06, "loss": 0.1583, "step": 14336 }, { "epoch": 2.0244281276475573, "grad_norm": 2.8705612937789855, "learning_rate": 5.052459623832531e-06, "loss": 0.1494, "step": 14337 }, { "epoch": 2.024569330697543, "grad_norm": 2.377704314149694, "learning_rate": 5.0511348813240515e-06, "loss": 0.1405, "step": 14338 }, { "epoch": 2.024710533747529, "grad_norm": 3.3624783597169388, "learning_rate": 5.0498102538250645e-06, "loss": 0.1776, "step": 14339 }, { "epoch": 2.024851736797515, "grad_norm": 3.010236408065653, "learning_rate": 5.048485741366351e-06, "loss": 0.1426, "step": 14340 }, { "epoch": 2.024992939847501, "grad_norm": 3.7898106021779268, "learning_rate": 5.047161343978688e-06, "loss": 0.1675, "step": 14341 }, { "epoch": 2.0251341428974867, "grad_norm": 3.1728100636799494, "learning_rate": 5.045837061692857e-06, "loss": 0.1764, "step": 14342 }, { "epoch": 2.0252753459474726, "grad_norm": 2.7611004852732357, "learning_rate": 5.044512894539635e-06, "loss": 0.1339, "step": 14343 }, { "epoch": 2.0254165489974585, "grad_norm": 3.5459142016380465, "learning_rate": 5.043188842549789e-06, "loss": 0.2682, "step": 14344 }, { "epoch": 2.0255577520474444, "grad_norm": 3.103278357585595, "learning_rate": 5.041864905754099e-06, "loss": 0.1832, "step": 14345 }, { "epoch": 2.0256989550974303, "grad_norm": 3.317205383651116, "learning_rate": 5.040541084183326e-06, "loss": 0.2119, "step": 14346 }, { "epoch": 2.025840158147416, "grad_norm": 2.9364367802322304, "learning_rate": 5.039217377868235e-06, "loss": 0.1709, "step": 14347 }, { "epoch": 2.025981361197402, "grad_norm": 3.9900746615327156, "learning_rate": 5.037893786839589e-06, "loss": 0.2049, "step": 14348 }, { "epoch": 2.026122564247388, "grad_norm": 2.913108349160879, "learning_rate": 5.036570311128151e-06, "loss": 0.2026, "step": 14349 }, { "epoch": 2.026263767297374, "grad_norm": 3.1605111393399814, "learning_rate": 5.03524695076467e-06, "loss": 0.1724, "step": 14350 }, { "epoch": 2.0264049703473597, "grad_norm": 2.747580571003615, "learning_rate": 5.033923705779908e-06, "loss": 0.1561, "step": 14351 }, { "epoch": 2.0265461733973456, "grad_norm": 2.858744698890405, "learning_rate": 5.032600576204616e-06, "loss": 0.1656, "step": 14352 }, { "epoch": 2.0266873764473314, "grad_norm": 3.0464691514061246, "learning_rate": 5.031277562069545e-06, "loss": 0.1449, "step": 14353 }, { "epoch": 2.0268285794973173, "grad_norm": 2.617906775579497, "learning_rate": 5.0299546634054345e-06, "loss": 0.1482, "step": 14354 }, { "epoch": 2.026969782547303, "grad_norm": 2.893959955776391, "learning_rate": 5.0286318802430265e-06, "loss": 0.1472, "step": 14355 }, { "epoch": 2.027110985597289, "grad_norm": 2.9869522340638763, "learning_rate": 5.0273092126130715e-06, "loss": 0.1418, "step": 14356 }, { "epoch": 2.027252188647275, "grad_norm": 3.7185787171677105, "learning_rate": 5.0259866605463025e-06, "loss": 0.148, "step": 14357 }, { "epoch": 2.027393391697261, "grad_norm": 3.6964771472901607, "learning_rate": 5.024664224073454e-06, "loss": 0.2241, "step": 14358 }, { "epoch": 2.0275345947472467, "grad_norm": 2.4112004860583665, "learning_rate": 5.023341903225263e-06, "loss": 0.1473, "step": 14359 }, { "epoch": 2.0276757977972326, "grad_norm": 2.6447613091692244, "learning_rate": 5.0220196980324545e-06, "loss": 0.1622, "step": 14360 }, { "epoch": 2.0278170008472185, "grad_norm": 3.3984022848298543, "learning_rate": 5.02069760852576e-06, "loss": 0.2059, "step": 14361 }, { "epoch": 2.0279582038972044, "grad_norm": 3.3626068429819407, "learning_rate": 5.0193756347359015e-06, "loss": 0.1935, "step": 14362 }, { "epoch": 2.0280994069471903, "grad_norm": 2.762416962983157, "learning_rate": 5.0180537766936026e-06, "loss": 0.1573, "step": 14363 }, { "epoch": 2.028240609997176, "grad_norm": 3.3537987722964937, "learning_rate": 5.016732034429581e-06, "loss": 0.1332, "step": 14364 }, { "epoch": 2.028381813047162, "grad_norm": 2.6345409502602406, "learning_rate": 5.015410407974557e-06, "loss": 0.146, "step": 14365 }, { "epoch": 2.028523016097148, "grad_norm": 2.957722803216369, "learning_rate": 5.014088897359242e-06, "loss": 0.1429, "step": 14366 }, { "epoch": 2.028664219147134, "grad_norm": 3.3282396203486995, "learning_rate": 5.012767502614344e-06, "loss": 0.1999, "step": 14367 }, { "epoch": 2.0288054221971192, "grad_norm": 3.1655430891975636, "learning_rate": 5.011446223770578e-06, "loss": 0.1631, "step": 14368 }, { "epoch": 2.028946625247105, "grad_norm": 4.190118466290238, "learning_rate": 5.010125060858648e-06, "loss": 0.1819, "step": 14369 }, { "epoch": 2.029087828297091, "grad_norm": 2.8680342102016025, "learning_rate": 5.0088040139092605e-06, "loss": 0.1645, "step": 14370 }, { "epoch": 2.029229031347077, "grad_norm": 4.414539800071081, "learning_rate": 5.007483082953109e-06, "loss": 0.1902, "step": 14371 }, { "epoch": 2.0293702343970628, "grad_norm": 2.877790409963356, "learning_rate": 5.006162268020891e-06, "loss": 0.138, "step": 14372 }, { "epoch": 2.0295114374470486, "grad_norm": 3.0804430241035554, "learning_rate": 5.004841569143308e-06, "loss": 0.155, "step": 14373 }, { "epoch": 2.0296526404970345, "grad_norm": 3.489980685524447, "learning_rate": 5.0035209863510496e-06, "loss": 0.1621, "step": 14374 }, { "epoch": 2.0297938435470204, "grad_norm": 2.8578953863908914, "learning_rate": 5.002200519674807e-06, "loss": 0.1307, "step": 14375 }, { "epoch": 2.0299350465970063, "grad_norm": 2.891395984033541, "learning_rate": 5.000880169145264e-06, "loss": 0.155, "step": 14376 }, { "epoch": 2.030076249646992, "grad_norm": 2.8140909444963302, "learning_rate": 4.9995599347931075e-06, "loss": 0.1893, "step": 14377 }, { "epoch": 2.030217452696978, "grad_norm": 3.47376664667973, "learning_rate": 4.99823981664902e-06, "loss": 0.1837, "step": 14378 }, { "epoch": 2.030358655746964, "grad_norm": 2.2789404831276023, "learning_rate": 4.9969198147436785e-06, "loss": 0.1118, "step": 14379 }, { "epoch": 2.03049985879695, "grad_norm": 3.5794591179775925, "learning_rate": 4.995599929107758e-06, "loss": 0.1727, "step": 14380 }, { "epoch": 2.0306410618469357, "grad_norm": 2.9646859197838196, "learning_rate": 4.994280159771936e-06, "loss": 0.1829, "step": 14381 }, { "epoch": 2.0307822648969216, "grad_norm": 3.4813244848788467, "learning_rate": 4.992960506766881e-06, "loss": 0.1485, "step": 14382 }, { "epoch": 2.0309234679469075, "grad_norm": 2.610096709229147, "learning_rate": 4.99164097012326e-06, "loss": 0.1793, "step": 14383 }, { "epoch": 2.0310646709968934, "grad_norm": 3.0311541027224624, "learning_rate": 4.990321549871738e-06, "loss": 0.1574, "step": 14384 }, { "epoch": 2.0312058740468792, "grad_norm": 3.102962097892004, "learning_rate": 4.989002246042982e-06, "loss": 0.1627, "step": 14385 }, { "epoch": 2.031347077096865, "grad_norm": 2.543570252241666, "learning_rate": 4.987683058667651e-06, "loss": 0.1529, "step": 14386 }, { "epoch": 2.031488280146851, "grad_norm": 2.6555729673879127, "learning_rate": 4.986363987776403e-06, "loss": 0.1465, "step": 14387 }, { "epoch": 2.031629483196837, "grad_norm": 3.3689759091298104, "learning_rate": 4.985045033399889e-06, "loss": 0.1545, "step": 14388 }, { "epoch": 2.0317706862468228, "grad_norm": 3.028162363237027, "learning_rate": 4.983726195568758e-06, "loss": 0.1785, "step": 14389 }, { "epoch": 2.0319118892968087, "grad_norm": 3.025675420290699, "learning_rate": 4.982407474313667e-06, "loss": 0.1866, "step": 14390 }, { "epoch": 2.0320530923467945, "grad_norm": 3.2872120834588427, "learning_rate": 4.98108886966526e-06, "loss": 0.1889, "step": 14391 }, { "epoch": 2.0321942953967804, "grad_norm": 3.583786146529471, "learning_rate": 4.979770381654181e-06, "loss": 0.2054, "step": 14392 }, { "epoch": 2.0323354984467663, "grad_norm": 3.51715389649081, "learning_rate": 4.978452010311069e-06, "loss": 0.1846, "step": 14393 }, { "epoch": 2.032476701496752, "grad_norm": 3.1202821920642863, "learning_rate": 4.977133755666564e-06, "loss": 0.1448, "step": 14394 }, { "epoch": 2.032617904546738, "grad_norm": 3.2100223492400097, "learning_rate": 4.975815617751301e-06, "loss": 0.137, "step": 14395 }, { "epoch": 2.032759107596724, "grad_norm": 3.3271857276497965, "learning_rate": 4.9744975965959145e-06, "loss": 0.1927, "step": 14396 }, { "epoch": 2.03290031064671, "grad_norm": 3.017136942033875, "learning_rate": 4.973179692231033e-06, "loss": 0.1585, "step": 14397 }, { "epoch": 2.0330415136966957, "grad_norm": 3.5420431462642243, "learning_rate": 4.971861904687283e-06, "loss": 0.1776, "step": 14398 }, { "epoch": 2.0331827167466816, "grad_norm": 2.366027750351628, "learning_rate": 4.9705442339952924e-06, "loss": 0.119, "step": 14399 }, { "epoch": 2.0333239197966675, "grad_norm": 3.016507923140273, "learning_rate": 4.9692266801856815e-06, "loss": 0.16, "step": 14400 }, { "epoch": 2.0334651228466534, "grad_norm": 2.379249937292288, "learning_rate": 4.967909243289066e-06, "loss": 0.118, "step": 14401 }, { "epoch": 2.0336063258966393, "grad_norm": 2.765877272180813, "learning_rate": 4.96659192333607e-06, "loss": 0.1206, "step": 14402 }, { "epoch": 2.033747528946625, "grad_norm": 3.118654429216308, "learning_rate": 4.965274720357303e-06, "loss": 0.1666, "step": 14403 }, { "epoch": 2.033888731996611, "grad_norm": 3.1721014618923173, "learning_rate": 4.963957634383384e-06, "loss": 0.174, "step": 14404 }, { "epoch": 2.034029935046597, "grad_norm": 3.396584819963182, "learning_rate": 4.9626406654449085e-06, "loss": 0.1862, "step": 14405 }, { "epoch": 2.034171138096583, "grad_norm": 3.2489918842077277, "learning_rate": 4.961323813572485e-06, "loss": 0.1685, "step": 14406 }, { "epoch": 2.0343123411465687, "grad_norm": 3.6677805330764848, "learning_rate": 4.960007078796725e-06, "loss": 0.185, "step": 14407 }, { "epoch": 2.0344535441965546, "grad_norm": 3.0668642501592838, "learning_rate": 4.958690461148222e-06, "loss": 0.1717, "step": 14408 }, { "epoch": 2.0345947472465404, "grad_norm": 2.9950345301949715, "learning_rate": 4.957373960657577e-06, "loss": 0.145, "step": 14409 }, { "epoch": 2.0347359502965263, "grad_norm": 3.021244840080818, "learning_rate": 4.9560575773553824e-06, "loss": 0.1659, "step": 14410 }, { "epoch": 2.034877153346512, "grad_norm": 3.3261812652438625, "learning_rate": 4.954741311272232e-06, "loss": 0.1926, "step": 14411 }, { "epoch": 2.035018356396498, "grad_norm": 3.6332731848057582, "learning_rate": 4.953425162438714e-06, "loss": 0.2188, "step": 14412 }, { "epoch": 2.035159559446484, "grad_norm": 3.463149589561639, "learning_rate": 4.9521091308854165e-06, "loss": 0.1672, "step": 14413 }, { "epoch": 2.03530076249647, "grad_norm": 3.639546616390054, "learning_rate": 4.950793216642923e-06, "loss": 0.2361, "step": 14414 }, { "epoch": 2.0354419655464557, "grad_norm": 2.70479450723535, "learning_rate": 4.949477419741814e-06, "loss": 0.1642, "step": 14415 }, { "epoch": 2.0355831685964416, "grad_norm": 3.19442161463194, "learning_rate": 4.948161740212669e-06, "loss": 0.1441, "step": 14416 }, { "epoch": 2.0357243716464275, "grad_norm": 3.14411243038963, "learning_rate": 4.946846178086063e-06, "loss": 0.167, "step": 14417 }, { "epoch": 2.0358655746964134, "grad_norm": 2.9209549904204573, "learning_rate": 4.945530733392566e-06, "loss": 0.1607, "step": 14418 }, { "epoch": 2.0360067777463993, "grad_norm": 3.25603671123706, "learning_rate": 4.944215406162756e-06, "loss": 0.1771, "step": 14419 }, { "epoch": 2.036147980796385, "grad_norm": 3.1952927664951236, "learning_rate": 4.942900196427195e-06, "loss": 0.1193, "step": 14420 }, { "epoch": 2.036289183846371, "grad_norm": 3.3253950030301147, "learning_rate": 4.941585104216455e-06, "loss": 0.2051, "step": 14421 }, { "epoch": 2.036430386896357, "grad_norm": 3.161273545556273, "learning_rate": 4.940270129561088e-06, "loss": 0.1643, "step": 14422 }, { "epoch": 2.036571589946343, "grad_norm": 3.0794686033906316, "learning_rate": 4.938955272491658e-06, "loss": 0.1818, "step": 14423 }, { "epoch": 2.0367127929963287, "grad_norm": 3.343477152971616, "learning_rate": 4.937640533038718e-06, "loss": 0.1523, "step": 14424 }, { "epoch": 2.0368539960463146, "grad_norm": 2.72045550067017, "learning_rate": 4.93632591123283e-06, "loss": 0.1435, "step": 14425 }, { "epoch": 2.0369951990963004, "grad_norm": 3.516167575412925, "learning_rate": 4.9350114071045405e-06, "loss": 0.1869, "step": 14426 }, { "epoch": 2.0371364021462863, "grad_norm": 2.9425003429891023, "learning_rate": 4.933697020684399e-06, "loss": 0.1622, "step": 14427 }, { "epoch": 2.037277605196272, "grad_norm": 3.1527418176714113, "learning_rate": 4.932382752002951e-06, "loss": 0.2026, "step": 14428 }, { "epoch": 2.037418808246258, "grad_norm": 3.060786390341475, "learning_rate": 4.9310686010907384e-06, "loss": 0.1693, "step": 14429 }, { "epoch": 2.037560011296244, "grad_norm": 2.812296176985327, "learning_rate": 4.929754567978303e-06, "loss": 0.1916, "step": 14430 }, { "epoch": 2.03770121434623, "grad_norm": 2.798882413281499, "learning_rate": 4.928440652696181e-06, "loss": 0.1369, "step": 14431 }, { "epoch": 2.0378424173962157, "grad_norm": 2.7242914221321985, "learning_rate": 4.92712685527491e-06, "loss": 0.1487, "step": 14432 }, { "epoch": 2.0379836204462016, "grad_norm": 2.5991060870490523, "learning_rate": 4.9258131757450175e-06, "loss": 0.1513, "step": 14433 }, { "epoch": 2.0381248234961875, "grad_norm": 3.6661838953118018, "learning_rate": 4.924499614137037e-06, "loss": 0.2036, "step": 14434 }, { "epoch": 2.0382660265461734, "grad_norm": 3.2058879551837363, "learning_rate": 4.9231861704814935e-06, "loss": 0.1462, "step": 14435 }, { "epoch": 2.0384072295961593, "grad_norm": 3.213894949275474, "learning_rate": 4.921872844808906e-06, "loss": 0.1514, "step": 14436 }, { "epoch": 2.038548432646145, "grad_norm": 2.9134086050215724, "learning_rate": 4.920559637149805e-06, "loss": 0.1506, "step": 14437 }, { "epoch": 2.038689635696131, "grad_norm": 2.74923612539152, "learning_rate": 4.919246547534709e-06, "loss": 0.1517, "step": 14438 }, { "epoch": 2.038830838746117, "grad_norm": 2.682071353550135, "learning_rate": 4.917933575994124e-06, "loss": 0.1278, "step": 14439 }, { "epoch": 2.038972041796103, "grad_norm": 3.2705496410561716, "learning_rate": 4.916620722558568e-06, "loss": 0.1566, "step": 14440 }, { "epoch": 2.0391132448460887, "grad_norm": 2.8690052286964085, "learning_rate": 4.915307987258547e-06, "loss": 0.1389, "step": 14441 }, { "epoch": 2.0392544478960746, "grad_norm": 3.308230467778446, "learning_rate": 4.913995370124578e-06, "loss": 0.133, "step": 14442 }, { "epoch": 2.0393956509460605, "grad_norm": 3.066123818748472, "learning_rate": 4.9126828711871585e-06, "loss": 0.1833, "step": 14443 }, { "epoch": 2.0395368539960463, "grad_norm": 2.4789147917615573, "learning_rate": 4.911370490476792e-06, "loss": 0.1342, "step": 14444 }, { "epoch": 2.0396780570460322, "grad_norm": 3.9985157494152013, "learning_rate": 4.91005822802398e-06, "loss": 0.1754, "step": 14445 }, { "epoch": 2.039819260096018, "grad_norm": 4.196855961328, "learning_rate": 4.908746083859214e-06, "loss": 0.1951, "step": 14446 }, { "epoch": 2.039960463146004, "grad_norm": 3.315833152185437, "learning_rate": 4.907434058012991e-06, "loss": 0.1571, "step": 14447 }, { "epoch": 2.04010166619599, "grad_norm": 3.797592149307698, "learning_rate": 4.906122150515801e-06, "loss": 0.1729, "step": 14448 }, { "epoch": 2.0402428692459758, "grad_norm": 2.6630312289297073, "learning_rate": 4.904810361398132e-06, "loss": 0.1008, "step": 14449 }, { "epoch": 2.0403840722959616, "grad_norm": 4.827326875472237, "learning_rate": 4.9034986906904715e-06, "loss": 0.2579, "step": 14450 }, { "epoch": 2.0405252753459475, "grad_norm": 3.1732992206046378, "learning_rate": 4.902187138423299e-06, "loss": 0.1344, "step": 14451 }, { "epoch": 2.0406664783959334, "grad_norm": 3.564656717310189, "learning_rate": 4.900875704627096e-06, "loss": 0.2047, "step": 14452 }, { "epoch": 2.0408076814459193, "grad_norm": 3.958722176129888, "learning_rate": 4.899564389332337e-06, "loss": 0.1985, "step": 14453 }, { "epoch": 2.040948884495905, "grad_norm": 2.967947643032591, "learning_rate": 4.898253192569501e-06, "loss": 0.1525, "step": 14454 }, { "epoch": 2.041090087545891, "grad_norm": 3.8814231634163012, "learning_rate": 4.896942114369061e-06, "loss": 0.1622, "step": 14455 }, { "epoch": 2.041231290595877, "grad_norm": 4.243009724057618, "learning_rate": 4.8956311547614796e-06, "loss": 0.2231, "step": 14456 }, { "epoch": 2.041372493645863, "grad_norm": 3.973926363340206, "learning_rate": 4.894320313777226e-06, "loss": 0.1891, "step": 14457 }, { "epoch": 2.0415136966958487, "grad_norm": 2.69751464755363, "learning_rate": 4.893009591446759e-06, "loss": 0.1778, "step": 14458 }, { "epoch": 2.0416548997458346, "grad_norm": 3.4475903068847424, "learning_rate": 4.891698987800547e-06, "loss": 0.1709, "step": 14459 }, { "epoch": 2.0417961027958205, "grad_norm": 2.7951824698686045, "learning_rate": 4.8903885028690454e-06, "loss": 0.1417, "step": 14460 }, { "epoch": 2.0419373058458063, "grad_norm": 3.257452885225593, "learning_rate": 4.8890781366827075e-06, "loss": 0.1303, "step": 14461 }, { "epoch": 2.0420785088957922, "grad_norm": 2.8495338617238306, "learning_rate": 4.887767889271987e-06, "loss": 0.1724, "step": 14462 }, { "epoch": 2.042219711945778, "grad_norm": 2.3983026096438875, "learning_rate": 4.886457760667332e-06, "loss": 0.1098, "step": 14463 }, { "epoch": 2.042360914995764, "grad_norm": 3.2747010421517584, "learning_rate": 4.885147750899192e-06, "loss": 0.1641, "step": 14464 }, { "epoch": 2.04250211804575, "grad_norm": 2.7885144302966127, "learning_rate": 4.883837859998009e-06, "loss": 0.1583, "step": 14465 }, { "epoch": 2.0426433210957358, "grad_norm": 2.748064948528609, "learning_rate": 4.882528087994223e-06, "loss": 0.1552, "step": 14466 }, { "epoch": 2.0427845241457216, "grad_norm": 2.8641929437321525, "learning_rate": 4.881218434918276e-06, "loss": 0.1634, "step": 14467 }, { "epoch": 2.0429257271957075, "grad_norm": 3.018267799468174, "learning_rate": 4.8799089008006005e-06, "loss": 0.1519, "step": 14468 }, { "epoch": 2.0430669302456934, "grad_norm": 3.4160592729698553, "learning_rate": 4.878599485671631e-06, "loss": 0.1962, "step": 14469 }, { "epoch": 2.0432081332956793, "grad_norm": 3.789761582749137, "learning_rate": 4.877290189561795e-06, "loss": 0.2248, "step": 14470 }, { "epoch": 2.043349336345665, "grad_norm": 3.2795590658121605, "learning_rate": 4.875981012501526e-06, "loss": 0.1741, "step": 14471 }, { "epoch": 2.043490539395651, "grad_norm": 3.0509738309865746, "learning_rate": 4.874671954521249e-06, "loss": 0.1545, "step": 14472 }, { "epoch": 2.043631742445637, "grad_norm": 2.4125005815503746, "learning_rate": 4.873363015651379e-06, "loss": 0.1453, "step": 14473 }, { "epoch": 2.043772945495623, "grad_norm": 3.223822307896231, "learning_rate": 4.872054195922338e-06, "loss": 0.1679, "step": 14474 }, { "epoch": 2.0439141485456087, "grad_norm": 3.119359485457371, "learning_rate": 4.870745495364539e-06, "loss": 0.171, "step": 14475 }, { "epoch": 2.0440553515955946, "grad_norm": 2.9642883978824073, "learning_rate": 4.869436914008403e-06, "loss": 0.1511, "step": 14476 }, { "epoch": 2.0441965546455805, "grad_norm": 3.8382683873887493, "learning_rate": 4.868128451884339e-06, "loss": 0.2111, "step": 14477 }, { "epoch": 2.0443377576955664, "grad_norm": 3.701817816728572, "learning_rate": 4.866820109022752e-06, "loss": 0.212, "step": 14478 }, { "epoch": 2.0444789607455522, "grad_norm": 3.2401535968562927, "learning_rate": 4.865511885454049e-06, "loss": 0.1752, "step": 14479 }, { "epoch": 2.044620163795538, "grad_norm": 2.8456347826651616, "learning_rate": 4.864203781208632e-06, "loss": 0.1486, "step": 14480 }, { "epoch": 2.044761366845524, "grad_norm": 2.964767280176492, "learning_rate": 4.862895796316902e-06, "loss": 0.1455, "step": 14481 }, { "epoch": 2.04490256989551, "grad_norm": 3.1783440330617503, "learning_rate": 4.861587930809255e-06, "loss": 0.1607, "step": 14482 }, { "epoch": 2.0450437729454958, "grad_norm": 2.805464062517001, "learning_rate": 4.860280184716085e-06, "loss": 0.1538, "step": 14483 }, { "epoch": 2.0451849759954817, "grad_norm": 3.0363029439724802, "learning_rate": 4.858972558067784e-06, "loss": 0.1252, "step": 14484 }, { "epoch": 2.0453261790454675, "grad_norm": 2.7912879830353474, "learning_rate": 4.8576650508947405e-06, "loss": 0.1633, "step": 14485 }, { "epoch": 2.0454673820954534, "grad_norm": 4.65681582840178, "learning_rate": 4.856357663227341e-06, "loss": 0.2214, "step": 14486 }, { "epoch": 2.0456085851454393, "grad_norm": 2.967665760933173, "learning_rate": 4.855050395095964e-06, "loss": 0.1651, "step": 14487 }, { "epoch": 2.045749788195425, "grad_norm": 3.2960868267511456, "learning_rate": 4.8537432465309975e-06, "loss": 0.1425, "step": 14488 }, { "epoch": 2.045890991245411, "grad_norm": 4.540432646575895, "learning_rate": 4.852436217562819e-06, "loss": 0.164, "step": 14489 }, { "epoch": 2.046032194295397, "grad_norm": 2.872218498108694, "learning_rate": 4.851129308221796e-06, "loss": 0.1696, "step": 14490 }, { "epoch": 2.046173397345383, "grad_norm": 2.601509231900234, "learning_rate": 4.849822518538304e-06, "loss": 0.1349, "step": 14491 }, { "epoch": 2.0463146003953687, "grad_norm": 3.5493912120676474, "learning_rate": 4.848515848542709e-06, "loss": 0.1864, "step": 14492 }, { "epoch": 2.0464558034453546, "grad_norm": 3.160794284832748, "learning_rate": 4.847209298265385e-06, "loss": 0.173, "step": 14493 }, { "epoch": 2.0465970064953405, "grad_norm": 2.5793314112141807, "learning_rate": 4.845902867736692e-06, "loss": 0.1437, "step": 14494 }, { "epoch": 2.0467382095453264, "grad_norm": 3.0260085828590926, "learning_rate": 4.844596556986989e-06, "loss": 0.1919, "step": 14495 }, { "epoch": 2.0468794125953123, "grad_norm": 3.0410348182592455, "learning_rate": 4.843290366046637e-06, "loss": 0.1545, "step": 14496 }, { "epoch": 2.047020615645298, "grad_norm": 3.5868306716188534, "learning_rate": 4.84198429494599e-06, "loss": 0.1707, "step": 14497 }, { "epoch": 2.047161818695284, "grad_norm": 3.3010423588028885, "learning_rate": 4.840678343715399e-06, "loss": 0.2064, "step": 14498 }, { "epoch": 2.04730302174527, "grad_norm": 3.1544093565275917, "learning_rate": 4.839372512385215e-06, "loss": 0.1461, "step": 14499 }, { "epoch": 2.047444224795256, "grad_norm": 3.275244596971666, "learning_rate": 4.838066800985786e-06, "loss": 0.1738, "step": 14500 }, { "epoch": 2.0475854278452417, "grad_norm": 2.7475601438991446, "learning_rate": 4.836761209547456e-06, "loss": 0.146, "step": 14501 }, { "epoch": 2.0477266308952276, "grad_norm": 3.5534864873617855, "learning_rate": 4.8354557381005655e-06, "loss": 0.1868, "step": 14502 }, { "epoch": 2.0478678339452134, "grad_norm": 2.771343403983787, "learning_rate": 4.8341503866754525e-06, "loss": 0.1519, "step": 14503 }, { "epoch": 2.0480090369951993, "grad_norm": 3.3522315043418356, "learning_rate": 4.832845155302449e-06, "loss": 0.1824, "step": 14504 }, { "epoch": 2.048150240045185, "grad_norm": 3.1009461985662314, "learning_rate": 4.831540044011898e-06, "loss": 0.1614, "step": 14505 }, { "epoch": 2.0482914430951706, "grad_norm": 3.2946245665499587, "learning_rate": 4.830235052834127e-06, "loss": 0.1297, "step": 14506 }, { "epoch": 2.0484326461451565, "grad_norm": 2.934026589059701, "learning_rate": 4.828930181799457e-06, "loss": 0.1727, "step": 14507 }, { "epoch": 2.0485738491951424, "grad_norm": 2.6742657975111204, "learning_rate": 4.827625430938216e-06, "loss": 0.1401, "step": 14508 }, { "epoch": 2.0487150522451283, "grad_norm": 2.8155205381034096, "learning_rate": 4.826320800280724e-06, "loss": 0.1496, "step": 14509 }, { "epoch": 2.048856255295114, "grad_norm": 2.585024664146013, "learning_rate": 4.8250162898573046e-06, "loss": 0.1197, "step": 14510 }, { "epoch": 2.0489974583451, "grad_norm": 2.4630113574813115, "learning_rate": 4.823711899698272e-06, "loss": 0.1, "step": 14511 }, { "epoch": 2.049138661395086, "grad_norm": 3.145132487311336, "learning_rate": 4.822407629833941e-06, "loss": 0.1465, "step": 14512 }, { "epoch": 2.049279864445072, "grad_norm": 2.6480232836693816, "learning_rate": 4.821103480294619e-06, "loss": 0.1401, "step": 14513 }, { "epoch": 2.0494210674950577, "grad_norm": 2.8567173217373343, "learning_rate": 4.81979945111062e-06, "loss": 0.1594, "step": 14514 }, { "epoch": 2.0495622705450436, "grad_norm": 3.247257542962107, "learning_rate": 4.818495542312236e-06, "loss": 0.1687, "step": 14515 }, { "epoch": 2.0497034735950295, "grad_norm": 2.8576051767621986, "learning_rate": 4.8171917539297816e-06, "loss": 0.1157, "step": 14516 }, { "epoch": 2.0498446766450154, "grad_norm": 3.298349014100732, "learning_rate": 4.815888085993554e-06, "loss": 0.1646, "step": 14517 }, { "epoch": 2.0499858796950012, "grad_norm": 2.741543886876584, "learning_rate": 4.814584538533848e-06, "loss": 0.1472, "step": 14518 }, { "epoch": 2.050127082744987, "grad_norm": 2.6667480833985717, "learning_rate": 4.813281111580958e-06, "loss": 0.125, "step": 14519 }, { "epoch": 2.050268285794973, "grad_norm": 3.504940685230831, "learning_rate": 4.811977805165174e-06, "loss": 0.1372, "step": 14520 }, { "epoch": 2.050409488844959, "grad_norm": 3.116437641274064, "learning_rate": 4.810674619316782e-06, "loss": 0.1739, "step": 14521 }, { "epoch": 2.0505506918949448, "grad_norm": 3.2684391051589765, "learning_rate": 4.8093715540660745e-06, "loss": 0.1893, "step": 14522 }, { "epoch": 2.0506918949449306, "grad_norm": 4.225769454584112, "learning_rate": 4.808068609443333e-06, "loss": 0.1916, "step": 14523 }, { "epoch": 2.0508330979949165, "grad_norm": 2.852047397862396, "learning_rate": 4.806765785478833e-06, "loss": 0.174, "step": 14524 }, { "epoch": 2.0509743010449024, "grad_norm": 2.9341874008657234, "learning_rate": 4.805463082202852e-06, "loss": 0.1295, "step": 14525 }, { "epoch": 2.0511155040948883, "grad_norm": 3.164927506202152, "learning_rate": 4.804160499645667e-06, "loss": 0.1738, "step": 14526 }, { "epoch": 2.051256707144874, "grad_norm": 3.118051857326728, "learning_rate": 4.802858037837543e-06, "loss": 0.1594, "step": 14527 }, { "epoch": 2.05139791019486, "grad_norm": 4.073432053418342, "learning_rate": 4.801555696808758e-06, "loss": 0.1915, "step": 14528 }, { "epoch": 2.051539113244846, "grad_norm": 2.938396498779365, "learning_rate": 4.8002534765895746e-06, "loss": 0.1665, "step": 14529 }, { "epoch": 2.051680316294832, "grad_norm": 3.4783238643001253, "learning_rate": 4.798951377210253e-06, "loss": 0.1845, "step": 14530 }, { "epoch": 2.0518215193448177, "grad_norm": 3.1278819423532314, "learning_rate": 4.797649398701061e-06, "loss": 0.1573, "step": 14531 }, { "epoch": 2.0519627223948036, "grad_norm": 3.306840448543757, "learning_rate": 4.796347541092241e-06, "loss": 0.1496, "step": 14532 }, { "epoch": 2.0521039254447895, "grad_norm": 3.49600215583203, "learning_rate": 4.7950458044140614e-06, "loss": 0.2002, "step": 14533 }, { "epoch": 2.0522451284947754, "grad_norm": 3.246943747904683, "learning_rate": 4.7937441886967694e-06, "loss": 0.1718, "step": 14534 }, { "epoch": 2.0523863315447612, "grad_norm": 2.9637311309017726, "learning_rate": 4.792442693970614e-06, "loss": 0.166, "step": 14535 }, { "epoch": 2.052527534594747, "grad_norm": 2.8466348179332894, "learning_rate": 4.791141320265842e-06, "loss": 0.1812, "step": 14536 }, { "epoch": 2.052668737644733, "grad_norm": 3.42864011915757, "learning_rate": 4.7898400676126946e-06, "loss": 0.1478, "step": 14537 }, { "epoch": 2.052809940694719, "grad_norm": 3.4276552424087066, "learning_rate": 4.788538936041415e-06, "loss": 0.1853, "step": 14538 }, { "epoch": 2.0529511437447048, "grad_norm": 3.730478099923996, "learning_rate": 4.787237925582237e-06, "loss": 0.1835, "step": 14539 }, { "epoch": 2.0530923467946907, "grad_norm": 2.761514372505962, "learning_rate": 4.7859370362654045e-06, "loss": 0.1442, "step": 14540 }, { "epoch": 2.0532335498446765, "grad_norm": 2.884582167454878, "learning_rate": 4.78463626812114e-06, "loss": 0.1436, "step": 14541 }, { "epoch": 2.0533747528946624, "grad_norm": 2.6521006983908393, "learning_rate": 4.783335621179675e-06, "loss": 0.1707, "step": 14542 }, { "epoch": 2.0535159559446483, "grad_norm": 3.532411439116292, "learning_rate": 4.78203509547124e-06, "loss": 0.185, "step": 14543 }, { "epoch": 2.053657158994634, "grad_norm": 2.7642484762186372, "learning_rate": 4.780734691026051e-06, "loss": 0.1684, "step": 14544 }, { "epoch": 2.05379836204462, "grad_norm": 3.0002742334175267, "learning_rate": 4.779434407874337e-06, "loss": 0.1719, "step": 14545 }, { "epoch": 2.053939565094606, "grad_norm": 3.0852635829896045, "learning_rate": 4.778134246046313e-06, "loss": 0.1995, "step": 14546 }, { "epoch": 2.054080768144592, "grad_norm": 3.9868922650329814, "learning_rate": 4.776834205572194e-06, "loss": 0.2383, "step": 14547 }, { "epoch": 2.0542219711945777, "grad_norm": 2.623615788264525, "learning_rate": 4.775534286482197e-06, "loss": 0.1417, "step": 14548 }, { "epoch": 2.0543631742445636, "grad_norm": 2.8091972795339957, "learning_rate": 4.774234488806517e-06, "loss": 0.1326, "step": 14549 }, { "epoch": 2.0545043772945495, "grad_norm": 3.1106108909666785, "learning_rate": 4.772934812575377e-06, "loss": 0.1648, "step": 14550 }, { "epoch": 2.0546455803445354, "grad_norm": 2.841043330077408, "learning_rate": 4.771635257818973e-06, "loss": 0.1492, "step": 14551 }, { "epoch": 2.0547867833945213, "grad_norm": 3.236467731611961, "learning_rate": 4.770335824567508e-06, "loss": 0.1895, "step": 14552 }, { "epoch": 2.054927986444507, "grad_norm": 3.068723290317985, "learning_rate": 4.769036512851181e-06, "loss": 0.1956, "step": 14553 }, { "epoch": 2.055069189494493, "grad_norm": 3.2571445380593245, "learning_rate": 4.767737322700185e-06, "loss": 0.1538, "step": 14554 }, { "epoch": 2.055210392544479, "grad_norm": 4.353635912000762, "learning_rate": 4.766438254144714e-06, "loss": 0.2065, "step": 14555 }, { "epoch": 2.055351595594465, "grad_norm": 3.4657953081230684, "learning_rate": 4.765139307214956e-06, "loss": 0.2037, "step": 14556 }, { "epoch": 2.0554927986444507, "grad_norm": 3.5216367986386143, "learning_rate": 4.763840481941104e-06, "loss": 0.1886, "step": 14557 }, { "epoch": 2.0556340016944366, "grad_norm": 3.995323943844227, "learning_rate": 4.762541778353337e-06, "loss": 0.17, "step": 14558 }, { "epoch": 2.0557752047444224, "grad_norm": 3.195036420341871, "learning_rate": 4.761243196481835e-06, "loss": 0.161, "step": 14559 }, { "epoch": 2.0559164077944083, "grad_norm": 2.6690543835378078, "learning_rate": 4.75994473635678e-06, "loss": 0.1273, "step": 14560 }, { "epoch": 2.056057610844394, "grad_norm": 3.521330546201101, "learning_rate": 4.758646398008342e-06, "loss": 0.1568, "step": 14561 }, { "epoch": 2.05619881389438, "grad_norm": 3.109355959077045, "learning_rate": 4.757348181466702e-06, "loss": 0.1404, "step": 14562 }, { "epoch": 2.056340016944366, "grad_norm": 2.8202268118063225, "learning_rate": 4.756050086762028e-06, "loss": 0.1748, "step": 14563 }, { "epoch": 2.056481219994352, "grad_norm": 2.8964411338576728, "learning_rate": 4.754752113924482e-06, "loss": 0.1451, "step": 14564 }, { "epoch": 2.0566224230443377, "grad_norm": 2.653786648258179, "learning_rate": 4.753454262984238e-06, "loss": 0.1768, "step": 14565 }, { "epoch": 2.0567636260943236, "grad_norm": 2.8933153106071865, "learning_rate": 4.7521565339714415e-06, "loss": 0.1562, "step": 14566 }, { "epoch": 2.0569048291443095, "grad_norm": 3.6819364818263796, "learning_rate": 4.750858926916266e-06, "loss": 0.2102, "step": 14567 }, { "epoch": 2.0570460321942954, "grad_norm": 2.5768264984096674, "learning_rate": 4.749561441848862e-06, "loss": 0.117, "step": 14568 }, { "epoch": 2.0571872352442813, "grad_norm": 2.7749613516535443, "learning_rate": 4.748264078799382e-06, "loss": 0.1705, "step": 14569 }, { "epoch": 2.057328438294267, "grad_norm": 2.5641453387634665, "learning_rate": 4.746966837797977e-06, "loss": 0.149, "step": 14570 }, { "epoch": 2.057469641344253, "grad_norm": 3.2472698068938337, "learning_rate": 4.745669718874795e-06, "loss": 0.1765, "step": 14571 }, { "epoch": 2.057610844394239, "grad_norm": 3.2171274648342894, "learning_rate": 4.744372722059978e-06, "loss": 0.1535, "step": 14572 }, { "epoch": 2.057752047444225, "grad_norm": 2.932980407575749, "learning_rate": 4.74307584738367e-06, "loss": 0.1201, "step": 14573 }, { "epoch": 2.0578932504942107, "grad_norm": 3.351798804594412, "learning_rate": 4.741779094876009e-06, "loss": 0.1806, "step": 14574 }, { "epoch": 2.0580344535441966, "grad_norm": 2.9507311558184175, "learning_rate": 4.7404824645671314e-06, "loss": 0.1751, "step": 14575 }, { "epoch": 2.0581756565941824, "grad_norm": 2.9260992971226565, "learning_rate": 4.739185956487169e-06, "loss": 0.1433, "step": 14576 }, { "epoch": 2.0583168596441683, "grad_norm": 3.3584715023393747, "learning_rate": 4.737889570666253e-06, "loss": 0.19, "step": 14577 }, { "epoch": 2.058458062694154, "grad_norm": 2.479939649544663, "learning_rate": 4.736593307134508e-06, "loss": 0.1159, "step": 14578 }, { "epoch": 2.05859926574414, "grad_norm": 3.7224454554767035, "learning_rate": 4.735297165922065e-06, "loss": 0.1665, "step": 14579 }, { "epoch": 2.058740468794126, "grad_norm": 3.458701228776448, "learning_rate": 4.7340011470590415e-06, "loss": 0.1998, "step": 14580 }, { "epoch": 2.058881671844112, "grad_norm": 3.162857260665056, "learning_rate": 4.732705250575558e-06, "loss": 0.1349, "step": 14581 }, { "epoch": 2.0590228748940977, "grad_norm": 2.8403605883911576, "learning_rate": 4.7314094765017325e-06, "loss": 0.1525, "step": 14582 }, { "epoch": 2.0591640779440836, "grad_norm": 3.1992001135000527, "learning_rate": 4.730113824867668e-06, "loss": 0.2035, "step": 14583 }, { "epoch": 2.0593052809940695, "grad_norm": 2.648282229832616, "learning_rate": 4.728818295703487e-06, "loss": 0.1593, "step": 14584 }, { "epoch": 2.0594464840440554, "grad_norm": 3.007968036461489, "learning_rate": 4.727522889039292e-06, "loss": 0.1619, "step": 14585 }, { "epoch": 2.0595876870940413, "grad_norm": 3.8038497459193437, "learning_rate": 4.726227604905188e-06, "loss": 0.2118, "step": 14586 }, { "epoch": 2.059728890144027, "grad_norm": 3.6179408844566976, "learning_rate": 4.7249324433312775e-06, "loss": 0.1816, "step": 14587 }, { "epoch": 2.059870093194013, "grad_norm": 2.7689915142598234, "learning_rate": 4.723637404347658e-06, "loss": 0.1549, "step": 14588 }, { "epoch": 2.060011296243999, "grad_norm": 2.789638715487804, "learning_rate": 4.722342487984426e-06, "loss": 0.1301, "step": 14589 }, { "epoch": 2.060152499293985, "grad_norm": 2.847566461959244, "learning_rate": 4.721047694271676e-06, "loss": 0.1681, "step": 14590 }, { "epoch": 2.0602937023439707, "grad_norm": 3.323595266061516, "learning_rate": 4.719753023239498e-06, "loss": 0.1665, "step": 14591 }, { "epoch": 2.0604349053939566, "grad_norm": 3.192750341669437, "learning_rate": 4.718458474917979e-06, "loss": 0.1596, "step": 14592 }, { "epoch": 2.0605761084439425, "grad_norm": 3.6462342323683457, "learning_rate": 4.717164049337205e-06, "loss": 0.176, "step": 14593 }, { "epoch": 2.0607173114939283, "grad_norm": 3.878666353507677, "learning_rate": 4.715869746527256e-06, "loss": 0.1904, "step": 14594 }, { "epoch": 2.060858514543914, "grad_norm": 2.9470708923734557, "learning_rate": 4.714575566518209e-06, "loss": 0.1528, "step": 14595 }, { "epoch": 2.0609997175939, "grad_norm": 3.9209314096008607, "learning_rate": 4.713281509340146e-06, "loss": 0.1744, "step": 14596 }, { "epoch": 2.061140920643886, "grad_norm": 3.2482012332949233, "learning_rate": 4.7119875750231395e-06, "loss": 0.1495, "step": 14597 }, { "epoch": 2.061282123693872, "grad_norm": 2.5126908947578164, "learning_rate": 4.7106937635972565e-06, "loss": 0.1051, "step": 14598 }, { "epoch": 2.0614233267438578, "grad_norm": 2.8896517390888667, "learning_rate": 4.70940007509257e-06, "loss": 0.1846, "step": 14599 }, { "epoch": 2.0615645297938436, "grad_norm": 3.5348870815979083, "learning_rate": 4.708106509539134e-06, "loss": 0.1612, "step": 14600 }, { "epoch": 2.0617057328438295, "grad_norm": 2.874917253744368, "learning_rate": 4.706813066967021e-06, "loss": 0.1626, "step": 14601 }, { "epoch": 2.0618469358938154, "grad_norm": 3.118695010241647, "learning_rate": 4.705519747406285e-06, "loss": 0.1761, "step": 14602 }, { "epoch": 2.0619881389438013, "grad_norm": 3.037491061569983, "learning_rate": 4.7042265508869855e-06, "loss": 0.1709, "step": 14603 }, { "epoch": 2.062129341993787, "grad_norm": 2.893084895351072, "learning_rate": 4.702933477439172e-06, "loss": 0.1371, "step": 14604 }, { "epoch": 2.062270545043773, "grad_norm": 3.5008183748342594, "learning_rate": 4.7016405270928985e-06, "loss": 0.1892, "step": 14605 }, { "epoch": 2.062411748093759, "grad_norm": 3.9740044113957316, "learning_rate": 4.700347699878211e-06, "loss": 0.2049, "step": 14606 }, { "epoch": 2.062552951143745, "grad_norm": 3.059640078716887, "learning_rate": 4.699054995825153e-06, "loss": 0.1605, "step": 14607 }, { "epoch": 2.0626941541937307, "grad_norm": 3.0647110018905788, "learning_rate": 4.697762414963768e-06, "loss": 0.168, "step": 14608 }, { "epoch": 2.0628353572437166, "grad_norm": 2.928518624130187, "learning_rate": 4.696469957324094e-06, "loss": 0.1475, "step": 14609 }, { "epoch": 2.0629765602937025, "grad_norm": 2.552423475697362, "learning_rate": 4.695177622936169e-06, "loss": 0.1329, "step": 14610 }, { "epoch": 2.0631177633436883, "grad_norm": 3.650369928103259, "learning_rate": 4.693885411830025e-06, "loss": 0.1764, "step": 14611 }, { "epoch": 2.0632589663936742, "grad_norm": 3.10239989841359, "learning_rate": 4.692593324035688e-06, "loss": 0.1552, "step": 14612 }, { "epoch": 2.06340016944366, "grad_norm": 2.8650250485629507, "learning_rate": 4.691301359583195e-06, "loss": 0.1596, "step": 14613 }, { "epoch": 2.063541372493646, "grad_norm": 2.8342796954252014, "learning_rate": 4.690009518502564e-06, "loss": 0.1558, "step": 14614 }, { "epoch": 2.063682575543632, "grad_norm": 3.1280005916282105, "learning_rate": 4.68871780082382e-06, "loss": 0.1922, "step": 14615 }, { "epoch": 2.0638237785936178, "grad_norm": 2.9336291447364826, "learning_rate": 4.687426206576983e-06, "loss": 0.1612, "step": 14616 }, { "epoch": 2.0639649816436036, "grad_norm": 3.132439844782433, "learning_rate": 4.686134735792064e-06, "loss": 0.1469, "step": 14617 }, { "epoch": 2.0641061846935895, "grad_norm": 3.0325203779537317, "learning_rate": 4.684843388499075e-06, "loss": 0.1675, "step": 14618 }, { "epoch": 2.0642473877435754, "grad_norm": 2.53948808917075, "learning_rate": 4.683552164728033e-06, "loss": 0.1442, "step": 14619 }, { "epoch": 2.0643885907935613, "grad_norm": 2.512427411162805, "learning_rate": 4.682261064508944e-06, "loss": 0.1277, "step": 14620 }, { "epoch": 2.064529793843547, "grad_norm": 2.9280640623766434, "learning_rate": 4.680970087871811e-06, "loss": 0.1398, "step": 14621 }, { "epoch": 2.064670996893533, "grad_norm": 2.8192033797968827, "learning_rate": 4.679679234846636e-06, "loss": 0.1624, "step": 14622 }, { "epoch": 2.064812199943519, "grad_norm": 3.2511219112624463, "learning_rate": 4.678388505463417e-06, "loss": 0.156, "step": 14623 }, { "epoch": 2.064953402993505, "grad_norm": 3.19350399453411, "learning_rate": 4.677097899752152e-06, "loss": 0.1641, "step": 14624 }, { "epoch": 2.0650946060434907, "grad_norm": 3.191256580327672, "learning_rate": 4.675807417742832e-06, "loss": 0.1575, "step": 14625 }, { "epoch": 2.0652358090934766, "grad_norm": 3.1236377453924526, "learning_rate": 4.674517059465449e-06, "loss": 0.1459, "step": 14626 }, { "epoch": 2.0653770121434625, "grad_norm": 2.9712037598668717, "learning_rate": 4.67322682494999e-06, "loss": 0.161, "step": 14627 }, { "epoch": 2.0655182151934484, "grad_norm": 3.0656621266934807, "learning_rate": 4.671936714226438e-06, "loss": 0.1531, "step": 14628 }, { "epoch": 2.0656594182434342, "grad_norm": 2.9629760266444976, "learning_rate": 4.6706467273247766e-06, "loss": 0.1298, "step": 14629 }, { "epoch": 2.06580062129342, "grad_norm": 2.835014546155835, "learning_rate": 4.66935686427498e-06, "loss": 0.1436, "step": 14630 }, { "epoch": 2.065941824343406, "grad_norm": 3.2045946385137007, "learning_rate": 4.6680671251070306e-06, "loss": 0.1624, "step": 14631 }, { "epoch": 2.066083027393392, "grad_norm": 3.7945292588806274, "learning_rate": 4.666777509850899e-06, "loss": 0.2096, "step": 14632 }, { "epoch": 2.0662242304433778, "grad_norm": 3.2625338323915236, "learning_rate": 4.665488018536559e-06, "loss": 0.1307, "step": 14633 }, { "epoch": 2.0663654334933637, "grad_norm": 3.259428476327424, "learning_rate": 4.664198651193969e-06, "loss": 0.1459, "step": 14634 }, { "epoch": 2.0665066365433495, "grad_norm": 3.751529472395767, "learning_rate": 4.662909407853096e-06, "loss": 0.1881, "step": 14635 }, { "epoch": 2.0666478395933354, "grad_norm": 3.8586700412946446, "learning_rate": 4.6616202885439056e-06, "loss": 0.1769, "step": 14636 }, { "epoch": 2.0667890426433213, "grad_norm": 2.485426300728362, "learning_rate": 4.660331293296355e-06, "loss": 0.1014, "step": 14637 }, { "epoch": 2.066930245693307, "grad_norm": 3.033677964650005, "learning_rate": 4.659042422140399e-06, "loss": 0.1675, "step": 14638 }, { "epoch": 2.0670714487432926, "grad_norm": 3.543588609325569, "learning_rate": 4.65775367510599e-06, "loss": 0.1553, "step": 14639 }, { "epoch": 2.0672126517932785, "grad_norm": 3.4025258491146584, "learning_rate": 4.656465052223079e-06, "loss": 0.1635, "step": 14640 }, { "epoch": 2.0673538548432644, "grad_norm": 3.392461143517055, "learning_rate": 4.655176553521612e-06, "loss": 0.1505, "step": 14641 }, { "epoch": 2.0674950578932503, "grad_norm": 3.2749484841609195, "learning_rate": 4.653888179031533e-06, "loss": 0.151, "step": 14642 }, { "epoch": 2.067636260943236, "grad_norm": 4.143356489301317, "learning_rate": 4.652599928782786e-06, "loss": 0.2052, "step": 14643 }, { "epoch": 2.067777463993222, "grad_norm": 2.7448579883531474, "learning_rate": 4.6513118028053065e-06, "loss": 0.1451, "step": 14644 }, { "epoch": 2.067918667043208, "grad_norm": 2.6900722822828738, "learning_rate": 4.65002380112903e-06, "loss": 0.1015, "step": 14645 }, { "epoch": 2.068059870093194, "grad_norm": 2.87713081792148, "learning_rate": 4.64873592378389e-06, "loss": 0.1303, "step": 14646 }, { "epoch": 2.0682010731431797, "grad_norm": 3.4431861699135315, "learning_rate": 4.647448170799813e-06, "loss": 0.2075, "step": 14647 }, { "epoch": 2.0683422761931656, "grad_norm": 3.440467482759277, "learning_rate": 4.646160542206733e-06, "loss": 0.1991, "step": 14648 }, { "epoch": 2.0684834792431515, "grad_norm": 3.3183933573921665, "learning_rate": 4.644873038034568e-06, "loss": 0.1474, "step": 14649 }, { "epoch": 2.0686246822931373, "grad_norm": 2.784566255648997, "learning_rate": 4.6435856583132465e-06, "loss": 0.1726, "step": 14650 }, { "epoch": 2.0687658853431232, "grad_norm": 3.1361440696705527, "learning_rate": 4.642298403072677e-06, "loss": 0.1232, "step": 14651 }, { "epoch": 2.068907088393109, "grad_norm": 2.343068463033581, "learning_rate": 4.641011272342775e-06, "loss": 0.1304, "step": 14652 }, { "epoch": 2.069048291443095, "grad_norm": 3.089784748506537, "learning_rate": 4.639724266153459e-06, "loss": 0.1761, "step": 14653 }, { "epoch": 2.069189494493081, "grad_norm": 3.124526206622, "learning_rate": 4.6384373845346375e-06, "loss": 0.1693, "step": 14654 }, { "epoch": 2.0693306975430668, "grad_norm": 3.4770227369051816, "learning_rate": 4.637150627516215e-06, "loss": 0.1354, "step": 14655 }, { "epoch": 2.0694719005930526, "grad_norm": 2.964237823095029, "learning_rate": 4.635863995128097e-06, "loss": 0.1544, "step": 14656 }, { "epoch": 2.0696131036430385, "grad_norm": 3.6455839127496756, "learning_rate": 4.634577487400182e-06, "loss": 0.1697, "step": 14657 }, { "epoch": 2.0697543066930244, "grad_norm": 2.242226678578641, "learning_rate": 4.633291104362369e-06, "loss": 0.085, "step": 14658 }, { "epoch": 2.0698955097430103, "grad_norm": 2.914102869308605, "learning_rate": 4.632004846044555e-06, "loss": 0.108, "step": 14659 }, { "epoch": 2.070036712792996, "grad_norm": 2.8743823220547102, "learning_rate": 4.630718712476628e-06, "loss": 0.1292, "step": 14660 }, { "epoch": 2.070177915842982, "grad_norm": 3.244372647564022, "learning_rate": 4.62943270368848e-06, "loss": 0.1828, "step": 14661 }, { "epoch": 2.070319118892968, "grad_norm": 3.0521290080655055, "learning_rate": 4.628146819709997e-06, "loss": 0.1393, "step": 14662 }, { "epoch": 2.070460321942954, "grad_norm": 3.879372136043958, "learning_rate": 4.626861060571061e-06, "loss": 0.1892, "step": 14663 }, { "epoch": 2.0706015249929397, "grad_norm": 3.7941529531463423, "learning_rate": 4.6255754263015495e-06, "loss": 0.2226, "step": 14664 }, { "epoch": 2.0707427280429256, "grad_norm": 2.9503825090448488, "learning_rate": 4.624289916931349e-06, "loss": 0.1357, "step": 14665 }, { "epoch": 2.0708839310929115, "grad_norm": 4.137609784273914, "learning_rate": 4.623004532490328e-06, "loss": 0.1918, "step": 14666 }, { "epoch": 2.0710251341428974, "grad_norm": 2.76583384798674, "learning_rate": 4.6217192730083645e-06, "loss": 0.1543, "step": 14667 }, { "epoch": 2.0711663371928832, "grad_norm": 3.049304803794742, "learning_rate": 4.6204341385153186e-06, "loss": 0.1672, "step": 14668 }, { "epoch": 2.071307540242869, "grad_norm": 3.511286794759079, "learning_rate": 4.619149129041056e-06, "loss": 0.1936, "step": 14669 }, { "epoch": 2.071448743292855, "grad_norm": 3.560594584298056, "learning_rate": 4.617864244615448e-06, "loss": 0.1811, "step": 14670 }, { "epoch": 2.071589946342841, "grad_norm": 3.6844193061281514, "learning_rate": 4.61657948526835e-06, "loss": 0.1764, "step": 14671 }, { "epoch": 2.0717311493928268, "grad_norm": 2.8845571761890962, "learning_rate": 4.615294851029619e-06, "loss": 0.16, "step": 14672 }, { "epoch": 2.0718723524428126, "grad_norm": 3.673445053258589, "learning_rate": 4.614010341929112e-06, "loss": 0.1499, "step": 14673 }, { "epoch": 2.0720135554927985, "grad_norm": 2.805554667742319, "learning_rate": 4.612725957996677e-06, "loss": 0.1324, "step": 14674 }, { "epoch": 2.0721547585427844, "grad_norm": 3.648881882688345, "learning_rate": 4.6114416992621645e-06, "loss": 0.1863, "step": 14675 }, { "epoch": 2.0722959615927703, "grad_norm": 3.000001512007524, "learning_rate": 4.610157565755421e-06, "loss": 0.1615, "step": 14676 }, { "epoch": 2.072437164642756, "grad_norm": 2.6928126251656326, "learning_rate": 4.608873557506287e-06, "loss": 0.1517, "step": 14677 }, { "epoch": 2.072578367692742, "grad_norm": 3.114969395821639, "learning_rate": 4.607589674544603e-06, "loss": 0.1553, "step": 14678 }, { "epoch": 2.072719570742728, "grad_norm": 3.0876503342139165, "learning_rate": 4.606305916900206e-06, "loss": 0.1664, "step": 14679 }, { "epoch": 2.072860773792714, "grad_norm": 2.9884965229670915, "learning_rate": 4.6050222846029315e-06, "loss": 0.1511, "step": 14680 }, { "epoch": 2.0730019768426997, "grad_norm": 3.7433518284316865, "learning_rate": 4.603738777682604e-06, "loss": 0.1602, "step": 14681 }, { "epoch": 2.0731431798926856, "grad_norm": 3.288698736598821, "learning_rate": 4.602455396169062e-06, "loss": 0.1865, "step": 14682 }, { "epoch": 2.0732843829426715, "grad_norm": 2.877266163911154, "learning_rate": 4.601172140092125e-06, "loss": 0.1388, "step": 14683 }, { "epoch": 2.0734255859926574, "grad_norm": 3.736464992739931, "learning_rate": 4.59988900948162e-06, "loss": 0.196, "step": 14684 }, { "epoch": 2.0735667890426432, "grad_norm": 2.799427415147197, "learning_rate": 4.598606004367358e-06, "loss": 0.1528, "step": 14685 }, { "epoch": 2.073707992092629, "grad_norm": 3.023109675095226, "learning_rate": 4.597323124779155e-06, "loss": 0.1394, "step": 14686 }, { "epoch": 2.073849195142615, "grad_norm": 2.8983180028306235, "learning_rate": 4.596040370746835e-06, "loss": 0.1417, "step": 14687 }, { "epoch": 2.073990398192601, "grad_norm": 2.3930957576533953, "learning_rate": 4.594757742300201e-06, "loss": 0.1329, "step": 14688 }, { "epoch": 2.0741316012425868, "grad_norm": 2.9215574769745727, "learning_rate": 4.593475239469064e-06, "loss": 0.1391, "step": 14689 }, { "epoch": 2.0742728042925727, "grad_norm": 2.8407623388259027, "learning_rate": 4.592192862283228e-06, "loss": 0.1761, "step": 14690 }, { "epoch": 2.0744140073425585, "grad_norm": 2.803837297825593, "learning_rate": 4.590910610772493e-06, "loss": 0.1335, "step": 14691 }, { "epoch": 2.0745552103925444, "grad_norm": 3.705153680428913, "learning_rate": 4.589628484966661e-06, "loss": 0.1546, "step": 14692 }, { "epoch": 2.0746964134425303, "grad_norm": 2.677548486329379, "learning_rate": 4.588346484895525e-06, "loss": 0.1172, "step": 14693 }, { "epoch": 2.074837616492516, "grad_norm": 3.530784935867504, "learning_rate": 4.587064610588881e-06, "loss": 0.1673, "step": 14694 }, { "epoch": 2.074978819542502, "grad_norm": 3.835583788441309, "learning_rate": 4.585782862076517e-06, "loss": 0.1826, "step": 14695 }, { "epoch": 2.075120022592488, "grad_norm": 2.810338910208409, "learning_rate": 4.5845012393882205e-06, "loss": 0.1247, "step": 14696 }, { "epoch": 2.075261225642474, "grad_norm": 2.670421583774606, "learning_rate": 4.583219742553777e-06, "loss": 0.1634, "step": 14697 }, { "epoch": 2.0754024286924597, "grad_norm": 3.4911466877046577, "learning_rate": 4.581938371602964e-06, "loss": 0.1967, "step": 14698 }, { "epoch": 2.0755436317424456, "grad_norm": 3.384510781102771, "learning_rate": 4.580657126565566e-06, "loss": 0.2017, "step": 14699 }, { "epoch": 2.0756848347924315, "grad_norm": 2.950843819307427, "learning_rate": 4.5793760074713565e-06, "loss": 0.1931, "step": 14700 }, { "epoch": 2.0758260378424174, "grad_norm": 2.465239592204603, "learning_rate": 4.578095014350113e-06, "loss": 0.1127, "step": 14701 }, { "epoch": 2.0759672408924033, "grad_norm": 3.013464678330998, "learning_rate": 4.576814147231594e-06, "loss": 0.1298, "step": 14702 }, { "epoch": 2.076108443942389, "grad_norm": 3.3342127123388177, "learning_rate": 4.5755334061455685e-06, "loss": 0.1805, "step": 14703 }, { "epoch": 2.076249646992375, "grad_norm": 3.804902666181815, "learning_rate": 4.574252791121808e-06, "loss": 0.1785, "step": 14704 }, { "epoch": 2.076390850042361, "grad_norm": 4.219726050235283, "learning_rate": 4.57297230219007e-06, "loss": 0.2245, "step": 14705 }, { "epoch": 2.076532053092347, "grad_norm": 3.2869633158365428, "learning_rate": 4.571691939380111e-06, "loss": 0.1672, "step": 14706 }, { "epoch": 2.0766732561423327, "grad_norm": 2.883201253351491, "learning_rate": 4.570411702721688e-06, "loss": 0.1654, "step": 14707 }, { "epoch": 2.0768144591923186, "grad_norm": 2.9341604826350256, "learning_rate": 4.569131592244552e-06, "loss": 0.1235, "step": 14708 }, { "epoch": 2.0769556622423044, "grad_norm": 3.537611157175534, "learning_rate": 4.567851607978452e-06, "loss": 0.1567, "step": 14709 }, { "epoch": 2.0770968652922903, "grad_norm": 3.2301576981102236, "learning_rate": 4.566571749953135e-06, "loss": 0.159, "step": 14710 }, { "epoch": 2.077238068342276, "grad_norm": 3.3168217313129627, "learning_rate": 4.565292018198344e-06, "loss": 0.2093, "step": 14711 }, { "epoch": 2.077379271392262, "grad_norm": 3.6085816615228152, "learning_rate": 4.564012412743819e-06, "loss": 0.2345, "step": 14712 }, { "epoch": 2.077520474442248, "grad_norm": 3.7185986269953117, "learning_rate": 4.562732933619298e-06, "loss": 0.1716, "step": 14713 }, { "epoch": 2.077661677492234, "grad_norm": 3.3619422330644992, "learning_rate": 4.561453580854516e-06, "loss": 0.1799, "step": 14714 }, { "epoch": 2.0778028805422197, "grad_norm": 3.0835975112833967, "learning_rate": 4.5601743544791995e-06, "loss": 0.1808, "step": 14715 }, { "epoch": 2.0779440835922056, "grad_norm": 2.720222764102392, "learning_rate": 4.558895254523086e-06, "loss": 0.1357, "step": 14716 }, { "epoch": 2.0780852866421915, "grad_norm": 3.7691111678852307, "learning_rate": 4.5576162810158966e-06, "loss": 0.2266, "step": 14717 }, { "epoch": 2.0782264896921774, "grad_norm": 3.068380231707557, "learning_rate": 4.556337433987359e-06, "loss": 0.1653, "step": 14718 }, { "epoch": 2.0783676927421633, "grad_norm": 2.9330922610478765, "learning_rate": 4.555058713467184e-06, "loss": 0.1819, "step": 14719 }, { "epoch": 2.078508895792149, "grad_norm": 3.5628272873615114, "learning_rate": 4.553780119485093e-06, "loss": 0.1515, "step": 14720 }, { "epoch": 2.078650098842135, "grad_norm": 2.608584105521275, "learning_rate": 4.552501652070796e-06, "loss": 0.1447, "step": 14721 }, { "epoch": 2.078791301892121, "grad_norm": 3.1552897009342273, "learning_rate": 4.551223311254013e-06, "loss": 0.1158, "step": 14722 }, { "epoch": 2.078932504942107, "grad_norm": 2.8022880804967083, "learning_rate": 4.5499450970644455e-06, "loss": 0.1801, "step": 14723 }, { "epoch": 2.0790737079920927, "grad_norm": 3.509499661794767, "learning_rate": 4.5486670095318006e-06, "loss": 0.194, "step": 14724 }, { "epoch": 2.0792149110420786, "grad_norm": 2.6474141595607925, "learning_rate": 4.547389048685781e-06, "loss": 0.147, "step": 14725 }, { "epoch": 2.0793561140920644, "grad_norm": 3.65937466596559, "learning_rate": 4.5461112145560845e-06, "loss": 0.191, "step": 14726 }, { "epoch": 2.0794973171420503, "grad_norm": 3.2089370048205588, "learning_rate": 4.544833507172408e-06, "loss": 0.1641, "step": 14727 }, { "epoch": 2.079638520192036, "grad_norm": 3.594347137145248, "learning_rate": 4.543555926564445e-06, "loss": 0.191, "step": 14728 }, { "epoch": 2.079779723242022, "grad_norm": 3.355553440511009, "learning_rate": 4.542278472761886e-06, "loss": 0.1876, "step": 14729 }, { "epoch": 2.079920926292008, "grad_norm": 4.358562061112239, "learning_rate": 4.541001145794419e-06, "loss": 0.2084, "step": 14730 }, { "epoch": 2.080062129341994, "grad_norm": 3.663134912340441, "learning_rate": 4.539723945691727e-06, "loss": 0.1952, "step": 14731 }, { "epoch": 2.0802033323919797, "grad_norm": 2.620609435243367, "learning_rate": 4.538446872483492e-06, "loss": 0.1585, "step": 14732 }, { "epoch": 2.0803445354419656, "grad_norm": 3.18988289485214, "learning_rate": 4.53716992619939e-06, "loss": 0.1706, "step": 14733 }, { "epoch": 2.0804857384919515, "grad_norm": 3.3767087967814704, "learning_rate": 4.535893106869107e-06, "loss": 0.1794, "step": 14734 }, { "epoch": 2.0806269415419374, "grad_norm": 2.482774651330091, "learning_rate": 4.534616414522304e-06, "loss": 0.1465, "step": 14735 }, { "epoch": 2.0807681445919233, "grad_norm": 3.1214397052021474, "learning_rate": 4.533339849188656e-06, "loss": 0.1763, "step": 14736 }, { "epoch": 2.080909347641909, "grad_norm": 3.2769096578573267, "learning_rate": 4.532063410897829e-06, "loss": 0.1512, "step": 14737 }, { "epoch": 2.081050550691895, "grad_norm": 3.4795245883684207, "learning_rate": 4.530787099679482e-06, "loss": 0.2028, "step": 14738 }, { "epoch": 2.081191753741881, "grad_norm": 2.950221792215814, "learning_rate": 4.5295109155632865e-06, "loss": 0.1449, "step": 14739 }, { "epoch": 2.081332956791867, "grad_norm": 2.527010016401659, "learning_rate": 4.528234858578894e-06, "loss": 0.1283, "step": 14740 }, { "epoch": 2.0814741598418527, "grad_norm": 3.0980367902942763, "learning_rate": 4.526958928755959e-06, "loss": 0.178, "step": 14741 }, { "epoch": 2.0816153628918386, "grad_norm": 2.8845475225499775, "learning_rate": 4.525683126124137e-06, "loss": 0.1741, "step": 14742 }, { "epoch": 2.0817565659418245, "grad_norm": 2.99925301901024, "learning_rate": 4.524407450713073e-06, "loss": 0.1822, "step": 14743 }, { "epoch": 2.0818977689918103, "grad_norm": 2.530270627382348, "learning_rate": 4.523131902552417e-06, "loss": 0.1573, "step": 14744 }, { "epoch": 2.082038972041796, "grad_norm": 3.5011484764194543, "learning_rate": 4.521856481671809e-06, "loss": 0.194, "step": 14745 }, { "epoch": 2.082180175091782, "grad_norm": 3.122057458557989, "learning_rate": 4.520581188100891e-06, "loss": 0.1502, "step": 14746 }, { "epoch": 2.082321378141768, "grad_norm": 2.883435123567567, "learning_rate": 4.5193060218692995e-06, "loss": 0.1462, "step": 14747 }, { "epoch": 2.082462581191754, "grad_norm": 3.0580899428797315, "learning_rate": 4.5180309830066696e-06, "loss": 0.166, "step": 14748 }, { "epoch": 2.0826037842417398, "grad_norm": 4.309262967509799, "learning_rate": 4.5167560715426314e-06, "loss": 0.2126, "step": 14749 }, { "epoch": 2.0827449872917256, "grad_norm": 3.0728251994612905, "learning_rate": 4.515481287506811e-06, "loss": 0.1493, "step": 14750 }, { "epoch": 2.0828861903417115, "grad_norm": 3.242438596854217, "learning_rate": 4.514206630928845e-06, "loss": 0.1689, "step": 14751 }, { "epoch": 2.0830273933916974, "grad_norm": 2.9169527570582487, "learning_rate": 4.512932101838343e-06, "loss": 0.1427, "step": 14752 }, { "epoch": 2.0831685964416833, "grad_norm": 2.564260144329419, "learning_rate": 4.51165770026493e-06, "loss": 0.1469, "step": 14753 }, { "epoch": 2.083309799491669, "grad_norm": 2.905321137966021, "learning_rate": 4.510383426238221e-06, "loss": 0.1698, "step": 14754 }, { "epoch": 2.083451002541655, "grad_norm": 3.210278845358085, "learning_rate": 4.509109279787826e-06, "loss": 0.1893, "step": 14755 }, { "epoch": 2.083592205591641, "grad_norm": 3.071897152654023, "learning_rate": 4.507835260943365e-06, "loss": 0.1595, "step": 14756 }, { "epoch": 2.083733408641627, "grad_norm": 4.599099350285521, "learning_rate": 4.506561369734441e-06, "loss": 0.2427, "step": 14757 }, { "epoch": 2.0838746116916127, "grad_norm": 3.3709038021514006, "learning_rate": 4.505287606190658e-06, "loss": 0.1901, "step": 14758 }, { "epoch": 2.0840158147415986, "grad_norm": 3.5956189374270497, "learning_rate": 4.504013970341618e-06, "loss": 0.1548, "step": 14759 }, { "epoch": 2.0841570177915845, "grad_norm": 3.460639583309433, "learning_rate": 4.502740462216919e-06, "loss": 0.1743, "step": 14760 }, { "epoch": 2.0842982208415703, "grad_norm": 3.9106752735296153, "learning_rate": 4.501467081846158e-06, "loss": 0.1975, "step": 14761 }, { "epoch": 2.0844394238915562, "grad_norm": 2.577299182990156, "learning_rate": 4.500193829258928e-06, "loss": 0.1423, "step": 14762 }, { "epoch": 2.084580626941542, "grad_norm": 2.4666178976754543, "learning_rate": 4.498920704484817e-06, "loss": 0.1011, "step": 14763 }, { "epoch": 2.084721829991528, "grad_norm": 2.6806325645666966, "learning_rate": 4.497647707553414e-06, "loss": 0.1383, "step": 14764 }, { "epoch": 2.084863033041514, "grad_norm": 3.6345367504397825, "learning_rate": 4.496374838494302e-06, "loss": 0.1611, "step": 14765 }, { "epoch": 2.0850042360914998, "grad_norm": 3.837225139829277, "learning_rate": 4.495102097337062e-06, "loss": 0.1843, "step": 14766 }, { "epoch": 2.0851454391414856, "grad_norm": 2.9897523737953455, "learning_rate": 4.493829484111267e-06, "loss": 0.1799, "step": 14767 }, { "epoch": 2.0852866421914715, "grad_norm": 3.2996123634210925, "learning_rate": 4.492556998846505e-06, "loss": 0.2218, "step": 14768 }, { "epoch": 2.0854278452414574, "grad_norm": 3.5960858087511145, "learning_rate": 4.491284641572338e-06, "loss": 0.2227, "step": 14769 }, { "epoch": 2.0855690482914433, "grad_norm": 4.195496101232432, "learning_rate": 4.490012412318334e-06, "loss": 0.2608, "step": 14770 }, { "epoch": 2.085710251341429, "grad_norm": 3.041712918328491, "learning_rate": 4.488740311114064e-06, "loss": 0.1714, "step": 14771 }, { "epoch": 2.085851454391415, "grad_norm": 3.199707506797733, "learning_rate": 4.487468337989083e-06, "loss": 0.1827, "step": 14772 }, { "epoch": 2.085992657441401, "grad_norm": 2.737178528616085, "learning_rate": 4.486196492972964e-06, "loss": 0.1715, "step": 14773 }, { "epoch": 2.086133860491387, "grad_norm": 3.2689073476561528, "learning_rate": 4.484924776095255e-06, "loss": 0.1848, "step": 14774 }, { "epoch": 2.0862750635413727, "grad_norm": 2.9070056491402156, "learning_rate": 4.483653187385514e-06, "loss": 0.1491, "step": 14775 }, { "epoch": 2.0864162665913586, "grad_norm": 3.3285631651922905, "learning_rate": 4.48238172687329e-06, "loss": 0.1645, "step": 14776 }, { "epoch": 2.0865574696413445, "grad_norm": 3.1527224533195586, "learning_rate": 4.481110394588131e-06, "loss": 0.1673, "step": 14777 }, { "epoch": 2.0866986726913304, "grad_norm": 3.569857753140123, "learning_rate": 4.479839190559583e-06, "loss": 0.216, "step": 14778 }, { "epoch": 2.086839875741316, "grad_norm": 3.411398949315808, "learning_rate": 4.4785681148171885e-06, "loss": 0.1464, "step": 14779 }, { "epoch": 2.0869810787913017, "grad_norm": 3.2647383971700767, "learning_rate": 4.477297167390487e-06, "loss": 0.2029, "step": 14780 }, { "epoch": 2.0871222818412876, "grad_norm": 3.346219672569038, "learning_rate": 4.476026348309014e-06, "loss": 0.2099, "step": 14781 }, { "epoch": 2.0872634848912734, "grad_norm": 3.165621249847517, "learning_rate": 4.474755657602303e-06, "loss": 0.175, "step": 14782 }, { "epoch": 2.0874046879412593, "grad_norm": 2.954786489251234, "learning_rate": 4.473485095299885e-06, "loss": 0.1874, "step": 14783 }, { "epoch": 2.087545890991245, "grad_norm": 3.180168472474487, "learning_rate": 4.472214661431282e-06, "loss": 0.195, "step": 14784 }, { "epoch": 2.087687094041231, "grad_norm": 2.4944453671412, "learning_rate": 4.47094435602603e-06, "loss": 0.1162, "step": 14785 }, { "epoch": 2.087828297091217, "grad_norm": 2.7462661622366675, "learning_rate": 4.469674179113641e-06, "loss": 0.1316, "step": 14786 }, { "epoch": 2.087969500141203, "grad_norm": 3.3388234911876205, "learning_rate": 4.4684041307236345e-06, "loss": 0.1562, "step": 14787 }, { "epoch": 2.0881107031911887, "grad_norm": 3.232322320950187, "learning_rate": 4.467134210885529e-06, "loss": 0.1845, "step": 14788 }, { "epoch": 2.0882519062411746, "grad_norm": 3.0059205588299363, "learning_rate": 4.465864419628829e-06, "loss": 0.1598, "step": 14789 }, { "epoch": 2.0883931092911605, "grad_norm": 3.4000775578431743, "learning_rate": 4.464594756983055e-06, "loss": 0.1958, "step": 14790 }, { "epoch": 2.0885343123411464, "grad_norm": 3.6036805416685627, "learning_rate": 4.463325222977708e-06, "loss": 0.2047, "step": 14791 }, { "epoch": 2.0886755153911323, "grad_norm": 3.1765058636760672, "learning_rate": 4.462055817642291e-06, "loss": 0.1917, "step": 14792 }, { "epoch": 2.088816718441118, "grad_norm": 3.354980318032052, "learning_rate": 4.460786541006305e-06, "loss": 0.2047, "step": 14793 }, { "epoch": 2.088957921491104, "grad_norm": 3.121617781585857, "learning_rate": 4.459517393099253e-06, "loss": 0.1525, "step": 14794 }, { "epoch": 2.08909912454109, "grad_norm": 3.1263451041656585, "learning_rate": 4.458248373950616e-06, "loss": 0.1525, "step": 14795 }, { "epoch": 2.089240327591076, "grad_norm": 2.621276669067131, "learning_rate": 4.456979483589896e-06, "loss": 0.1393, "step": 14796 }, { "epoch": 2.0893815306410617, "grad_norm": 2.5769628276992975, "learning_rate": 4.45571072204658e-06, "loss": 0.1524, "step": 14797 }, { "epoch": 2.0895227336910476, "grad_norm": 3.333510010626498, "learning_rate": 4.454442089350151e-06, "loss": 0.1665, "step": 14798 }, { "epoch": 2.0896639367410335, "grad_norm": 2.6756184294840595, "learning_rate": 4.4531735855300954e-06, "loss": 0.1324, "step": 14799 }, { "epoch": 2.0898051397910193, "grad_norm": 2.7463422131108435, "learning_rate": 4.451905210615889e-06, "loss": 0.1593, "step": 14800 }, { "epoch": 2.0899463428410052, "grad_norm": 3.26841170314668, "learning_rate": 4.450636964637005e-06, "loss": 0.1354, "step": 14801 }, { "epoch": 2.090087545890991, "grad_norm": 2.9900825630036545, "learning_rate": 4.4493688476229295e-06, "loss": 0.1432, "step": 14802 }, { "epoch": 2.090228748940977, "grad_norm": 3.4545242970346917, "learning_rate": 4.44810085960312e-06, "loss": 0.0991, "step": 14803 }, { "epoch": 2.090369951990963, "grad_norm": 2.9520612797555987, "learning_rate": 4.4468330006070505e-06, "loss": 0.1265, "step": 14804 }, { "epoch": 2.0905111550409488, "grad_norm": 3.096010947621436, "learning_rate": 4.445565270664184e-06, "loss": 0.154, "step": 14805 }, { "epoch": 2.0906523580909346, "grad_norm": 3.6246407079592617, "learning_rate": 4.444297669803981e-06, "loss": 0.1946, "step": 14806 }, { "epoch": 2.0907935611409205, "grad_norm": 3.1607073559508887, "learning_rate": 4.443030198055897e-06, "loss": 0.1468, "step": 14807 }, { "epoch": 2.0909347641909064, "grad_norm": 3.2154633273797657, "learning_rate": 4.441762855449395e-06, "loss": 0.1492, "step": 14808 }, { "epoch": 2.0910759672408923, "grad_norm": 3.6044325483107995, "learning_rate": 4.4404956420139245e-06, "loss": 0.1927, "step": 14809 }, { "epoch": 2.091217170290878, "grad_norm": 3.614882966736453, "learning_rate": 4.439228557778933e-06, "loss": 0.189, "step": 14810 }, { "epoch": 2.091358373340864, "grad_norm": 3.4637240785764005, "learning_rate": 4.437961602773874e-06, "loss": 0.1632, "step": 14811 }, { "epoch": 2.09149957639085, "grad_norm": 3.137579592637615, "learning_rate": 4.436694777028175e-06, "loss": 0.1579, "step": 14812 }, { "epoch": 2.091640779440836, "grad_norm": 3.01614947690494, "learning_rate": 4.435428080571293e-06, "loss": 0.159, "step": 14813 }, { "epoch": 2.0917819824908217, "grad_norm": 3.0429779982154175, "learning_rate": 4.434161513432659e-06, "loss": 0.1274, "step": 14814 }, { "epoch": 2.0919231855408076, "grad_norm": 3.2929352557122322, "learning_rate": 4.432895075641707e-06, "loss": 0.1708, "step": 14815 }, { "epoch": 2.0920643885907935, "grad_norm": 2.8386277956830988, "learning_rate": 4.431628767227869e-06, "loss": 0.1415, "step": 14816 }, { "epoch": 2.0922055916407793, "grad_norm": 3.891388677289785, "learning_rate": 4.4303625882205735e-06, "loss": 0.2128, "step": 14817 }, { "epoch": 2.0923467946907652, "grad_norm": 3.149948032572408, "learning_rate": 4.4290965386492436e-06, "loss": 0.1475, "step": 14818 }, { "epoch": 2.092487997740751, "grad_norm": 4.007698313373494, "learning_rate": 4.427830618543311e-06, "loss": 0.2441, "step": 14819 }, { "epoch": 2.092629200790737, "grad_norm": 2.92107266459875, "learning_rate": 4.426564827932185e-06, "loss": 0.1807, "step": 14820 }, { "epoch": 2.092770403840723, "grad_norm": 3.1223032818463174, "learning_rate": 4.425299166845285e-06, "loss": 0.146, "step": 14821 }, { "epoch": 2.0929116068907088, "grad_norm": 5.280147708513427, "learning_rate": 4.424033635312025e-06, "loss": 0.2332, "step": 14822 }, { "epoch": 2.0930528099406946, "grad_norm": 2.7770934091599377, "learning_rate": 4.422768233361815e-06, "loss": 0.1392, "step": 14823 }, { "epoch": 2.0931940129906805, "grad_norm": 3.6269976184711568, "learning_rate": 4.421502961024059e-06, "loss": 0.1755, "step": 14824 }, { "epoch": 2.0933352160406664, "grad_norm": 3.247149494228637, "learning_rate": 4.420237818328169e-06, "loss": 0.1663, "step": 14825 }, { "epoch": 2.0934764190906523, "grad_norm": 3.00890367077005, "learning_rate": 4.418972805303542e-06, "loss": 0.1389, "step": 14826 }, { "epoch": 2.093617622140638, "grad_norm": 3.5266573223263915, "learning_rate": 4.417707921979577e-06, "loss": 0.1949, "step": 14827 }, { "epoch": 2.093758825190624, "grad_norm": 3.2962994776832275, "learning_rate": 4.416443168385672e-06, "loss": 0.1627, "step": 14828 }, { "epoch": 2.09390002824061, "grad_norm": 3.023267164479171, "learning_rate": 4.415178544551211e-06, "loss": 0.1204, "step": 14829 }, { "epoch": 2.094041231290596, "grad_norm": 2.8526165874527636, "learning_rate": 4.413914050505591e-06, "loss": 0.1735, "step": 14830 }, { "epoch": 2.0941824343405817, "grad_norm": 3.0237715823739766, "learning_rate": 4.412649686278195e-06, "loss": 0.1606, "step": 14831 }, { "epoch": 2.0943236373905676, "grad_norm": 4.007294511241387, "learning_rate": 4.4113854518984085e-06, "loss": 0.1832, "step": 14832 }, { "epoch": 2.0944648404405535, "grad_norm": 2.979239271609002, "learning_rate": 4.410121347395612e-06, "loss": 0.1724, "step": 14833 }, { "epoch": 2.0946060434905394, "grad_norm": 3.431458698942434, "learning_rate": 4.408857372799179e-06, "loss": 0.1823, "step": 14834 }, { "epoch": 2.0947472465405252, "grad_norm": 3.110631748267666, "learning_rate": 4.4075935281384875e-06, "loss": 0.1523, "step": 14835 }, { "epoch": 2.094888449590511, "grad_norm": 3.928381714299041, "learning_rate": 4.406329813442907e-06, "loss": 0.2255, "step": 14836 }, { "epoch": 2.095029652640497, "grad_norm": 3.6633800423974474, "learning_rate": 4.405066228741805e-06, "loss": 0.1376, "step": 14837 }, { "epoch": 2.095170855690483, "grad_norm": 3.1022673119154103, "learning_rate": 4.403802774064548e-06, "loss": 0.1568, "step": 14838 }, { "epoch": 2.0953120587404688, "grad_norm": 3.3718871810380446, "learning_rate": 4.402539449440499e-06, "loss": 0.2237, "step": 14839 }, { "epoch": 2.0954532617904547, "grad_norm": 3.0438122311282623, "learning_rate": 4.401276254899014e-06, "loss": 0.1986, "step": 14840 }, { "epoch": 2.0955944648404405, "grad_norm": 3.2434081434213446, "learning_rate": 4.400013190469448e-06, "loss": 0.1328, "step": 14841 }, { "epoch": 2.0957356678904264, "grad_norm": 3.420380656620362, "learning_rate": 4.39875025618116e-06, "loss": 0.2275, "step": 14842 }, { "epoch": 2.0958768709404123, "grad_norm": 3.8499492969113374, "learning_rate": 4.397487452063498e-06, "loss": 0.2032, "step": 14843 }, { "epoch": 2.096018073990398, "grad_norm": 4.264430293136782, "learning_rate": 4.396224778145808e-06, "loss": 0.204, "step": 14844 }, { "epoch": 2.096159277040384, "grad_norm": 2.8705637589430695, "learning_rate": 4.394962234457437e-06, "loss": 0.1665, "step": 14845 }, { "epoch": 2.09630048009037, "grad_norm": 2.8925088328316715, "learning_rate": 4.393699821027716e-06, "loss": 0.1432, "step": 14846 }, { "epoch": 2.096441683140356, "grad_norm": 3.7661947691622273, "learning_rate": 4.392437537885994e-06, "loss": 0.221, "step": 14847 }, { "epoch": 2.0965828861903417, "grad_norm": 2.6518016188653877, "learning_rate": 4.391175385061601e-06, "loss": 0.124, "step": 14848 }, { "epoch": 2.0967240892403276, "grad_norm": 3.6642526872880627, "learning_rate": 4.389913362583871e-06, "loss": 0.1811, "step": 14849 }, { "epoch": 2.0968652922903135, "grad_norm": 2.8003833095445683, "learning_rate": 4.3886514704821315e-06, "loss": 0.1469, "step": 14850 }, { "epoch": 2.0970064953402994, "grad_norm": 3.6431541882073994, "learning_rate": 4.387389708785708e-06, "loss": 0.1988, "step": 14851 }, { "epoch": 2.0971476983902853, "grad_norm": 2.7586957518606057, "learning_rate": 4.386128077523923e-06, "loss": 0.1424, "step": 14852 }, { "epoch": 2.097288901440271, "grad_norm": 3.0509260388095996, "learning_rate": 4.384866576726099e-06, "loss": 0.1801, "step": 14853 }, { "epoch": 2.097430104490257, "grad_norm": 3.172895054998916, "learning_rate": 4.383605206421549e-06, "loss": 0.1813, "step": 14854 }, { "epoch": 2.097571307540243, "grad_norm": 4.007224745658212, "learning_rate": 4.3823439666395895e-06, "loss": 0.2486, "step": 14855 }, { "epoch": 2.097712510590229, "grad_norm": 3.6581780512053528, "learning_rate": 4.38108285740953e-06, "loss": 0.1671, "step": 14856 }, { "epoch": 2.0978537136402147, "grad_norm": 2.9558809374791815, "learning_rate": 4.379821878760679e-06, "loss": 0.1469, "step": 14857 }, { "epoch": 2.0979949166902006, "grad_norm": 3.24766558221534, "learning_rate": 4.378561030722335e-06, "loss": 0.1834, "step": 14858 }, { "epoch": 2.0981361197401864, "grad_norm": 3.3788336628057953, "learning_rate": 4.377300313323809e-06, "loss": 0.1685, "step": 14859 }, { "epoch": 2.0982773227901723, "grad_norm": 3.257120797683473, "learning_rate": 4.3760397265943965e-06, "loss": 0.1278, "step": 14860 }, { "epoch": 2.098418525840158, "grad_norm": 2.3337844707372866, "learning_rate": 4.374779270563391e-06, "loss": 0.1428, "step": 14861 }, { "epoch": 2.098559728890144, "grad_norm": 3.3899738293785577, "learning_rate": 4.37351894526009e-06, "loss": 0.1786, "step": 14862 }, { "epoch": 2.09870093194013, "grad_norm": 3.231098927035343, "learning_rate": 4.372258750713771e-06, "loss": 0.1892, "step": 14863 }, { "epoch": 2.098842134990116, "grad_norm": 2.9858132781219417, "learning_rate": 4.370998686953733e-06, "loss": 0.1494, "step": 14864 }, { "epoch": 2.0989833380401017, "grad_norm": 3.6665589280435946, "learning_rate": 4.369738754009253e-06, "loss": 0.2189, "step": 14865 }, { "epoch": 2.0991245410900876, "grad_norm": 3.6455033138250363, "learning_rate": 4.368478951909614e-06, "loss": 0.1603, "step": 14866 }, { "epoch": 2.0992657441400735, "grad_norm": 2.7809100450445143, "learning_rate": 4.367219280684091e-06, "loss": 0.1023, "step": 14867 }, { "epoch": 2.0994069471900594, "grad_norm": 2.351442099352178, "learning_rate": 4.36595974036196e-06, "loss": 0.1016, "step": 14868 }, { "epoch": 2.0995481502400453, "grad_norm": 2.6976451267489554, "learning_rate": 4.364700330972492e-06, "loss": 0.1464, "step": 14869 }, { "epoch": 2.099689353290031, "grad_norm": 3.8132638017116554, "learning_rate": 4.363441052544953e-06, "loss": 0.2113, "step": 14870 }, { "epoch": 2.099830556340017, "grad_norm": 2.6185184978354594, "learning_rate": 4.362181905108611e-06, "loss": 0.1433, "step": 14871 }, { "epoch": 2.099971759390003, "grad_norm": 3.364107867167429, "learning_rate": 4.3609228886927265e-06, "loss": 0.1962, "step": 14872 }, { "epoch": 2.100112962439989, "grad_norm": 2.946728253642788, "learning_rate": 4.359664003326559e-06, "loss": 0.1586, "step": 14873 }, { "epoch": 2.1002541654899747, "grad_norm": 2.934823074906148, "learning_rate": 4.3584052490393645e-06, "loss": 0.1941, "step": 14874 }, { "epoch": 2.1003953685399606, "grad_norm": 3.410164330629009, "learning_rate": 4.357146625860391e-06, "loss": 0.2351, "step": 14875 }, { "epoch": 2.1005365715899464, "grad_norm": 2.6355142674112537, "learning_rate": 4.355888133818897e-06, "loss": 0.1658, "step": 14876 }, { "epoch": 2.1006777746399323, "grad_norm": 2.7918339739886995, "learning_rate": 4.3546297729441256e-06, "loss": 0.1865, "step": 14877 }, { "epoch": 2.100818977689918, "grad_norm": 3.226498127834616, "learning_rate": 4.35337154326532e-06, "loss": 0.1775, "step": 14878 }, { "epoch": 2.100960180739904, "grad_norm": 3.115239480675422, "learning_rate": 4.352113444811724e-06, "loss": 0.1736, "step": 14879 }, { "epoch": 2.10110138378989, "grad_norm": 2.9237773285505453, "learning_rate": 4.350855477612565e-06, "loss": 0.1336, "step": 14880 }, { "epoch": 2.101242586839876, "grad_norm": 3.059546421125386, "learning_rate": 4.349597641697091e-06, "loss": 0.1503, "step": 14881 }, { "epoch": 2.1013837898898617, "grad_norm": 2.3377603270119094, "learning_rate": 4.348339937094525e-06, "loss": 0.1294, "step": 14882 }, { "epoch": 2.1015249929398476, "grad_norm": 2.6550347434385797, "learning_rate": 4.3470823638340995e-06, "loss": 0.1264, "step": 14883 }, { "epoch": 2.1016661959898335, "grad_norm": 2.861419878098478, "learning_rate": 4.345824921945039e-06, "loss": 0.167, "step": 14884 }, { "epoch": 2.1018073990398194, "grad_norm": 3.063558956679336, "learning_rate": 4.344567611456564e-06, "loss": 0.1662, "step": 14885 }, { "epoch": 2.1019486020898053, "grad_norm": 3.1617001351913863, "learning_rate": 4.3433104323978956e-06, "loss": 0.1501, "step": 14886 }, { "epoch": 2.102089805139791, "grad_norm": 3.490238549557351, "learning_rate": 4.34205338479825e-06, "loss": 0.1853, "step": 14887 }, { "epoch": 2.102231008189777, "grad_norm": 2.7021273634409178, "learning_rate": 4.340796468686841e-06, "loss": 0.1674, "step": 14888 }, { "epoch": 2.102372211239763, "grad_norm": 2.930307317253397, "learning_rate": 4.339539684092877e-06, "loss": 0.1453, "step": 14889 }, { "epoch": 2.102513414289749, "grad_norm": 3.005669476133508, "learning_rate": 4.338283031045567e-06, "loss": 0.1604, "step": 14890 }, { "epoch": 2.1026546173397347, "grad_norm": 3.2647644071534936, "learning_rate": 4.3370265095741135e-06, "loss": 0.1643, "step": 14891 }, { "epoch": 2.1027958203897206, "grad_norm": 2.604405240960576, "learning_rate": 4.335770119707715e-06, "loss": 0.1582, "step": 14892 }, { "epoch": 2.1029370234397065, "grad_norm": 3.310422443778681, "learning_rate": 4.334513861475577e-06, "loss": 0.1709, "step": 14893 }, { "epoch": 2.1030782264896923, "grad_norm": 3.185655743344321, "learning_rate": 4.333257734906889e-06, "loss": 0.1691, "step": 14894 }, { "epoch": 2.103219429539678, "grad_norm": 3.6704081750717714, "learning_rate": 4.332001740030849e-06, "loss": 0.1699, "step": 14895 }, { "epoch": 2.103360632589664, "grad_norm": 3.7543527398896357, "learning_rate": 4.330745876876635e-06, "loss": 0.1687, "step": 14896 }, { "epoch": 2.10350183563965, "grad_norm": 2.962924753903919, "learning_rate": 4.3294901454734405e-06, "loss": 0.147, "step": 14897 }, { "epoch": 2.103643038689636, "grad_norm": 3.242798923557571, "learning_rate": 4.328234545850441e-06, "loss": 0.1652, "step": 14898 }, { "epoch": 2.1037842417396218, "grad_norm": 2.9847366424919306, "learning_rate": 4.3269790780368256e-06, "loss": 0.1601, "step": 14899 }, { "epoch": 2.1039254447896076, "grad_norm": 3.922785570697993, "learning_rate": 4.325723742061767e-06, "loss": 0.1891, "step": 14900 }, { "epoch": 2.1040666478395935, "grad_norm": 2.3151654861922037, "learning_rate": 4.324468537954437e-06, "loss": 0.1572, "step": 14901 }, { "epoch": 2.1042078508895794, "grad_norm": 3.123206609118541, "learning_rate": 4.323213465744007e-06, "loss": 0.1504, "step": 14902 }, { "epoch": 2.1043490539395653, "grad_norm": 2.90514734394011, "learning_rate": 4.3219585254596455e-06, "loss": 0.1532, "step": 14903 }, { "epoch": 2.104490256989551, "grad_norm": 3.027450803759767, "learning_rate": 4.320703717130516e-06, "loss": 0.1432, "step": 14904 }, { "epoch": 2.104631460039537, "grad_norm": 2.91162423138713, "learning_rate": 4.319449040785778e-06, "loss": 0.1391, "step": 14905 }, { "epoch": 2.104772663089523, "grad_norm": 3.483731175370125, "learning_rate": 4.3181944964545915e-06, "loss": 0.1515, "step": 14906 }, { "epoch": 2.104913866139509, "grad_norm": 3.0444545337867486, "learning_rate": 4.316940084166111e-06, "loss": 0.1456, "step": 14907 }, { "epoch": 2.1050550691894947, "grad_norm": 4.30507932861241, "learning_rate": 4.3156858039494895e-06, "loss": 0.2329, "step": 14908 }, { "epoch": 2.1051962722394806, "grad_norm": 3.0450061824220658, "learning_rate": 4.314431655833874e-06, "loss": 0.1381, "step": 14909 }, { "epoch": 2.1053374752894665, "grad_norm": 3.199023202258202, "learning_rate": 4.313177639848408e-06, "loss": 0.1909, "step": 14910 }, { "epoch": 2.105478678339452, "grad_norm": 3.2879930424187966, "learning_rate": 4.311923756022243e-06, "loss": 0.1913, "step": 14911 }, { "epoch": 2.105619881389438, "grad_norm": 2.9228811658155567, "learning_rate": 4.310670004384515e-06, "loss": 0.1544, "step": 14912 }, { "epoch": 2.1057610844394237, "grad_norm": 2.9836705655645295, "learning_rate": 4.309416384964355e-06, "loss": 0.1729, "step": 14913 }, { "epoch": 2.1059022874894096, "grad_norm": 3.0379705858112023, "learning_rate": 4.308162897790903e-06, "loss": 0.1581, "step": 14914 }, { "epoch": 2.1060434905393954, "grad_norm": 4.138632277129836, "learning_rate": 4.306909542893281e-06, "loss": 0.1813, "step": 14915 }, { "epoch": 2.1061846935893813, "grad_norm": 3.0170395322653603, "learning_rate": 4.305656320300628e-06, "loss": 0.15, "step": 14916 }, { "epoch": 2.106325896639367, "grad_norm": 3.1018977039841054, "learning_rate": 4.304403230042063e-06, "loss": 0.1769, "step": 14917 }, { "epoch": 2.106467099689353, "grad_norm": 3.4307746281307425, "learning_rate": 4.303150272146706e-06, "loss": 0.1605, "step": 14918 }, { "epoch": 2.106608302739339, "grad_norm": 3.9431462181931334, "learning_rate": 4.301897446643677e-06, "loss": 0.1555, "step": 14919 }, { "epoch": 2.106749505789325, "grad_norm": 2.92845224895522, "learning_rate": 4.30064475356209e-06, "loss": 0.1375, "step": 14920 }, { "epoch": 2.1068907088393107, "grad_norm": 2.9199468572958955, "learning_rate": 4.299392192931058e-06, "loss": 0.1769, "step": 14921 }, { "epoch": 2.1070319118892966, "grad_norm": 3.0808912821237397, "learning_rate": 4.29813976477969e-06, "loss": 0.1201, "step": 14922 }, { "epoch": 2.1071731149392825, "grad_norm": 2.665509370044914, "learning_rate": 4.296887469137091e-06, "loss": 0.1131, "step": 14923 }, { "epoch": 2.1073143179892684, "grad_norm": 3.362935102608214, "learning_rate": 4.295635306032364e-06, "loss": 0.149, "step": 14924 }, { "epoch": 2.1074555210392543, "grad_norm": 3.3016930406371747, "learning_rate": 4.294383275494609e-06, "loss": 0.1592, "step": 14925 }, { "epoch": 2.10759672408924, "grad_norm": 2.9915239868989407, "learning_rate": 4.293131377552923e-06, "loss": 0.1522, "step": 14926 }, { "epoch": 2.107737927139226, "grad_norm": 2.840127563091646, "learning_rate": 4.291879612236395e-06, "loss": 0.135, "step": 14927 }, { "epoch": 2.107879130189212, "grad_norm": 2.873609055645654, "learning_rate": 4.290627979574123e-06, "loss": 0.1155, "step": 14928 }, { "epoch": 2.108020333239198, "grad_norm": 3.3994236831957227, "learning_rate": 4.2893764795951955e-06, "loss": 0.1676, "step": 14929 }, { "epoch": 2.1081615362891837, "grad_norm": 3.370001710807155, "learning_rate": 4.2881251123286896e-06, "loss": 0.1466, "step": 14930 }, { "epoch": 2.1083027393391696, "grad_norm": 2.9610430277388473, "learning_rate": 4.286873877803688e-06, "loss": 0.1599, "step": 14931 }, { "epoch": 2.1084439423891554, "grad_norm": 3.5469329957362827, "learning_rate": 4.2856227760492665e-06, "loss": 0.1857, "step": 14932 }, { "epoch": 2.1085851454391413, "grad_norm": 2.967649905093386, "learning_rate": 4.284371807094508e-06, "loss": 0.1793, "step": 14933 }, { "epoch": 2.108726348489127, "grad_norm": 3.2228400753042754, "learning_rate": 4.28312097096848e-06, "loss": 0.148, "step": 14934 }, { "epoch": 2.108867551539113, "grad_norm": 3.2894830449048285, "learning_rate": 4.281870267700251e-06, "loss": 0.1468, "step": 14935 }, { "epoch": 2.109008754589099, "grad_norm": 3.161925060626662, "learning_rate": 4.280619697318888e-06, "loss": 0.1611, "step": 14936 }, { "epoch": 2.109149957639085, "grad_norm": 2.6948737421878426, "learning_rate": 4.279369259853454e-06, "loss": 0.1271, "step": 14937 }, { "epoch": 2.1092911606890707, "grad_norm": 3.1466285862507744, "learning_rate": 4.278118955333007e-06, "loss": 0.1292, "step": 14938 }, { "epoch": 2.1094323637390566, "grad_norm": 2.705543119220257, "learning_rate": 4.276868783786605e-06, "loss": 0.1409, "step": 14939 }, { "epoch": 2.1095735667890425, "grad_norm": 2.3343913097812687, "learning_rate": 4.275618745243301e-06, "loss": 0.1384, "step": 14940 }, { "epoch": 2.1097147698390284, "grad_norm": 3.334124086375242, "learning_rate": 4.274368839732145e-06, "loss": 0.1806, "step": 14941 }, { "epoch": 2.1098559728890143, "grad_norm": 3.0754218680604644, "learning_rate": 4.273119067282184e-06, "loss": 0.1915, "step": 14942 }, { "epoch": 2.109997175939, "grad_norm": 3.9666483931710657, "learning_rate": 4.271869427922463e-06, "loss": 0.1803, "step": 14943 }, { "epoch": 2.110138378988986, "grad_norm": 3.050769715090813, "learning_rate": 4.270619921682019e-06, "loss": 0.1349, "step": 14944 }, { "epoch": 2.110279582038972, "grad_norm": 3.7490523746071527, "learning_rate": 4.269370548589897e-06, "loss": 0.1692, "step": 14945 }, { "epoch": 2.110420785088958, "grad_norm": 3.9353258016788835, "learning_rate": 4.268121308675132e-06, "loss": 0.2086, "step": 14946 }, { "epoch": 2.1105619881389437, "grad_norm": 3.6957392876328434, "learning_rate": 4.26687220196675e-06, "loss": 0.1764, "step": 14947 }, { "epoch": 2.1107031911889296, "grad_norm": 2.8877215962754694, "learning_rate": 4.265623228493781e-06, "loss": 0.1579, "step": 14948 }, { "epoch": 2.1108443942389155, "grad_norm": 3.1526232535011043, "learning_rate": 4.2643743882852486e-06, "loss": 0.1621, "step": 14949 }, { "epoch": 2.1109855972889013, "grad_norm": 4.676465827580886, "learning_rate": 4.2631256813701815e-06, "loss": 0.2213, "step": 14950 }, { "epoch": 2.111126800338887, "grad_norm": 3.859406800222345, "learning_rate": 4.261877107777596e-06, "loss": 0.195, "step": 14951 }, { "epoch": 2.111268003388873, "grad_norm": 3.1723621501857258, "learning_rate": 4.260628667536508e-06, "loss": 0.1452, "step": 14952 }, { "epoch": 2.111409206438859, "grad_norm": 3.3518591906624216, "learning_rate": 4.2593803606759324e-06, "loss": 0.1888, "step": 14953 }, { "epoch": 2.111550409488845, "grad_norm": 2.8849255309611395, "learning_rate": 4.258132187224877e-06, "loss": 0.1738, "step": 14954 }, { "epoch": 2.1116916125388308, "grad_norm": 2.5035119889991355, "learning_rate": 4.256884147212351e-06, "loss": 0.1498, "step": 14955 }, { "epoch": 2.1118328155888166, "grad_norm": 3.3491211750099006, "learning_rate": 4.255636240667356e-06, "loss": 0.1464, "step": 14956 }, { "epoch": 2.1119740186388025, "grad_norm": 2.7697017372059203, "learning_rate": 4.254388467618894e-06, "loss": 0.1625, "step": 14957 }, { "epoch": 2.1121152216887884, "grad_norm": 3.3454954291310934, "learning_rate": 4.253140828095964e-06, "loss": 0.1806, "step": 14958 }, { "epoch": 2.1122564247387743, "grad_norm": 2.4364290655683827, "learning_rate": 4.251893322127558e-06, "loss": 0.11, "step": 14959 }, { "epoch": 2.11239762778876, "grad_norm": 2.5987351444910862, "learning_rate": 4.2506459497426685e-06, "loss": 0.0881, "step": 14960 }, { "epoch": 2.112538830838746, "grad_norm": 3.450966224988919, "learning_rate": 4.2493987109702814e-06, "loss": 0.2145, "step": 14961 }, { "epoch": 2.112680033888732, "grad_norm": 3.84106334736533, "learning_rate": 4.2481516058393876e-06, "loss": 0.202, "step": 14962 }, { "epoch": 2.112821236938718, "grad_norm": 3.1324661551584696, "learning_rate": 4.2469046343789715e-06, "loss": 0.1658, "step": 14963 }, { "epoch": 2.1129624399887037, "grad_norm": 3.216044549219269, "learning_rate": 4.2456577966180025e-06, "loss": 0.1666, "step": 14964 }, { "epoch": 2.1131036430386896, "grad_norm": 2.805378137647697, "learning_rate": 4.244411092585461e-06, "loss": 0.1733, "step": 14965 }, { "epoch": 2.1132448460886755, "grad_norm": 3.1503686632829333, "learning_rate": 4.243164522310317e-06, "loss": 0.1577, "step": 14966 }, { "epoch": 2.1133860491386613, "grad_norm": 3.94323185859925, "learning_rate": 4.241918085821547e-06, "loss": 0.2141, "step": 14967 }, { "epoch": 2.1135272521886472, "grad_norm": 2.9421215266790983, "learning_rate": 4.240671783148114e-06, "loss": 0.1542, "step": 14968 }, { "epoch": 2.113668455238633, "grad_norm": 3.149453921887369, "learning_rate": 4.23942561431898e-06, "loss": 0.1679, "step": 14969 }, { "epoch": 2.113809658288619, "grad_norm": 3.0518665916513803, "learning_rate": 4.238179579363109e-06, "loss": 0.1533, "step": 14970 }, { "epoch": 2.113950861338605, "grad_norm": 2.8931169273462567, "learning_rate": 4.236933678309455e-06, "loss": 0.1619, "step": 14971 }, { "epoch": 2.1140920643885908, "grad_norm": 3.252152318483666, "learning_rate": 4.235687911186974e-06, "loss": 0.2056, "step": 14972 }, { "epoch": 2.1142332674385766, "grad_norm": 2.3990781208039205, "learning_rate": 4.234442278024616e-06, "loss": 0.1619, "step": 14973 }, { "epoch": 2.1143744704885625, "grad_norm": 3.244987205817203, "learning_rate": 4.2331967788513295e-06, "loss": 0.1609, "step": 14974 }, { "epoch": 2.1145156735385484, "grad_norm": 2.9794194675563523, "learning_rate": 4.2319514136960605e-06, "loss": 0.1379, "step": 14975 }, { "epoch": 2.1146568765885343, "grad_norm": 3.0139823547475566, "learning_rate": 4.230706182587748e-06, "loss": 0.1268, "step": 14976 }, { "epoch": 2.11479807963852, "grad_norm": 2.804843237428806, "learning_rate": 4.229461085555333e-06, "loss": 0.1767, "step": 14977 }, { "epoch": 2.114939282688506, "grad_norm": 3.059740113459109, "learning_rate": 4.228216122627747e-06, "loss": 0.175, "step": 14978 }, { "epoch": 2.115080485738492, "grad_norm": 2.9609526234077954, "learning_rate": 4.226971293833929e-06, "loss": 0.1496, "step": 14979 }, { "epoch": 2.115221688788478, "grad_norm": 3.4798730569758223, "learning_rate": 4.225726599202808e-06, "loss": 0.1907, "step": 14980 }, { "epoch": 2.1153628918384637, "grad_norm": 4.08775318922559, "learning_rate": 4.224482038763305e-06, "loss": 0.1791, "step": 14981 }, { "epoch": 2.1155040948884496, "grad_norm": 2.531328177773969, "learning_rate": 4.223237612544344e-06, "loss": 0.1445, "step": 14982 }, { "epoch": 2.1156452979384355, "grad_norm": 3.329116697563938, "learning_rate": 4.221993320574842e-06, "loss": 0.1793, "step": 14983 }, { "epoch": 2.1157865009884214, "grad_norm": 4.046029421308103, "learning_rate": 4.220749162883725e-06, "loss": 0.1678, "step": 14984 }, { "epoch": 2.1159277040384072, "grad_norm": 3.380780169913058, "learning_rate": 4.219505139499901e-06, "loss": 0.1956, "step": 14985 }, { "epoch": 2.116068907088393, "grad_norm": 3.8856173836564283, "learning_rate": 4.218261250452281e-06, "loss": 0.1783, "step": 14986 }, { "epoch": 2.116210110138379, "grad_norm": 3.0188358575181935, "learning_rate": 4.217017495769772e-06, "loss": 0.1295, "step": 14987 }, { "epoch": 2.116351313188365, "grad_norm": 3.9108532376077645, "learning_rate": 4.21577387548128e-06, "loss": 0.2092, "step": 14988 }, { "epoch": 2.1164925162383508, "grad_norm": 3.646000478123008, "learning_rate": 4.214530389615704e-06, "loss": 0.1845, "step": 14989 }, { "epoch": 2.1166337192883367, "grad_norm": 2.9919507888537296, "learning_rate": 4.213287038201943e-06, "loss": 0.153, "step": 14990 }, { "epoch": 2.1167749223383225, "grad_norm": 2.604114597330168, "learning_rate": 4.212043821268893e-06, "loss": 0.1773, "step": 14991 }, { "epoch": 2.1169161253883084, "grad_norm": 3.0187264274322363, "learning_rate": 4.210800738845445e-06, "loss": 0.1622, "step": 14992 }, { "epoch": 2.1170573284382943, "grad_norm": 3.8901259044929426, "learning_rate": 4.209557790960488e-06, "loss": 0.2034, "step": 14993 }, { "epoch": 2.11719853148828, "grad_norm": 3.078774077890639, "learning_rate": 4.208314977642907e-06, "loss": 0.1576, "step": 14994 }, { "epoch": 2.117339734538266, "grad_norm": 2.4090286591856875, "learning_rate": 4.20707229892158e-06, "loss": 0.1468, "step": 14995 }, { "epoch": 2.117480937588252, "grad_norm": 2.645305681220911, "learning_rate": 4.205829754825396e-06, "loss": 0.1368, "step": 14996 }, { "epoch": 2.117622140638238, "grad_norm": 3.686023318559023, "learning_rate": 4.2045873453832296e-06, "loss": 0.2151, "step": 14997 }, { "epoch": 2.1177633436882237, "grad_norm": 3.4230883998239032, "learning_rate": 4.203345070623947e-06, "loss": 0.1252, "step": 14998 }, { "epoch": 2.1179045467382096, "grad_norm": 3.877781277386026, "learning_rate": 4.2021029305764205e-06, "loss": 0.2358, "step": 14999 }, { "epoch": 2.1180457497881955, "grad_norm": 2.5377684155300217, "learning_rate": 4.200860925269519e-06, "loss": 0.1364, "step": 15000 }, { "epoch": 2.1181869528381814, "grad_norm": 3.8775433759596556, "learning_rate": 4.1996190547321e-06, "loss": 0.2193, "step": 15001 }, { "epoch": 2.1183281558881673, "grad_norm": 3.375783433924981, "learning_rate": 4.198377318993035e-06, "loss": 0.1452, "step": 15002 }, { "epoch": 2.118469358938153, "grad_norm": 2.897557966792658, "learning_rate": 4.197135718081173e-06, "loss": 0.1449, "step": 15003 }, { "epoch": 2.118610561988139, "grad_norm": 2.8229348090093884, "learning_rate": 4.1958942520253735e-06, "loss": 0.1114, "step": 15004 }, { "epoch": 2.118751765038125, "grad_norm": 2.7829848479897947, "learning_rate": 4.194652920854483e-06, "loss": 0.1139, "step": 15005 }, { "epoch": 2.118892968088111, "grad_norm": 3.3053893218495665, "learning_rate": 4.193411724597352e-06, "loss": 0.1685, "step": 15006 }, { "epoch": 2.1190341711380967, "grad_norm": 3.7341367839155617, "learning_rate": 4.192170663282825e-06, "loss": 0.2181, "step": 15007 }, { "epoch": 2.1191753741880826, "grad_norm": 3.539189794135913, "learning_rate": 4.1909297369397435e-06, "loss": 0.15, "step": 15008 }, { "epoch": 2.1193165772380684, "grad_norm": 3.040661257208769, "learning_rate": 4.189688945596947e-06, "loss": 0.1297, "step": 15009 }, { "epoch": 2.1194577802880543, "grad_norm": 3.3813302582229157, "learning_rate": 4.188448289283269e-06, "loss": 0.1766, "step": 15010 }, { "epoch": 2.11959898333804, "grad_norm": 2.9788250801288165, "learning_rate": 4.1872077680275435e-06, "loss": 0.1397, "step": 15011 }, { "epoch": 2.119740186388026, "grad_norm": 3.9348054053650805, "learning_rate": 4.185967381858599e-06, "loss": 0.202, "step": 15012 }, { "epoch": 2.119881389438012, "grad_norm": 2.7323535036660296, "learning_rate": 4.184727130805258e-06, "loss": 0.1083, "step": 15013 }, { "epoch": 2.120022592487998, "grad_norm": 3.175895298213388, "learning_rate": 4.183487014896354e-06, "loss": 0.1768, "step": 15014 }, { "epoch": 2.1201637955379837, "grad_norm": 2.567138502671909, "learning_rate": 4.182247034160697e-06, "loss": 0.1465, "step": 15015 }, { "epoch": 2.1203049985879696, "grad_norm": 3.3725784453691423, "learning_rate": 4.1810071886271065e-06, "loss": 0.1824, "step": 15016 }, { "epoch": 2.1204462016379555, "grad_norm": 3.465111551640221, "learning_rate": 4.179767478324394e-06, "loss": 0.1577, "step": 15017 }, { "epoch": 2.1205874046879414, "grad_norm": 3.4411676313966617, "learning_rate": 4.178527903281371e-06, "loss": 0.1754, "step": 15018 }, { "epoch": 2.1207286077379273, "grad_norm": 2.9355279210236565, "learning_rate": 4.177288463526848e-06, "loss": 0.1678, "step": 15019 }, { "epoch": 2.120869810787913, "grad_norm": 3.1830502892892225, "learning_rate": 4.176049159089626e-06, "loss": 0.1833, "step": 15020 }, { "epoch": 2.121011013837899, "grad_norm": 3.7100785031135572, "learning_rate": 4.174809989998506e-06, "loss": 0.2061, "step": 15021 }, { "epoch": 2.121152216887885, "grad_norm": 3.020319444234034, "learning_rate": 4.173570956282286e-06, "loss": 0.1582, "step": 15022 }, { "epoch": 2.121293419937871, "grad_norm": 3.2570410794452416, "learning_rate": 4.172332057969762e-06, "loss": 0.1614, "step": 15023 }, { "epoch": 2.1214346229878567, "grad_norm": 3.421671625015669, "learning_rate": 4.171093295089723e-06, "loss": 0.1873, "step": 15024 }, { "epoch": 2.1215758260378426, "grad_norm": 3.6725136520261064, "learning_rate": 4.169854667670958e-06, "loss": 0.2005, "step": 15025 }, { "epoch": 2.1217170290878284, "grad_norm": 2.8740106213502954, "learning_rate": 4.168616175742255e-06, "loss": 0.1577, "step": 15026 }, { "epoch": 2.1218582321378143, "grad_norm": 3.39007598518118, "learning_rate": 4.167377819332392e-06, "loss": 0.1669, "step": 15027 }, { "epoch": 2.1219994351878, "grad_norm": 3.8893944417913446, "learning_rate": 4.1661395984701495e-06, "loss": 0.1527, "step": 15028 }, { "epoch": 2.122140638237786, "grad_norm": 3.6523311713010664, "learning_rate": 4.164901513184304e-06, "loss": 0.1689, "step": 15029 }, { "epoch": 2.122281841287772, "grad_norm": 3.2139326574200013, "learning_rate": 4.1636635635036235e-06, "loss": 0.157, "step": 15030 }, { "epoch": 2.122423044337758, "grad_norm": 3.7698136708259082, "learning_rate": 4.162425749456889e-06, "loss": 0.1798, "step": 15031 }, { "epoch": 2.1225642473877437, "grad_norm": 3.2163990018433624, "learning_rate": 4.161188071072854e-06, "loss": 0.136, "step": 15032 }, { "epoch": 2.1227054504377296, "grad_norm": 3.1792484676247117, "learning_rate": 4.159950528380287e-06, "loss": 0.1538, "step": 15033 }, { "epoch": 2.1228466534877155, "grad_norm": 3.905499515906006, "learning_rate": 4.158713121407949e-06, "loss": 0.1653, "step": 15034 }, { "epoch": 2.1229878565377014, "grad_norm": 3.564224959407462, "learning_rate": 4.15747585018459e-06, "loss": 0.1676, "step": 15035 }, { "epoch": 2.1231290595876873, "grad_norm": 3.748945262770402, "learning_rate": 4.156238714738974e-06, "loss": 0.1843, "step": 15036 }, { "epoch": 2.123270262637673, "grad_norm": 2.570973310936118, "learning_rate": 4.155001715099845e-06, "loss": 0.1268, "step": 15037 }, { "epoch": 2.123411465687659, "grad_norm": 3.7183523154949634, "learning_rate": 4.153764851295954e-06, "loss": 0.2035, "step": 15038 }, { "epoch": 2.123552668737645, "grad_norm": 4.096832486503096, "learning_rate": 4.152528123356042e-06, "loss": 0.2187, "step": 15039 }, { "epoch": 2.123693871787631, "grad_norm": 2.9425809624842056, "learning_rate": 4.1512915313088505e-06, "loss": 0.1863, "step": 15040 }, { "epoch": 2.1238350748376167, "grad_norm": 3.197391533332398, "learning_rate": 4.150055075183119e-06, "loss": 0.1897, "step": 15041 }, { "epoch": 2.1239762778876026, "grad_norm": 3.295176317437243, "learning_rate": 4.148818755007581e-06, "loss": 0.1715, "step": 15042 }, { "epoch": 2.1241174809375885, "grad_norm": 4.422025202361986, "learning_rate": 4.147582570810967e-06, "loss": 0.2516, "step": 15043 }, { "epoch": 2.1242586839875743, "grad_norm": 3.0766039904430182, "learning_rate": 4.146346522622008e-06, "loss": 0.1411, "step": 15044 }, { "epoch": 2.12439988703756, "grad_norm": 3.5462547076011304, "learning_rate": 4.145110610469427e-06, "loss": 0.196, "step": 15045 }, { "epoch": 2.124541090087546, "grad_norm": 3.2665062274682795, "learning_rate": 4.143874834381947e-06, "loss": 0.1662, "step": 15046 }, { "epoch": 2.124682293137532, "grad_norm": 3.357493733909517, "learning_rate": 4.142639194388284e-06, "loss": 0.1582, "step": 15047 }, { "epoch": 2.124823496187518, "grad_norm": 2.9172112252489337, "learning_rate": 4.141403690517163e-06, "loss": 0.1517, "step": 15048 }, { "epoch": 2.1249646992375038, "grad_norm": 3.5797723630560627, "learning_rate": 4.1401683227972865e-06, "loss": 0.1704, "step": 15049 }, { "epoch": 2.1251059022874896, "grad_norm": 3.0585148957457364, "learning_rate": 4.138933091257368e-06, "loss": 0.1597, "step": 15050 }, { "epoch": 2.1252471053374755, "grad_norm": 3.0791653110915425, "learning_rate": 4.137697995926112e-06, "loss": 0.1569, "step": 15051 }, { "epoch": 2.1253883083874614, "grad_norm": 3.7476913945567203, "learning_rate": 4.13646303683222e-06, "loss": 0.154, "step": 15052 }, { "epoch": 2.1255295114374473, "grad_norm": 3.466153510014836, "learning_rate": 4.1352282140043985e-06, "loss": 0.1559, "step": 15053 }, { "epoch": 2.1256707144874327, "grad_norm": 4.193919735679608, "learning_rate": 4.1339935274713404e-06, "loss": 0.2095, "step": 15054 }, { "epoch": 2.1258119175374186, "grad_norm": 3.5933205070182455, "learning_rate": 4.132758977261739e-06, "loss": 0.1547, "step": 15055 }, { "epoch": 2.1259531205874045, "grad_norm": 2.6516630399879015, "learning_rate": 4.13152456340429e-06, "loss": 0.1351, "step": 15056 }, { "epoch": 2.1260943236373904, "grad_norm": 2.89272810062591, "learning_rate": 4.130290285927667e-06, "loss": 0.1706, "step": 15057 }, { "epoch": 2.1262355266873763, "grad_norm": 3.3785242585665487, "learning_rate": 4.129056144860567e-06, "loss": 0.1747, "step": 15058 }, { "epoch": 2.126376729737362, "grad_norm": 3.135433627434231, "learning_rate": 4.127822140231668e-06, "loss": 0.1703, "step": 15059 }, { "epoch": 2.126517932787348, "grad_norm": 3.540149061212886, "learning_rate": 4.126588272069645e-06, "loss": 0.2135, "step": 15060 }, { "epoch": 2.126659135837334, "grad_norm": 2.9357028330020496, "learning_rate": 4.1253545404031735e-06, "loss": 0.1514, "step": 15061 }, { "epoch": 2.12680033888732, "grad_norm": 3.1794316391888873, "learning_rate": 4.124120945260927e-06, "loss": 0.1303, "step": 15062 }, { "epoch": 2.1269415419373057, "grad_norm": 3.8592275483359657, "learning_rate": 4.1228874866715706e-06, "loss": 0.1838, "step": 15063 }, { "epoch": 2.1270827449872916, "grad_norm": 3.052003553252904, "learning_rate": 4.121654164663769e-06, "loss": 0.156, "step": 15064 }, { "epoch": 2.1272239480372774, "grad_norm": 2.7289109250999997, "learning_rate": 4.120420979266192e-06, "loss": 0.1304, "step": 15065 }, { "epoch": 2.1273651510872633, "grad_norm": 3.4764045042754734, "learning_rate": 4.119187930507489e-06, "loss": 0.1698, "step": 15066 }, { "epoch": 2.127506354137249, "grad_norm": 3.3630868009356494, "learning_rate": 4.117955018416319e-06, "loss": 0.1671, "step": 15067 }, { "epoch": 2.127647557187235, "grad_norm": 3.5640522510184, "learning_rate": 4.116722243021333e-06, "loss": 0.1946, "step": 15068 }, { "epoch": 2.127788760237221, "grad_norm": 4.219082572583667, "learning_rate": 4.115489604351178e-06, "loss": 0.1897, "step": 15069 }, { "epoch": 2.127929963287207, "grad_norm": 3.3782169472236965, "learning_rate": 4.114257102434508e-06, "loss": 0.1728, "step": 15070 }, { "epoch": 2.1280711663371927, "grad_norm": 3.0814984795076126, "learning_rate": 4.1130247372999595e-06, "loss": 0.1403, "step": 15071 }, { "epoch": 2.1282123693871786, "grad_norm": 3.2247922281572863, "learning_rate": 4.111792508976175e-06, "loss": 0.1121, "step": 15072 }, { "epoch": 2.1283535724371645, "grad_norm": 3.73470675850116, "learning_rate": 4.110560417491792e-06, "loss": 0.1547, "step": 15073 }, { "epoch": 2.1284947754871504, "grad_norm": 3.441965473210724, "learning_rate": 4.1093284628754355e-06, "loss": 0.1731, "step": 15074 }, { "epoch": 2.1286359785371363, "grad_norm": 3.1088202238575704, "learning_rate": 4.108096645155745e-06, "loss": 0.1529, "step": 15075 }, { "epoch": 2.128777181587122, "grad_norm": 5.926867882436746, "learning_rate": 4.106864964361343e-06, "loss": 0.2199, "step": 15076 }, { "epoch": 2.128918384637108, "grad_norm": 3.3538342038355853, "learning_rate": 4.105633420520856e-06, "loss": 0.1979, "step": 15077 }, { "epoch": 2.129059587687094, "grad_norm": 3.33164812848948, "learning_rate": 4.104402013662901e-06, "loss": 0.1805, "step": 15078 }, { "epoch": 2.12920079073708, "grad_norm": 2.591249641717634, "learning_rate": 4.103170743816097e-06, "loss": 0.1485, "step": 15079 }, { "epoch": 2.1293419937870657, "grad_norm": 3.8147083144932705, "learning_rate": 4.101939611009059e-06, "loss": 0.1581, "step": 15080 }, { "epoch": 2.1294831968370516, "grad_norm": 3.704038547663268, "learning_rate": 4.100708615270395e-06, "loss": 0.1505, "step": 15081 }, { "epoch": 2.1296243998870374, "grad_norm": 2.9692070296532767, "learning_rate": 4.09947775662872e-06, "loss": 0.1225, "step": 15082 }, { "epoch": 2.1297656029370233, "grad_norm": 3.6741542702938865, "learning_rate": 4.098247035112631e-06, "loss": 0.2271, "step": 15083 }, { "epoch": 2.129906805987009, "grad_norm": 4.116231123347006, "learning_rate": 4.097016450750733e-06, "loss": 0.2155, "step": 15084 }, { "epoch": 2.130048009036995, "grad_norm": 3.126971954090494, "learning_rate": 4.095786003571621e-06, "loss": 0.1604, "step": 15085 }, { "epoch": 2.130189212086981, "grad_norm": 3.683596408656099, "learning_rate": 4.094555693603891e-06, "loss": 0.181, "step": 15086 }, { "epoch": 2.130330415136967, "grad_norm": 3.5021821377923334, "learning_rate": 4.093325520876139e-06, "loss": 0.2267, "step": 15087 }, { "epoch": 2.1304716181869527, "grad_norm": 2.081015399487398, "learning_rate": 4.092095485416952e-06, "loss": 0.1187, "step": 15088 }, { "epoch": 2.1306128212369386, "grad_norm": 2.730292541348565, "learning_rate": 4.090865587254913e-06, "loss": 0.1296, "step": 15089 }, { "epoch": 2.1307540242869245, "grad_norm": 2.832515701568385, "learning_rate": 4.0896358264186095e-06, "loss": 0.1298, "step": 15090 }, { "epoch": 2.1308952273369104, "grad_norm": 3.4139305485327758, "learning_rate": 4.088406202936614e-06, "loss": 0.172, "step": 15091 }, { "epoch": 2.1310364303868963, "grad_norm": 2.928108830981196, "learning_rate": 4.087176716837502e-06, "loss": 0.1889, "step": 15092 }, { "epoch": 2.131177633436882, "grad_norm": 3.003778646342553, "learning_rate": 4.085947368149853e-06, "loss": 0.1254, "step": 15093 }, { "epoch": 2.131318836486868, "grad_norm": 3.7382676155308556, "learning_rate": 4.0847181569022335e-06, "loss": 0.1865, "step": 15094 }, { "epoch": 2.131460039536854, "grad_norm": 3.432698523725392, "learning_rate": 4.083489083123209e-06, "loss": 0.1899, "step": 15095 }, { "epoch": 2.13160124258684, "grad_norm": 3.2717226901531236, "learning_rate": 4.0822601468413425e-06, "loss": 0.1667, "step": 15096 }, { "epoch": 2.1317424456368257, "grad_norm": 3.7007068331408406, "learning_rate": 4.081031348085195e-06, "loss": 0.1668, "step": 15097 }, { "epoch": 2.1318836486868116, "grad_norm": 2.92579617287353, "learning_rate": 4.079802686883318e-06, "loss": 0.1504, "step": 15098 }, { "epoch": 2.1320248517367975, "grad_norm": 3.081008852157073, "learning_rate": 4.078574163264278e-06, "loss": 0.162, "step": 15099 }, { "epoch": 2.1321660547867833, "grad_norm": 4.676494500511638, "learning_rate": 4.077345777256614e-06, "loss": 0.2159, "step": 15100 }, { "epoch": 2.132307257836769, "grad_norm": 3.273177133325917, "learning_rate": 4.076117528888876e-06, "loss": 0.1853, "step": 15101 }, { "epoch": 2.132448460886755, "grad_norm": 2.3973525284543373, "learning_rate": 4.074889418189608e-06, "loss": 0.1114, "step": 15102 }, { "epoch": 2.132589663936741, "grad_norm": 3.228568631471149, "learning_rate": 4.073661445187351e-06, "loss": 0.1295, "step": 15103 }, { "epoch": 2.132730866986727, "grad_norm": 2.969215036599727, "learning_rate": 4.07243360991064e-06, "loss": 0.1467, "step": 15104 }, { "epoch": 2.1328720700367128, "grad_norm": 2.8815928869127094, "learning_rate": 4.071205912388015e-06, "loss": 0.1646, "step": 15105 }, { "epoch": 2.1330132730866986, "grad_norm": 2.725606607562211, "learning_rate": 4.069978352648004e-06, "loss": 0.1484, "step": 15106 }, { "epoch": 2.1331544761366845, "grad_norm": 2.593678166755236, "learning_rate": 4.068750930719139e-06, "loss": 0.1331, "step": 15107 }, { "epoch": 2.1332956791866704, "grad_norm": 3.473783906379059, "learning_rate": 4.067523646629938e-06, "loss": 0.1954, "step": 15108 }, { "epoch": 2.1334368822366563, "grad_norm": 3.260515877028411, "learning_rate": 4.0662965004089195e-06, "loss": 0.179, "step": 15109 }, { "epoch": 2.133578085286642, "grad_norm": 3.326850664080281, "learning_rate": 4.065069492084614e-06, "loss": 0.175, "step": 15110 }, { "epoch": 2.133719288336628, "grad_norm": 2.59067015179295, "learning_rate": 4.063842621685529e-06, "loss": 0.1451, "step": 15111 }, { "epoch": 2.133860491386614, "grad_norm": 2.8373891751141764, "learning_rate": 4.062615889240176e-06, "loss": 0.1531, "step": 15112 }, { "epoch": 2.1340016944366, "grad_norm": 3.029293007074109, "learning_rate": 4.061389294777068e-06, "loss": 0.1216, "step": 15113 }, { "epoch": 2.1341428974865857, "grad_norm": 3.3179999426202853, "learning_rate": 4.060162838324708e-06, "loss": 0.1422, "step": 15114 }, { "epoch": 2.1342841005365716, "grad_norm": 3.4714392997108714, "learning_rate": 4.058936519911598e-06, "loss": 0.163, "step": 15115 }, { "epoch": 2.1344253035865575, "grad_norm": 2.4070650932072546, "learning_rate": 4.057710339566238e-06, "loss": 0.1165, "step": 15116 }, { "epoch": 2.1345665066365433, "grad_norm": 3.466047058832093, "learning_rate": 4.0564842973171225e-06, "loss": 0.141, "step": 15117 }, { "epoch": 2.1347077096865292, "grad_norm": 2.972186387799967, "learning_rate": 4.055258393192746e-06, "loss": 0.1781, "step": 15118 }, { "epoch": 2.134848912736515, "grad_norm": 3.8897642449954346, "learning_rate": 4.054032627221597e-06, "loss": 0.2186, "step": 15119 }, { "epoch": 2.134990115786501, "grad_norm": 3.2690317664813615, "learning_rate": 4.052806999432161e-06, "loss": 0.1855, "step": 15120 }, { "epoch": 2.135131318836487, "grad_norm": 2.859182497181163, "learning_rate": 4.05158150985292e-06, "loss": 0.1622, "step": 15121 }, { "epoch": 2.1352725218864728, "grad_norm": 3.8242214097206304, "learning_rate": 4.050356158512357e-06, "loss": 0.2007, "step": 15122 }, { "epoch": 2.1354137249364586, "grad_norm": 2.6069501082366044, "learning_rate": 4.049130945438949e-06, "loss": 0.1251, "step": 15123 }, { "epoch": 2.1355549279864445, "grad_norm": 2.7180317843372417, "learning_rate": 4.047905870661172e-06, "loss": 0.1544, "step": 15124 }, { "epoch": 2.1356961310364304, "grad_norm": 2.9042577119280417, "learning_rate": 4.046680934207488e-06, "loss": 0.1635, "step": 15125 }, { "epoch": 2.1358373340864163, "grad_norm": 3.170796151757244, "learning_rate": 4.045456136106363e-06, "loss": 0.137, "step": 15126 }, { "epoch": 2.135978537136402, "grad_norm": 3.255644785142373, "learning_rate": 4.044231476386271e-06, "loss": 0.1428, "step": 15127 }, { "epoch": 2.136119740186388, "grad_norm": 2.73315244051077, "learning_rate": 4.043006955075667e-06, "loss": 0.1417, "step": 15128 }, { "epoch": 2.136260943236374, "grad_norm": 4.063230899008293, "learning_rate": 4.041782572203009e-06, "loss": 0.1303, "step": 15129 }, { "epoch": 2.13640214628636, "grad_norm": 3.2473876566108912, "learning_rate": 4.0405583277967506e-06, "loss": 0.1535, "step": 15130 }, { "epoch": 2.1365433493363457, "grad_norm": 3.7070008738291453, "learning_rate": 4.0393342218853425e-06, "loss": 0.1599, "step": 15131 }, { "epoch": 2.1366845523863316, "grad_norm": 3.4082105076053537, "learning_rate": 4.038110254497234e-06, "loss": 0.1476, "step": 15132 }, { "epoch": 2.1368257554363175, "grad_norm": 5.208319682110785, "learning_rate": 4.0368864256608674e-06, "loss": 0.2258, "step": 15133 }, { "epoch": 2.1369669584863034, "grad_norm": 3.0085624690337975, "learning_rate": 4.0356627354046854e-06, "loss": 0.1475, "step": 15134 }, { "epoch": 2.1371081615362892, "grad_norm": 2.415015866021174, "learning_rate": 4.034439183757125e-06, "loss": 0.1138, "step": 15135 }, { "epoch": 2.137249364586275, "grad_norm": 3.1360550291141385, "learning_rate": 4.033215770746622e-06, "loss": 0.1698, "step": 15136 }, { "epoch": 2.137390567636261, "grad_norm": 4.566812597135408, "learning_rate": 4.0319924964016075e-06, "loss": 0.2156, "step": 15137 }, { "epoch": 2.137531770686247, "grad_norm": 6.247990108210894, "learning_rate": 4.030769360750507e-06, "loss": 0.283, "step": 15138 }, { "epoch": 2.1376729737362328, "grad_norm": 3.511944283390823, "learning_rate": 4.029546363821752e-06, "loss": 0.1779, "step": 15139 }, { "epoch": 2.1378141767862187, "grad_norm": 3.048618573623077, "learning_rate": 4.028323505643762e-06, "loss": 0.17, "step": 15140 }, { "epoch": 2.1379553798362045, "grad_norm": 3.7790333489320553, "learning_rate": 4.027100786244958e-06, "loss": 0.1425, "step": 15141 }, { "epoch": 2.1380965828861904, "grad_norm": 3.470877394090885, "learning_rate": 4.025878205653747e-06, "loss": 0.1668, "step": 15142 }, { "epoch": 2.1382377859361763, "grad_norm": 3.960163049755031, "learning_rate": 4.024655763898544e-06, "loss": 0.2372, "step": 15143 }, { "epoch": 2.138378988986162, "grad_norm": 4.182152833433484, "learning_rate": 4.023433461007764e-06, "loss": 0.145, "step": 15144 }, { "epoch": 2.138520192036148, "grad_norm": 3.2500553320678467, "learning_rate": 4.0222112970098095e-06, "loss": 0.1865, "step": 15145 }, { "epoch": 2.138661395086134, "grad_norm": 3.3012387812780988, "learning_rate": 4.020989271933082e-06, "loss": 0.154, "step": 15146 }, { "epoch": 2.13880259813612, "grad_norm": 3.0022748563567525, "learning_rate": 4.019767385805983e-06, "loss": 0.164, "step": 15147 }, { "epoch": 2.1389438011861057, "grad_norm": 3.159307467713296, "learning_rate": 4.0185456386569066e-06, "loss": 0.1743, "step": 15148 }, { "epoch": 2.1390850042360916, "grad_norm": 2.376297777505026, "learning_rate": 4.017324030514246e-06, "loss": 0.1294, "step": 15149 }, { "epoch": 2.1392262072860775, "grad_norm": 3.462669972751617, "learning_rate": 4.016102561406392e-06, "loss": 0.1799, "step": 15150 }, { "epoch": 2.1393674103360634, "grad_norm": 2.645935121768809, "learning_rate": 4.014881231361729e-06, "loss": 0.165, "step": 15151 }, { "epoch": 2.1395086133860493, "grad_norm": 3.791345115388742, "learning_rate": 4.013660040408643e-06, "loss": 0.2019, "step": 15152 }, { "epoch": 2.139649816436035, "grad_norm": 2.508780759169283, "learning_rate": 4.012438988575511e-06, "loss": 0.1248, "step": 15153 }, { "epoch": 2.139791019486021, "grad_norm": 3.35665896251655, "learning_rate": 4.011218075890711e-06, "loss": 0.1979, "step": 15154 }, { "epoch": 2.139932222536007, "grad_norm": 2.929483331155179, "learning_rate": 4.009997302382614e-06, "loss": 0.1587, "step": 15155 }, { "epoch": 2.140073425585993, "grad_norm": 3.1618003912768797, "learning_rate": 4.008776668079596e-06, "loss": 0.1694, "step": 15156 }, { "epoch": 2.1402146286359787, "grad_norm": 3.0996051331933314, "learning_rate": 4.007556173010021e-06, "loss": 0.1603, "step": 15157 }, { "epoch": 2.1403558316859645, "grad_norm": 2.7029980435098557, "learning_rate": 4.006335817202256e-06, "loss": 0.1402, "step": 15158 }, { "epoch": 2.1404970347359504, "grad_norm": 3.3183063046034547, "learning_rate": 4.005115600684655e-06, "loss": 0.1495, "step": 15159 }, { "epoch": 2.1406382377859363, "grad_norm": 3.2679324253842856, "learning_rate": 4.003895523485575e-06, "loss": 0.1622, "step": 15160 }, { "epoch": 2.140779440835922, "grad_norm": 3.079385951513124, "learning_rate": 4.002675585633375e-06, "loss": 0.1909, "step": 15161 }, { "epoch": 2.140920643885908, "grad_norm": 2.7531241135142106, "learning_rate": 4.001455787156407e-06, "loss": 0.1351, "step": 15162 }, { "epoch": 2.141061846935894, "grad_norm": 3.187182563919982, "learning_rate": 4.000236128083015e-06, "loss": 0.1676, "step": 15163 }, { "epoch": 2.14120304998588, "grad_norm": 3.0667966235965234, "learning_rate": 3.999016608441544e-06, "loss": 0.1932, "step": 15164 }, { "epoch": 2.1413442530358657, "grad_norm": 3.1194157442053454, "learning_rate": 3.997797228260335e-06, "loss": 0.1498, "step": 15165 }, { "epoch": 2.1414854560858516, "grad_norm": 3.7077525736991275, "learning_rate": 3.996577987567727e-06, "loss": 0.2051, "step": 15166 }, { "epoch": 2.1416266591358375, "grad_norm": 3.055954528693823, "learning_rate": 3.9953588863920535e-06, "loss": 0.1474, "step": 15167 }, { "epoch": 2.1417678621858234, "grad_norm": 2.7218662502817565, "learning_rate": 3.994139924761646e-06, "loss": 0.143, "step": 15168 }, { "epoch": 2.1419090652358093, "grad_norm": 3.496117808398058, "learning_rate": 3.992921102704834e-06, "loss": 0.127, "step": 15169 }, { "epoch": 2.142050268285795, "grad_norm": 3.6607248806627948, "learning_rate": 3.991702420249941e-06, "loss": 0.2107, "step": 15170 }, { "epoch": 2.142191471335781, "grad_norm": 2.7851012709042884, "learning_rate": 3.9904838774252885e-06, "loss": 0.1676, "step": 15171 }, { "epoch": 2.142332674385767, "grad_norm": 3.533113970563802, "learning_rate": 3.989265474259192e-06, "loss": 0.1713, "step": 15172 }, { "epoch": 2.142473877435753, "grad_norm": 3.199656264636358, "learning_rate": 3.988047210779973e-06, "loss": 0.1719, "step": 15173 }, { "epoch": 2.1426150804857387, "grad_norm": 3.0967609894905954, "learning_rate": 3.986829087015941e-06, "loss": 0.1173, "step": 15174 }, { "epoch": 2.1427562835357246, "grad_norm": 3.269090969058446, "learning_rate": 3.985611102995408e-06, "loss": 0.1818, "step": 15175 }, { "epoch": 2.1428974865857104, "grad_norm": 4.428602579776687, "learning_rate": 3.984393258746672e-06, "loss": 0.192, "step": 15176 }, { "epoch": 2.1430386896356963, "grad_norm": 3.357909048416758, "learning_rate": 3.983175554298035e-06, "loss": 0.1816, "step": 15177 }, { "epoch": 2.143179892685682, "grad_norm": 2.497142934246939, "learning_rate": 3.981957989677803e-06, "loss": 0.1563, "step": 15178 }, { "epoch": 2.143321095735668, "grad_norm": 3.1414115409473906, "learning_rate": 3.980740564914268e-06, "loss": 0.1574, "step": 15179 }, { "epoch": 2.143462298785654, "grad_norm": 3.267803826234112, "learning_rate": 3.979523280035723e-06, "loss": 0.1422, "step": 15180 }, { "epoch": 2.14360350183564, "grad_norm": 3.4330432940750986, "learning_rate": 3.978306135070457e-06, "loss": 0.1632, "step": 15181 }, { "epoch": 2.1437447048856253, "grad_norm": 3.4411052188654576, "learning_rate": 3.977089130046756e-06, "loss": 0.1554, "step": 15182 }, { "epoch": 2.143885907935611, "grad_norm": 3.558109655085892, "learning_rate": 3.975872264992903e-06, "loss": 0.2131, "step": 15183 }, { "epoch": 2.144027110985597, "grad_norm": 3.195918262159261, "learning_rate": 3.974655539937176e-06, "loss": 0.1716, "step": 15184 }, { "epoch": 2.144168314035583, "grad_norm": 4.040756047591546, "learning_rate": 3.973438954907852e-06, "loss": 0.1915, "step": 15185 }, { "epoch": 2.144309517085569, "grad_norm": 3.5318084546342217, "learning_rate": 3.972222509933204e-06, "loss": 0.1929, "step": 15186 }, { "epoch": 2.1444507201355547, "grad_norm": 2.962908962613317, "learning_rate": 3.971006205041503e-06, "loss": 0.1578, "step": 15187 }, { "epoch": 2.1445919231855406, "grad_norm": 3.1601363107077574, "learning_rate": 3.969790040261013e-06, "loss": 0.1551, "step": 15188 }, { "epoch": 2.1447331262355265, "grad_norm": 3.238774562066638, "learning_rate": 3.968574015619995e-06, "loss": 0.1853, "step": 15189 }, { "epoch": 2.1448743292855124, "grad_norm": 3.0739655052021053, "learning_rate": 3.967358131146716e-06, "loss": 0.1587, "step": 15190 }, { "epoch": 2.1450155323354982, "grad_norm": 3.4547586948207205, "learning_rate": 3.966142386869428e-06, "loss": 0.1342, "step": 15191 }, { "epoch": 2.145156735385484, "grad_norm": 2.7708926701923953, "learning_rate": 3.96492678281639e-06, "loss": 0.149, "step": 15192 }, { "epoch": 2.14529793843547, "grad_norm": 3.1304062740708614, "learning_rate": 3.963711319015842e-06, "loss": 0.1659, "step": 15193 }, { "epoch": 2.145439141485456, "grad_norm": 2.610024941630998, "learning_rate": 3.962495995496037e-06, "loss": 0.1337, "step": 15194 }, { "epoch": 2.1455803445354418, "grad_norm": 2.6931190965156504, "learning_rate": 3.961280812285213e-06, "loss": 0.1158, "step": 15195 }, { "epoch": 2.1457215475854277, "grad_norm": 3.741099711044749, "learning_rate": 3.960065769411619e-06, "loss": 0.2026, "step": 15196 }, { "epoch": 2.1458627506354135, "grad_norm": 3.127409213301519, "learning_rate": 3.958850866903488e-06, "loss": 0.1582, "step": 15197 }, { "epoch": 2.1460039536853994, "grad_norm": 3.489258447439934, "learning_rate": 3.957636104789056e-06, "loss": 0.1748, "step": 15198 }, { "epoch": 2.1461451567353853, "grad_norm": 3.1813346589851115, "learning_rate": 3.956421483096548e-06, "loss": 0.1766, "step": 15199 }, { "epoch": 2.146286359785371, "grad_norm": 3.5800884401977164, "learning_rate": 3.955207001854197e-06, "loss": 0.203, "step": 15200 }, { "epoch": 2.146427562835357, "grad_norm": 3.4382386229566118, "learning_rate": 3.953992661090224e-06, "loss": 0.1817, "step": 15201 }, { "epoch": 2.146568765885343, "grad_norm": 4.109308435708993, "learning_rate": 3.952778460832851e-06, "loss": 0.1811, "step": 15202 }, { "epoch": 2.146709968935329, "grad_norm": 4.025631857455741, "learning_rate": 3.951564401110295e-06, "loss": 0.2229, "step": 15203 }, { "epoch": 2.1468511719853147, "grad_norm": 3.630509849707339, "learning_rate": 3.95035048195077e-06, "loss": 0.1426, "step": 15204 }, { "epoch": 2.1469923750353006, "grad_norm": 4.073689971394328, "learning_rate": 3.949136703382487e-06, "loss": 0.2113, "step": 15205 }, { "epoch": 2.1471335780852865, "grad_norm": 2.829861950598597, "learning_rate": 3.947923065433654e-06, "loss": 0.145, "step": 15206 }, { "epoch": 2.1472747811352724, "grad_norm": 3.5289035426515123, "learning_rate": 3.9467095681324715e-06, "loss": 0.1772, "step": 15207 }, { "epoch": 2.1474159841852583, "grad_norm": 2.69034786456011, "learning_rate": 3.945496211507148e-06, "loss": 0.1617, "step": 15208 }, { "epoch": 2.147557187235244, "grad_norm": 3.927892543113057, "learning_rate": 3.944282995585882e-06, "loss": 0.2087, "step": 15209 }, { "epoch": 2.14769839028523, "grad_norm": 2.5936137684417844, "learning_rate": 3.943069920396859e-06, "loss": 0.1344, "step": 15210 }, { "epoch": 2.147839593335216, "grad_norm": 4.122725034284877, "learning_rate": 3.941856985968277e-06, "loss": 0.217, "step": 15211 }, { "epoch": 2.147980796385202, "grad_norm": 3.1809252842661024, "learning_rate": 3.940644192328317e-06, "loss": 0.1528, "step": 15212 }, { "epoch": 2.1481219994351877, "grad_norm": 5.875969087426035, "learning_rate": 3.939431539505173e-06, "loss": 0.1455, "step": 15213 }, { "epoch": 2.1482632024851736, "grad_norm": 3.6367583105917825, "learning_rate": 3.938219027527023e-06, "loss": 0.1589, "step": 15214 }, { "epoch": 2.1484044055351594, "grad_norm": 3.235366826311923, "learning_rate": 3.937006656422045e-06, "loss": 0.1686, "step": 15215 }, { "epoch": 2.1485456085851453, "grad_norm": 3.620322413932032, "learning_rate": 3.935794426218415e-06, "loss": 0.1757, "step": 15216 }, { "epoch": 2.148686811635131, "grad_norm": 3.057936909899692, "learning_rate": 3.934582336944303e-06, "loss": 0.1995, "step": 15217 }, { "epoch": 2.148828014685117, "grad_norm": 2.873163341300102, "learning_rate": 3.933370388627878e-06, "loss": 0.1603, "step": 15218 }, { "epoch": 2.148969217735103, "grad_norm": 2.587974463879932, "learning_rate": 3.932158581297304e-06, "loss": 0.122, "step": 15219 }, { "epoch": 2.149110420785089, "grad_norm": 2.6511872126179887, "learning_rate": 3.930946914980744e-06, "loss": 0.1325, "step": 15220 }, { "epoch": 2.1492516238350747, "grad_norm": 4.194467001381478, "learning_rate": 3.929735389706359e-06, "loss": 0.2317, "step": 15221 }, { "epoch": 2.1493928268850606, "grad_norm": 3.1830369730238304, "learning_rate": 3.9285240055023e-06, "loss": 0.1559, "step": 15222 }, { "epoch": 2.1495340299350465, "grad_norm": 3.3265606598456063, "learning_rate": 3.9273127623967214e-06, "loss": 0.223, "step": 15223 }, { "epoch": 2.1496752329850324, "grad_norm": 3.0650386940784142, "learning_rate": 3.926101660417769e-06, "loss": 0.1813, "step": 15224 }, { "epoch": 2.1498164360350183, "grad_norm": 4.414065020603255, "learning_rate": 3.924890699593593e-06, "loss": 0.2608, "step": 15225 }, { "epoch": 2.149957639085004, "grad_norm": 3.3103526540626755, "learning_rate": 3.9236798799523375e-06, "loss": 0.1755, "step": 15226 }, { "epoch": 2.15009884213499, "grad_norm": 2.7203075192672816, "learning_rate": 3.9224692015221345e-06, "loss": 0.1546, "step": 15227 }, { "epoch": 2.150240045184976, "grad_norm": 3.443101914034153, "learning_rate": 3.921258664331122e-06, "loss": 0.1638, "step": 15228 }, { "epoch": 2.150381248234962, "grad_norm": 3.400283166139419, "learning_rate": 3.9200482684074295e-06, "loss": 0.1657, "step": 15229 }, { "epoch": 2.1505224512849477, "grad_norm": 2.9367851158664, "learning_rate": 3.9188380137791934e-06, "loss": 0.1419, "step": 15230 }, { "epoch": 2.1506636543349336, "grad_norm": 2.750512390439242, "learning_rate": 3.917627900474535e-06, "loss": 0.1196, "step": 15231 }, { "epoch": 2.1508048573849194, "grad_norm": 3.112406590871209, "learning_rate": 3.9164179285215785e-06, "loss": 0.2019, "step": 15232 }, { "epoch": 2.1509460604349053, "grad_norm": 3.121517301643916, "learning_rate": 3.915208097948441e-06, "loss": 0.1534, "step": 15233 }, { "epoch": 2.151087263484891, "grad_norm": 3.1015066864255734, "learning_rate": 3.91399840878324e-06, "loss": 0.1738, "step": 15234 }, { "epoch": 2.151228466534877, "grad_norm": 2.6167542035203013, "learning_rate": 3.9127888610540875e-06, "loss": 0.1466, "step": 15235 }, { "epoch": 2.151369669584863, "grad_norm": 3.2346521186791093, "learning_rate": 3.911579454789093e-06, "loss": 0.1544, "step": 15236 }, { "epoch": 2.151510872634849, "grad_norm": 2.487383517119653, "learning_rate": 3.9103701900163625e-06, "loss": 0.1348, "step": 15237 }, { "epoch": 2.1516520756848347, "grad_norm": 3.5960891463619205, "learning_rate": 3.909161066763999e-06, "loss": 0.1931, "step": 15238 }, { "epoch": 2.1517932787348206, "grad_norm": 3.1006186838104175, "learning_rate": 3.9079520850601015e-06, "loss": 0.1578, "step": 15239 }, { "epoch": 2.1519344817848065, "grad_norm": 2.8495324753695854, "learning_rate": 3.906743244932767e-06, "loss": 0.1366, "step": 15240 }, { "epoch": 2.1520756848347924, "grad_norm": 3.489278642149818, "learning_rate": 3.905534546410085e-06, "loss": 0.1586, "step": 15241 }, { "epoch": 2.1522168878847783, "grad_norm": 3.331838978726516, "learning_rate": 3.90432598952015e-06, "loss": 0.1717, "step": 15242 }, { "epoch": 2.152358090934764, "grad_norm": 3.4704768721589976, "learning_rate": 3.903117574291051e-06, "loss": 0.1476, "step": 15243 }, { "epoch": 2.15249929398475, "grad_norm": 2.7336150874614953, "learning_rate": 3.9019093007508635e-06, "loss": 0.136, "step": 15244 }, { "epoch": 2.152640497034736, "grad_norm": 2.7102379668933034, "learning_rate": 3.900701168927669e-06, "loss": 0.1342, "step": 15245 }, { "epoch": 2.152781700084722, "grad_norm": 2.592063471231596, "learning_rate": 3.899493178849544e-06, "loss": 0.1407, "step": 15246 }, { "epoch": 2.1529229031347077, "grad_norm": 4.422908590941699, "learning_rate": 3.898285330544565e-06, "loss": 0.2081, "step": 15247 }, { "epoch": 2.1530641061846936, "grad_norm": 3.602474572692268, "learning_rate": 3.8970776240407995e-06, "loss": 0.2001, "step": 15248 }, { "epoch": 2.1532053092346795, "grad_norm": 3.554511121885151, "learning_rate": 3.895870059366315e-06, "loss": 0.2069, "step": 15249 }, { "epoch": 2.1533465122846653, "grad_norm": 3.240250043461691, "learning_rate": 3.8946626365491735e-06, "loss": 0.1669, "step": 15250 }, { "epoch": 2.153487715334651, "grad_norm": 3.352868308007309, "learning_rate": 3.893455355617436e-06, "loss": 0.1782, "step": 15251 }, { "epoch": 2.153628918384637, "grad_norm": 3.4416225348538165, "learning_rate": 3.892248216599158e-06, "loss": 0.1948, "step": 15252 }, { "epoch": 2.153770121434623, "grad_norm": 3.5919661481983534, "learning_rate": 3.891041219522394e-06, "loss": 0.1624, "step": 15253 }, { "epoch": 2.153911324484609, "grad_norm": 3.5036134787466207, "learning_rate": 3.8898343644151945e-06, "loss": 0.1934, "step": 15254 }, { "epoch": 2.1540525275345948, "grad_norm": 3.130168988300437, "learning_rate": 3.888627651305605e-06, "loss": 0.129, "step": 15255 }, { "epoch": 2.1541937305845806, "grad_norm": 2.866773811252313, "learning_rate": 3.887421080221671e-06, "loss": 0.1688, "step": 15256 }, { "epoch": 2.1543349336345665, "grad_norm": 3.534645331581875, "learning_rate": 3.88621465119143e-06, "loss": 0.1344, "step": 15257 }, { "epoch": 2.1544761366845524, "grad_norm": 2.987871963268388, "learning_rate": 3.885008364242917e-06, "loss": 0.1371, "step": 15258 }, { "epoch": 2.1546173397345383, "grad_norm": 3.286371591947467, "learning_rate": 3.8838022194041725e-06, "loss": 0.1774, "step": 15259 }, { "epoch": 2.154758542784524, "grad_norm": 2.534551743520572, "learning_rate": 3.882596216703226e-06, "loss": 0.1197, "step": 15260 }, { "epoch": 2.15489974583451, "grad_norm": 2.9437899527965987, "learning_rate": 3.881390356168098e-06, "loss": 0.1562, "step": 15261 }, { "epoch": 2.155040948884496, "grad_norm": 2.866136110078719, "learning_rate": 3.880184637826816e-06, "loss": 0.1356, "step": 15262 }, { "epoch": 2.155182151934482, "grad_norm": 2.6240266093338542, "learning_rate": 3.878979061707396e-06, "loss": 0.1432, "step": 15263 }, { "epoch": 2.1553233549844677, "grad_norm": 3.415561897251931, "learning_rate": 3.877773627837863e-06, "loss": 0.1843, "step": 15264 }, { "epoch": 2.1554645580344536, "grad_norm": 2.8768257785004865, "learning_rate": 3.876568336246228e-06, "loss": 0.1365, "step": 15265 }, { "epoch": 2.1556057610844395, "grad_norm": 3.3656178980087543, "learning_rate": 3.875363186960499e-06, "loss": 0.1969, "step": 15266 }, { "epoch": 2.1557469641344253, "grad_norm": 3.2037309209292135, "learning_rate": 3.874158180008685e-06, "loss": 0.1757, "step": 15267 }, { "epoch": 2.1558881671844112, "grad_norm": 3.4781164380295593, "learning_rate": 3.872953315418793e-06, "loss": 0.1873, "step": 15268 }, { "epoch": 2.156029370234397, "grad_norm": 3.4690197393626985, "learning_rate": 3.8717485932188124e-06, "loss": 0.1767, "step": 15269 }, { "epoch": 2.156170573284383, "grad_norm": 3.3157076970876047, "learning_rate": 3.870544013436751e-06, "loss": 0.2028, "step": 15270 }, { "epoch": 2.156311776334369, "grad_norm": 3.3084173472157437, "learning_rate": 3.8693395761006e-06, "loss": 0.1312, "step": 15271 }, { "epoch": 2.1564529793843548, "grad_norm": 3.918745350214336, "learning_rate": 3.868135281238351e-06, "loss": 0.1952, "step": 15272 }, { "epoch": 2.1565941824343406, "grad_norm": 2.9830337598343197, "learning_rate": 3.866931128877989e-06, "loss": 0.1834, "step": 15273 }, { "epoch": 2.1567353854843265, "grad_norm": 2.9262998541081875, "learning_rate": 3.865727119047497e-06, "loss": 0.1485, "step": 15274 }, { "epoch": 2.1568765885343124, "grad_norm": 2.8374622433568923, "learning_rate": 3.864523251774856e-06, "loss": 0.1583, "step": 15275 }, { "epoch": 2.1570177915842983, "grad_norm": 3.511293800393851, "learning_rate": 3.863319527088048e-06, "loss": 0.1712, "step": 15276 }, { "epoch": 2.157158994634284, "grad_norm": 3.468901440733914, "learning_rate": 3.862115945015048e-06, "loss": 0.1701, "step": 15277 }, { "epoch": 2.15730019768427, "grad_norm": 2.9560518900121924, "learning_rate": 3.860912505583819e-06, "loss": 0.1547, "step": 15278 }, { "epoch": 2.157441400734256, "grad_norm": 3.9744047529219397, "learning_rate": 3.859709208822332e-06, "loss": 0.2261, "step": 15279 }, { "epoch": 2.157582603784242, "grad_norm": 3.571880403574438, "learning_rate": 3.858506054758547e-06, "loss": 0.1599, "step": 15280 }, { "epoch": 2.1577238068342277, "grad_norm": 3.0631027697441615, "learning_rate": 3.857303043420434e-06, "loss": 0.1688, "step": 15281 }, { "epoch": 2.1578650098842136, "grad_norm": 2.8119924638164187, "learning_rate": 3.856100174835945e-06, "loss": 0.1784, "step": 15282 }, { "epoch": 2.1580062129341995, "grad_norm": 3.0827524247279987, "learning_rate": 3.854897449033034e-06, "loss": 0.155, "step": 15283 }, { "epoch": 2.1581474159841854, "grad_norm": 2.68446970016538, "learning_rate": 3.853694866039652e-06, "loss": 0.11, "step": 15284 }, { "epoch": 2.1582886190341712, "grad_norm": 3.5419555907107028, "learning_rate": 3.852492425883752e-06, "loss": 0.1708, "step": 15285 }, { "epoch": 2.158429822084157, "grad_norm": 2.953795648035667, "learning_rate": 3.851290128593265e-06, "loss": 0.1659, "step": 15286 }, { "epoch": 2.158571025134143, "grad_norm": 2.944376672317145, "learning_rate": 3.850087974196143e-06, "loss": 0.1562, "step": 15287 }, { "epoch": 2.158712228184129, "grad_norm": 3.2035503266163334, "learning_rate": 3.848885962720321e-06, "loss": 0.1421, "step": 15288 }, { "epoch": 2.1588534312341148, "grad_norm": 3.63941778898501, "learning_rate": 3.847684094193733e-06, "loss": 0.2136, "step": 15289 }, { "epoch": 2.1589946342841007, "grad_norm": 3.0149872853986106, "learning_rate": 3.84648236864431e-06, "loss": 0.1563, "step": 15290 }, { "epoch": 2.1591358373340865, "grad_norm": 3.3275718954586653, "learning_rate": 3.8452807860999785e-06, "loss": 0.1974, "step": 15291 }, { "epoch": 2.1592770403840724, "grad_norm": 3.2755945761329954, "learning_rate": 3.8440793465886595e-06, "loss": 0.2, "step": 15292 }, { "epoch": 2.1594182434340583, "grad_norm": 3.33383384526692, "learning_rate": 3.842878050138282e-06, "loss": 0.128, "step": 15293 }, { "epoch": 2.159559446484044, "grad_norm": 3.176719835032803, "learning_rate": 3.841676896776764e-06, "loss": 0.1643, "step": 15294 }, { "epoch": 2.15970064953403, "grad_norm": 3.613338091187333, "learning_rate": 3.840475886532011e-06, "loss": 0.2051, "step": 15295 }, { "epoch": 2.159841852584016, "grad_norm": 3.3625537353218165, "learning_rate": 3.8392750194319385e-06, "loss": 0.1376, "step": 15296 }, { "epoch": 2.159983055634002, "grad_norm": 3.146469835139758, "learning_rate": 3.838074295504454e-06, "loss": 0.1657, "step": 15297 }, { "epoch": 2.1601242586839877, "grad_norm": 2.6082015758698103, "learning_rate": 3.836873714777458e-06, "loss": 0.1632, "step": 15298 }, { "epoch": 2.1602654617339736, "grad_norm": 4.540842152243143, "learning_rate": 3.835673277278858e-06, "loss": 0.1981, "step": 15299 }, { "epoch": 2.1604066647839595, "grad_norm": 3.2490854969366683, "learning_rate": 3.834472983036551e-06, "loss": 0.167, "step": 15300 }, { "epoch": 2.1605478678339454, "grad_norm": 3.9556932777909286, "learning_rate": 3.8332728320784275e-06, "loss": 0.194, "step": 15301 }, { "epoch": 2.1606890708839313, "grad_norm": 2.700978814903802, "learning_rate": 3.832072824432385e-06, "loss": 0.1285, "step": 15302 }, { "epoch": 2.160830273933917, "grad_norm": 3.4352314085462674, "learning_rate": 3.830872960126297e-06, "loss": 0.1658, "step": 15303 }, { "epoch": 2.160971476983903, "grad_norm": 2.5588646615240687, "learning_rate": 3.829673239188063e-06, "loss": 0.1427, "step": 15304 }, { "epoch": 2.161112680033889, "grad_norm": 3.5142425041654355, "learning_rate": 3.828473661645558e-06, "loss": 0.1978, "step": 15305 }, { "epoch": 2.161253883083875, "grad_norm": 3.2926734622901854, "learning_rate": 3.827274227526658e-06, "loss": 0.1633, "step": 15306 }, { "epoch": 2.1613950861338607, "grad_norm": 2.9461290159189804, "learning_rate": 3.82607493685924e-06, "loss": 0.1412, "step": 15307 }, { "epoch": 2.1615362891838465, "grad_norm": 3.386064984708725, "learning_rate": 3.824875789671175e-06, "loss": 0.1566, "step": 15308 }, { "epoch": 2.1616774922338324, "grad_norm": 2.862460929557685, "learning_rate": 3.823676785990329e-06, "loss": 0.1278, "step": 15309 }, { "epoch": 2.1618186952838183, "grad_norm": 4.086992362211393, "learning_rate": 3.822477925844564e-06, "loss": 0.1909, "step": 15310 }, { "epoch": 2.161959898333804, "grad_norm": 3.7664697822042954, "learning_rate": 3.821279209261751e-06, "loss": 0.1795, "step": 15311 }, { "epoch": 2.16210110138379, "grad_norm": 3.3440607693996642, "learning_rate": 3.820080636269737e-06, "loss": 0.1616, "step": 15312 }, { "epoch": 2.162242304433776, "grad_norm": 3.0048849576878016, "learning_rate": 3.81888220689638e-06, "loss": 0.1434, "step": 15313 }, { "epoch": 2.162383507483762, "grad_norm": 2.896908253207604, "learning_rate": 3.8176839211695314e-06, "loss": 0.133, "step": 15314 }, { "epoch": 2.1625247105337477, "grad_norm": 3.2537810170854105, "learning_rate": 3.816485779117035e-06, "loss": 0.1665, "step": 15315 }, { "epoch": 2.1626659135837336, "grad_norm": 3.0621114896382533, "learning_rate": 3.815287780766743e-06, "loss": 0.1639, "step": 15316 }, { "epoch": 2.1628071166337195, "grad_norm": 3.6768795872327686, "learning_rate": 3.8140899261464915e-06, "loss": 0.1923, "step": 15317 }, { "epoch": 2.1629483196837054, "grad_norm": 2.7463785874010997, "learning_rate": 3.8128922152841188e-06, "loss": 0.1391, "step": 15318 }, { "epoch": 2.1630895227336913, "grad_norm": 3.244986865456969, "learning_rate": 3.811694648207461e-06, "loss": 0.1863, "step": 15319 }, { "epoch": 2.163230725783677, "grad_norm": 4.673323706078541, "learning_rate": 3.8104972249443417e-06, "loss": 0.218, "step": 15320 }, { "epoch": 2.163371928833663, "grad_norm": 2.526117921874665, "learning_rate": 3.8092999455225967e-06, "loss": 0.1265, "step": 15321 }, { "epoch": 2.163513131883649, "grad_norm": 3.296483664903737, "learning_rate": 3.808102809970048e-06, "loss": 0.1766, "step": 15322 }, { "epoch": 2.163654334933635, "grad_norm": 3.789448211818931, "learning_rate": 3.806905818314515e-06, "loss": 0.1762, "step": 15323 }, { "epoch": 2.1637955379836207, "grad_norm": 3.9197739271259597, "learning_rate": 3.805708970583817e-06, "loss": 0.1974, "step": 15324 }, { "epoch": 2.1639367410336066, "grad_norm": 3.1000307937396285, "learning_rate": 3.8045122668057668e-06, "loss": 0.141, "step": 15325 }, { "epoch": 2.164077944083592, "grad_norm": 2.657344620693299, "learning_rate": 3.803315707008176e-06, "loss": 0.15, "step": 15326 }, { "epoch": 2.164219147133578, "grad_norm": 2.8089350673781137, "learning_rate": 3.8021192912188485e-06, "loss": 0.1478, "step": 15327 }, { "epoch": 2.1643603501835638, "grad_norm": 3.7857709409979274, "learning_rate": 3.800923019465599e-06, "loss": 0.168, "step": 15328 }, { "epoch": 2.1645015532335496, "grad_norm": 3.50589759910851, "learning_rate": 3.7997268917762185e-06, "loss": 0.1819, "step": 15329 }, { "epoch": 2.1646427562835355, "grad_norm": 2.2082483455816955, "learning_rate": 3.798530908178506e-06, "loss": 0.0905, "step": 15330 }, { "epoch": 2.1647839593335214, "grad_norm": 3.490753386117512, "learning_rate": 3.7973350687002584e-06, "loss": 0.1793, "step": 15331 }, { "epoch": 2.1649251623835073, "grad_norm": 2.4821707090911875, "learning_rate": 3.796139373369261e-06, "loss": 0.1213, "step": 15332 }, { "epoch": 2.165066365433493, "grad_norm": 3.0543426265133684, "learning_rate": 3.7949438222133085e-06, "loss": 0.1226, "step": 15333 }, { "epoch": 2.165207568483479, "grad_norm": 3.287532178360304, "learning_rate": 3.7937484152601824e-06, "loss": 0.1563, "step": 15334 }, { "epoch": 2.165348771533465, "grad_norm": 3.243183900110629, "learning_rate": 3.7925531525376624e-06, "loss": 0.1382, "step": 15335 }, { "epoch": 2.165489974583451, "grad_norm": 3.2933214377181224, "learning_rate": 3.79135803407353e-06, "loss": 0.2206, "step": 15336 }, { "epoch": 2.1656311776334367, "grad_norm": 3.0993382501902786, "learning_rate": 3.7901630598955485e-06, "loss": 0.1694, "step": 15337 }, { "epoch": 2.1657723806834226, "grad_norm": 3.3899711113670254, "learning_rate": 3.7889682300315e-06, "loss": 0.1695, "step": 15338 }, { "epoch": 2.1659135837334085, "grad_norm": 3.005882933121945, "learning_rate": 3.787773544509147e-06, "loss": 0.1497, "step": 15339 }, { "epoch": 2.1660547867833944, "grad_norm": 4.0668197087918605, "learning_rate": 3.7865790033562532e-06, "loss": 0.2043, "step": 15340 }, { "epoch": 2.1661959898333802, "grad_norm": 3.642277064534681, "learning_rate": 3.7853846066005807e-06, "loss": 0.2199, "step": 15341 }, { "epoch": 2.166337192883366, "grad_norm": 3.371790047851958, "learning_rate": 3.7841903542698855e-06, "loss": 0.1819, "step": 15342 }, { "epoch": 2.166478395933352, "grad_norm": 3.7317281704497307, "learning_rate": 3.7829962463919236e-06, "loss": 0.1712, "step": 15343 }, { "epoch": 2.166619598983338, "grad_norm": 3.5231949009561214, "learning_rate": 3.781802282994439e-06, "loss": 0.1687, "step": 15344 }, { "epoch": 2.1667608020333238, "grad_norm": 4.350786320114571, "learning_rate": 3.780608464105191e-06, "loss": 0.2178, "step": 15345 }, { "epoch": 2.1669020050833097, "grad_norm": 3.5252677665927776, "learning_rate": 3.7794147897519128e-06, "loss": 0.1751, "step": 15346 }, { "epoch": 2.1670432081332955, "grad_norm": 3.4246569735107126, "learning_rate": 3.778221259962349e-06, "loss": 0.1673, "step": 15347 }, { "epoch": 2.1671844111832814, "grad_norm": 3.3325876889875126, "learning_rate": 3.7770278747642364e-06, "loss": 0.1741, "step": 15348 }, { "epoch": 2.1673256142332673, "grad_norm": 2.907369636944353, "learning_rate": 3.7758346341853045e-06, "loss": 0.1463, "step": 15349 }, { "epoch": 2.167466817283253, "grad_norm": 3.784229481613177, "learning_rate": 3.7746415382532907e-06, "loss": 0.1882, "step": 15350 }, { "epoch": 2.167608020333239, "grad_norm": 3.8002700779615926, "learning_rate": 3.7734485869959193e-06, "loss": 0.1939, "step": 15351 }, { "epoch": 2.167749223383225, "grad_norm": 3.2231402742419397, "learning_rate": 3.7722557804409145e-06, "loss": 0.1108, "step": 15352 }, { "epoch": 2.167890426433211, "grad_norm": 2.6906017540948968, "learning_rate": 3.7710631186159984e-06, "loss": 0.1429, "step": 15353 }, { "epoch": 2.1680316294831967, "grad_norm": 3.60123040054035, "learning_rate": 3.7698706015488783e-06, "loss": 0.1755, "step": 15354 }, { "epoch": 2.1681728325331826, "grad_norm": 2.6837982470633093, "learning_rate": 3.7686782292672797e-06, "loss": 0.1457, "step": 15355 }, { "epoch": 2.1683140355831685, "grad_norm": 3.21164042358376, "learning_rate": 3.767486001798907e-06, "loss": 0.1484, "step": 15356 }, { "epoch": 2.1684552386331544, "grad_norm": 2.6316559880922816, "learning_rate": 3.7662939191714678e-06, "loss": 0.1393, "step": 15357 }, { "epoch": 2.1685964416831403, "grad_norm": 3.7521457753440535, "learning_rate": 3.7651019814126656e-06, "loss": 0.1746, "step": 15358 }, { "epoch": 2.168737644733126, "grad_norm": 3.6845159556381613, "learning_rate": 3.7639101885502015e-06, "loss": 0.1509, "step": 15359 }, { "epoch": 2.168878847783112, "grad_norm": 2.8416102219929327, "learning_rate": 3.7627185406117707e-06, "loss": 0.1396, "step": 15360 }, { "epoch": 2.169020050833098, "grad_norm": 3.3991676958603536, "learning_rate": 3.7615270376250646e-06, "loss": 0.2031, "step": 15361 }, { "epoch": 2.169161253883084, "grad_norm": 3.0516482272611967, "learning_rate": 3.7603356796177824e-06, "loss": 0.2067, "step": 15362 }, { "epoch": 2.1693024569330697, "grad_norm": 3.755994710291307, "learning_rate": 3.7591444666176004e-06, "loss": 0.2202, "step": 15363 }, { "epoch": 2.1694436599830556, "grad_norm": 2.9050947108982568, "learning_rate": 3.7579533986522065e-06, "loss": 0.1587, "step": 15364 }, { "epoch": 2.1695848630330414, "grad_norm": 3.0943909538038774, "learning_rate": 3.7567624757492795e-06, "loss": 0.1763, "step": 15365 }, { "epoch": 2.1697260660830273, "grad_norm": 4.203254514419069, "learning_rate": 3.755571697936493e-06, "loss": 0.2048, "step": 15366 }, { "epoch": 2.169867269133013, "grad_norm": 2.4446060555671894, "learning_rate": 3.754381065241527e-06, "loss": 0.139, "step": 15367 }, { "epoch": 2.170008472182999, "grad_norm": 2.9877278217025465, "learning_rate": 3.7531905776920474e-06, "loss": 0.1426, "step": 15368 }, { "epoch": 2.170149675232985, "grad_norm": 2.569160547657622, "learning_rate": 3.7520002353157213e-06, "loss": 0.1541, "step": 15369 }, { "epoch": 2.170290878282971, "grad_norm": 3.3145568733824993, "learning_rate": 3.7508100381402157e-06, "loss": 0.1545, "step": 15370 }, { "epoch": 2.1704320813329567, "grad_norm": 3.445364492838091, "learning_rate": 3.7496199861931815e-06, "loss": 0.141, "step": 15371 }, { "epoch": 2.1705732843829426, "grad_norm": 2.8588574401648783, "learning_rate": 3.7484300795022765e-06, "loss": 0.1223, "step": 15372 }, { "epoch": 2.1707144874329285, "grad_norm": 2.7249217650030375, "learning_rate": 3.74724031809516e-06, "loss": 0.1417, "step": 15373 }, { "epoch": 2.1708556904829144, "grad_norm": 3.3091110139399564, "learning_rate": 3.7460507019994775e-06, "loss": 0.1378, "step": 15374 }, { "epoch": 2.1709968935329003, "grad_norm": 3.712664975473104, "learning_rate": 3.7448612312428765e-06, "loss": 0.1754, "step": 15375 }, { "epoch": 2.171138096582886, "grad_norm": 2.817819294199809, "learning_rate": 3.743671905852999e-06, "loss": 0.1415, "step": 15376 }, { "epoch": 2.171279299632872, "grad_norm": 3.2452438491015685, "learning_rate": 3.7424827258574837e-06, "loss": 0.1871, "step": 15377 }, { "epoch": 2.171420502682858, "grad_norm": 2.813564694746286, "learning_rate": 3.741293691283968e-06, "loss": 0.1259, "step": 15378 }, { "epoch": 2.171561705732844, "grad_norm": 3.1138021737627026, "learning_rate": 3.740104802160085e-06, "loss": 0.1872, "step": 15379 }, { "epoch": 2.1717029087828297, "grad_norm": 3.052937746372439, "learning_rate": 3.738916058513462e-06, "loss": 0.1705, "step": 15380 }, { "epoch": 2.1718441118328156, "grad_norm": 2.925382643249554, "learning_rate": 3.7377274603717262e-06, "loss": 0.1757, "step": 15381 }, { "epoch": 2.1719853148828014, "grad_norm": 4.173324737980824, "learning_rate": 3.7365390077624985e-06, "loss": 0.1868, "step": 15382 }, { "epoch": 2.1721265179327873, "grad_norm": 3.317451027431448, "learning_rate": 3.7353507007134005e-06, "loss": 0.1904, "step": 15383 }, { "epoch": 2.172267720982773, "grad_norm": 3.0507985019053985, "learning_rate": 3.734162539252043e-06, "loss": 0.133, "step": 15384 }, { "epoch": 2.172408924032759, "grad_norm": 3.338489159534771, "learning_rate": 3.7329745234060457e-06, "loss": 0.1546, "step": 15385 }, { "epoch": 2.172550127082745, "grad_norm": 2.5675335271428463, "learning_rate": 3.7317866532030145e-06, "loss": 0.1563, "step": 15386 }, { "epoch": 2.172691330132731, "grad_norm": 2.2702537802327427, "learning_rate": 3.730598928670557e-06, "loss": 0.1227, "step": 15387 }, { "epoch": 2.1728325331827167, "grad_norm": 2.6190434464765997, "learning_rate": 3.7294113498362695e-06, "loss": 0.1271, "step": 15388 }, { "epoch": 2.1729737362327026, "grad_norm": 3.629753094343204, "learning_rate": 3.7282239167277513e-06, "loss": 0.1917, "step": 15389 }, { "epoch": 2.1731149392826885, "grad_norm": 3.8668453061453554, "learning_rate": 3.7270366293726033e-06, "loss": 0.1975, "step": 15390 }, { "epoch": 2.1732561423326744, "grad_norm": 3.0225970713047356, "learning_rate": 3.7258494877984154e-06, "loss": 0.1692, "step": 15391 }, { "epoch": 2.1733973453826603, "grad_norm": 3.327321908791253, "learning_rate": 3.7246624920327755e-06, "loss": 0.1612, "step": 15392 }, { "epoch": 2.173538548432646, "grad_norm": 3.0419769941703856, "learning_rate": 3.7234756421032694e-06, "loss": 0.1455, "step": 15393 }, { "epoch": 2.173679751482632, "grad_norm": 3.0845212846518533, "learning_rate": 3.722288938037478e-06, "loss": 0.156, "step": 15394 }, { "epoch": 2.173820954532618, "grad_norm": 2.9778173084114212, "learning_rate": 3.721102379862982e-06, "loss": 0.1469, "step": 15395 }, { "epoch": 2.173962157582604, "grad_norm": 2.8932410477389476, "learning_rate": 3.719915967607355e-06, "loss": 0.1483, "step": 15396 }, { "epoch": 2.1741033606325897, "grad_norm": 3.2071600955915005, "learning_rate": 3.718729701298167e-06, "loss": 0.1381, "step": 15397 }, { "epoch": 2.1742445636825756, "grad_norm": 2.857276049412006, "learning_rate": 3.71754358096299e-06, "loss": 0.1323, "step": 15398 }, { "epoch": 2.1743857667325615, "grad_norm": 2.4863503600481924, "learning_rate": 3.716357606629386e-06, "loss": 0.1333, "step": 15399 }, { "epoch": 2.1745269697825473, "grad_norm": 4.1421598585326995, "learning_rate": 3.7151717783249175e-06, "loss": 0.1688, "step": 15400 }, { "epoch": 2.174668172832533, "grad_norm": 2.8460378231879417, "learning_rate": 3.71398609607714e-06, "loss": 0.1227, "step": 15401 }, { "epoch": 2.174809375882519, "grad_norm": 3.8582424314658006, "learning_rate": 3.712800559913613e-06, "loss": 0.1703, "step": 15402 }, { "epoch": 2.174950578932505, "grad_norm": 3.2118177604130866, "learning_rate": 3.7116151698618874e-06, "loss": 0.1601, "step": 15403 }, { "epoch": 2.175091781982491, "grad_norm": 4.1378949072083575, "learning_rate": 3.7104299259495113e-06, "loss": 0.2213, "step": 15404 }, { "epoch": 2.1752329850324768, "grad_norm": 3.3155670257078507, "learning_rate": 3.709244828204025e-06, "loss": 0.1832, "step": 15405 }, { "epoch": 2.1753741880824626, "grad_norm": 3.0999955812339794, "learning_rate": 3.7080598766529686e-06, "loss": 0.1466, "step": 15406 }, { "epoch": 2.1755153911324485, "grad_norm": 3.5242071960954253, "learning_rate": 3.7068750713238867e-06, "loss": 0.1811, "step": 15407 }, { "epoch": 2.1756565941824344, "grad_norm": 3.150394533370885, "learning_rate": 3.7056904122443105e-06, "loss": 0.1602, "step": 15408 }, { "epoch": 2.1757977972324203, "grad_norm": 3.414828098630438, "learning_rate": 3.7045058994417702e-06, "loss": 0.1608, "step": 15409 }, { "epoch": 2.175939000282406, "grad_norm": 3.4177649883738255, "learning_rate": 3.7033215329437945e-06, "loss": 0.1634, "step": 15410 }, { "epoch": 2.176080203332392, "grad_norm": 4.0936006415291795, "learning_rate": 3.702137312777907e-06, "loss": 0.2131, "step": 15411 }, { "epoch": 2.176221406382378, "grad_norm": 3.292819154971959, "learning_rate": 3.7009532389716283e-06, "loss": 0.1683, "step": 15412 }, { "epoch": 2.176362609432364, "grad_norm": 2.566240218603671, "learning_rate": 3.6997693115524767e-06, "loss": 0.1377, "step": 15413 }, { "epoch": 2.1765038124823497, "grad_norm": 2.9993635308870936, "learning_rate": 3.6985855305479645e-06, "loss": 0.1409, "step": 15414 }, { "epoch": 2.1766450155323356, "grad_norm": 3.487226435877667, "learning_rate": 3.697401895985604e-06, "loss": 0.2029, "step": 15415 }, { "epoch": 2.1767862185823215, "grad_norm": 3.082418296204254, "learning_rate": 3.696218407892901e-06, "loss": 0.157, "step": 15416 }, { "epoch": 2.1769274216323073, "grad_norm": 2.9392330969402853, "learning_rate": 3.69503506629736e-06, "loss": 0.1356, "step": 15417 }, { "epoch": 2.1770686246822932, "grad_norm": 3.1772935088259664, "learning_rate": 3.6938518712264783e-06, "loss": 0.1537, "step": 15418 }, { "epoch": 2.177209827732279, "grad_norm": 2.7973819926962378, "learning_rate": 3.692668822707758e-06, "loss": 0.1101, "step": 15419 }, { "epoch": 2.177351030782265, "grad_norm": 3.5444023652906864, "learning_rate": 3.6914859207686916e-06, "loss": 0.1875, "step": 15420 }, { "epoch": 2.177492233832251, "grad_norm": 3.07895743309229, "learning_rate": 3.6903031654367704e-06, "loss": 0.1655, "step": 15421 }, { "epoch": 2.1776334368822368, "grad_norm": 3.431575091072321, "learning_rate": 3.689120556739475e-06, "loss": 0.1765, "step": 15422 }, { "epoch": 2.1777746399322226, "grad_norm": 3.0308792051961704, "learning_rate": 3.68793809470429e-06, "loss": 0.18, "step": 15423 }, { "epoch": 2.1779158429822085, "grad_norm": 3.2060547388156104, "learning_rate": 3.6867557793587005e-06, "loss": 0.1604, "step": 15424 }, { "epoch": 2.1780570460321944, "grad_norm": 2.7486338960706265, "learning_rate": 3.6855736107301798e-06, "loss": 0.1486, "step": 15425 }, { "epoch": 2.1781982490821803, "grad_norm": 3.371587265788391, "learning_rate": 3.6843915888462022e-06, "loss": 0.203, "step": 15426 }, { "epoch": 2.178339452132166, "grad_norm": 2.1483417185334965, "learning_rate": 3.683209713734237e-06, "loss": 0.1202, "step": 15427 }, { "epoch": 2.178480655182152, "grad_norm": 2.8470306628142428, "learning_rate": 3.682027985421749e-06, "loss": 0.1494, "step": 15428 }, { "epoch": 2.178621858232138, "grad_norm": 3.0798969842550004, "learning_rate": 3.6808464039362025e-06, "loss": 0.1611, "step": 15429 }, { "epoch": 2.178763061282124, "grad_norm": 2.8322921348360186, "learning_rate": 3.6796649693050568e-06, "loss": 0.1306, "step": 15430 }, { "epoch": 2.1789042643321097, "grad_norm": 3.625112437056275, "learning_rate": 3.6784836815557667e-06, "loss": 0.2418, "step": 15431 }, { "epoch": 2.1790454673820956, "grad_norm": 3.0233691197743253, "learning_rate": 3.677302540715787e-06, "loss": 0.1293, "step": 15432 }, { "epoch": 2.1791866704320815, "grad_norm": 3.874934332062811, "learning_rate": 3.6761215468125645e-06, "loss": 0.2121, "step": 15433 }, { "epoch": 2.1793278734820674, "grad_norm": 4.180066645550741, "learning_rate": 3.674940699873547e-06, "loss": 0.1549, "step": 15434 }, { "epoch": 2.1794690765320532, "grad_norm": 2.6413327577496966, "learning_rate": 3.673759999926173e-06, "loss": 0.152, "step": 15435 }, { "epoch": 2.179610279582039, "grad_norm": 3.541495736770971, "learning_rate": 3.672579446997887e-06, "loss": 0.1903, "step": 15436 }, { "epoch": 2.179751482632025, "grad_norm": 3.654485372123263, "learning_rate": 3.6713990411161226e-06, "loss": 0.1529, "step": 15437 }, { "epoch": 2.179892685682011, "grad_norm": 2.8836447095673092, "learning_rate": 3.6702187823083147e-06, "loss": 0.1716, "step": 15438 }, { "epoch": 2.1800338887319968, "grad_norm": 2.8071583988094218, "learning_rate": 3.6690386706018845e-06, "loss": 0.1875, "step": 15439 }, { "epoch": 2.1801750917819827, "grad_norm": 2.967512565000615, "learning_rate": 3.6678587060242586e-06, "loss": 0.1543, "step": 15440 }, { "epoch": 2.1803162948319685, "grad_norm": 2.9725164212249755, "learning_rate": 3.666678888602866e-06, "loss": 0.1588, "step": 15441 }, { "epoch": 2.1804574978819544, "grad_norm": 3.066526127589556, "learning_rate": 3.6654992183651193e-06, "loss": 0.1544, "step": 15442 }, { "epoch": 2.1805987009319403, "grad_norm": 2.6982121110481065, "learning_rate": 3.6643196953384364e-06, "loss": 0.1409, "step": 15443 }, { "epoch": 2.180739903981926, "grad_norm": 3.3729030614087705, "learning_rate": 3.663140319550227e-06, "loss": 0.2017, "step": 15444 }, { "epoch": 2.180881107031912, "grad_norm": 3.1231475995343825, "learning_rate": 3.6619610910279002e-06, "loss": 0.1847, "step": 15445 }, { "epoch": 2.181022310081898, "grad_norm": 3.5244871861513847, "learning_rate": 3.66078200979886e-06, "loss": 0.2072, "step": 15446 }, { "epoch": 2.181163513131884, "grad_norm": 3.7962253937521853, "learning_rate": 3.659603075890509e-06, "loss": 0.1711, "step": 15447 }, { "epoch": 2.1813047161818697, "grad_norm": 2.752664722262145, "learning_rate": 3.6584242893302436e-06, "loss": 0.1511, "step": 15448 }, { "epoch": 2.1814459192318556, "grad_norm": 3.0628206682511583, "learning_rate": 3.657245650145459e-06, "loss": 0.1665, "step": 15449 }, { "epoch": 2.1815871222818415, "grad_norm": 3.2191298711439855, "learning_rate": 3.6560671583635467e-06, "loss": 0.1493, "step": 15450 }, { "epoch": 2.1817283253318274, "grad_norm": 2.6842545405288023, "learning_rate": 3.6548888140118943e-06, "loss": 0.1125, "step": 15451 }, { "epoch": 2.1818695283818133, "grad_norm": 3.534330896928706, "learning_rate": 3.6537106171178817e-06, "loss": 0.1697, "step": 15452 }, { "epoch": 2.182010731431799, "grad_norm": 2.418358411102102, "learning_rate": 3.652532567708896e-06, "loss": 0.096, "step": 15453 }, { "epoch": 2.1821519344817846, "grad_norm": 3.14295379651231, "learning_rate": 3.651354665812313e-06, "loss": 0.1307, "step": 15454 }, { "epoch": 2.1822931375317705, "grad_norm": 2.7656813557578332, "learning_rate": 3.6501769114555098e-06, "loss": 0.1346, "step": 15455 }, { "epoch": 2.1824343405817563, "grad_norm": 3.6881461191630525, "learning_rate": 3.648999304665849e-06, "loss": 0.1808, "step": 15456 }, { "epoch": 2.182575543631742, "grad_norm": 3.534165451754848, "learning_rate": 3.6478218454706983e-06, "loss": 0.1695, "step": 15457 }, { "epoch": 2.182716746681728, "grad_norm": 2.6771873401821042, "learning_rate": 3.6466445338974276e-06, "loss": 0.1483, "step": 15458 }, { "epoch": 2.182857949731714, "grad_norm": 3.6080116822824833, "learning_rate": 3.6454673699733944e-06, "loss": 0.1412, "step": 15459 }, { "epoch": 2.1829991527817, "grad_norm": 3.5758350250237045, "learning_rate": 3.6442903537259556e-06, "loss": 0.2014, "step": 15460 }, { "epoch": 2.1831403558316858, "grad_norm": 3.158750203895158, "learning_rate": 3.6431134851824636e-06, "loss": 0.191, "step": 15461 }, { "epoch": 2.1832815588816716, "grad_norm": 3.8110112606242406, "learning_rate": 3.6419367643702684e-06, "loss": 0.1701, "step": 15462 }, { "epoch": 2.1834227619316575, "grad_norm": 3.2163903311923443, "learning_rate": 3.640760191316718e-06, "loss": 0.165, "step": 15463 }, { "epoch": 2.1835639649816434, "grad_norm": 3.193217910165306, "learning_rate": 3.6395837660491538e-06, "loss": 0.1494, "step": 15464 }, { "epoch": 2.1837051680316293, "grad_norm": 3.5142204541464577, "learning_rate": 3.638407488594916e-06, "loss": 0.1474, "step": 15465 }, { "epoch": 2.183846371081615, "grad_norm": 3.1809566022361935, "learning_rate": 3.637231358981341e-06, "loss": 0.1577, "step": 15466 }, { "epoch": 2.183987574131601, "grad_norm": 3.5099659995980295, "learning_rate": 3.636055377235762e-06, "loss": 0.1692, "step": 15467 }, { "epoch": 2.184128777181587, "grad_norm": 3.2532870930739692, "learning_rate": 3.6348795433855064e-06, "loss": 0.1542, "step": 15468 }, { "epoch": 2.184269980231573, "grad_norm": 3.2782038186335765, "learning_rate": 3.6337038574578988e-06, "loss": 0.1698, "step": 15469 }, { "epoch": 2.1844111832815587, "grad_norm": 3.5817459850788276, "learning_rate": 3.6325283194802675e-06, "loss": 0.1871, "step": 15470 }, { "epoch": 2.1845523863315446, "grad_norm": 2.6448900413850946, "learning_rate": 3.6313529294799275e-06, "loss": 0.16, "step": 15471 }, { "epoch": 2.1846935893815305, "grad_norm": 2.5208165213065676, "learning_rate": 3.6301776874841997e-06, "loss": 0.1581, "step": 15472 }, { "epoch": 2.1848347924315163, "grad_norm": 3.1225711057643695, "learning_rate": 3.6290025935203864e-06, "loss": 0.1898, "step": 15473 }, { "epoch": 2.1849759954815022, "grad_norm": 3.403694052392181, "learning_rate": 3.6278276476158025e-06, "loss": 0.1458, "step": 15474 }, { "epoch": 2.185117198531488, "grad_norm": 3.1226971520894713, "learning_rate": 3.6266528497977495e-06, "loss": 0.1704, "step": 15475 }, { "epoch": 2.185258401581474, "grad_norm": 3.0105009340355338, "learning_rate": 3.625478200093534e-06, "loss": 0.1883, "step": 15476 }, { "epoch": 2.18539960463146, "grad_norm": 3.2162702038479525, "learning_rate": 3.6243036985304525e-06, "loss": 0.1557, "step": 15477 }, { "epoch": 2.1855408076814458, "grad_norm": 3.246406867101034, "learning_rate": 3.6231293451357994e-06, "loss": 0.1541, "step": 15478 }, { "epoch": 2.1856820107314316, "grad_norm": 3.1570231730553378, "learning_rate": 3.621955139936867e-06, "loss": 0.1598, "step": 15479 }, { "epoch": 2.1858232137814175, "grad_norm": 3.0622882854010336, "learning_rate": 3.6207810829609414e-06, "loss": 0.1443, "step": 15480 }, { "epoch": 2.1859644168314034, "grad_norm": 3.2430220739027398, "learning_rate": 3.6196071742353087e-06, "loss": 0.1617, "step": 15481 }, { "epoch": 2.1861056198813893, "grad_norm": 3.169693848274505, "learning_rate": 3.6184334137872514e-06, "loss": 0.1687, "step": 15482 }, { "epoch": 2.186246822931375, "grad_norm": 2.869992721617655, "learning_rate": 3.6172598016440443e-06, "loss": 0.137, "step": 15483 }, { "epoch": 2.186388025981361, "grad_norm": 3.3487323292351765, "learning_rate": 3.6160863378329636e-06, "loss": 0.1711, "step": 15484 }, { "epoch": 2.186529229031347, "grad_norm": 2.8589989866442944, "learning_rate": 3.614913022381279e-06, "loss": 0.135, "step": 15485 }, { "epoch": 2.186670432081333, "grad_norm": 2.3762411535808687, "learning_rate": 3.613739855316257e-06, "loss": 0.1128, "step": 15486 }, { "epoch": 2.1868116351313187, "grad_norm": 3.4808161481006477, "learning_rate": 3.6125668366651613e-06, "loss": 0.1975, "step": 15487 }, { "epoch": 2.1869528381813046, "grad_norm": 2.9210077402670014, "learning_rate": 3.6113939664552567e-06, "loss": 0.1385, "step": 15488 }, { "epoch": 2.1870940412312905, "grad_norm": 3.288617646368304, "learning_rate": 3.6102212447137995e-06, "loss": 0.1307, "step": 15489 }, { "epoch": 2.1872352442812764, "grad_norm": 3.410335665689771, "learning_rate": 3.6090486714680396e-06, "loss": 0.1823, "step": 15490 }, { "epoch": 2.1873764473312622, "grad_norm": 3.660905365325739, "learning_rate": 3.607876246745228e-06, "loss": 0.202, "step": 15491 }, { "epoch": 2.187517650381248, "grad_norm": 3.020272203245384, "learning_rate": 3.606703970572607e-06, "loss": 0.1671, "step": 15492 }, { "epoch": 2.187658853431234, "grad_norm": 2.6932189650276976, "learning_rate": 3.6055318429774287e-06, "loss": 0.1351, "step": 15493 }, { "epoch": 2.18780005648122, "grad_norm": 3.0913154482352074, "learning_rate": 3.6043598639869293e-06, "loss": 0.1663, "step": 15494 }, { "epoch": 2.1879412595312058, "grad_norm": 2.7071655431665045, "learning_rate": 3.6031880336283453e-06, "loss": 0.1411, "step": 15495 }, { "epoch": 2.1880824625811917, "grad_norm": 3.2039178833115938, "learning_rate": 3.6020163519289077e-06, "loss": 0.1524, "step": 15496 }, { "epoch": 2.1882236656311775, "grad_norm": 3.205883948218298, "learning_rate": 3.6008448189158472e-06, "loss": 0.1371, "step": 15497 }, { "epoch": 2.1883648686811634, "grad_norm": 2.8837696156068633, "learning_rate": 3.5996734346163897e-06, "loss": 0.1116, "step": 15498 }, { "epoch": 2.1885060717311493, "grad_norm": 3.0605450916357606, "learning_rate": 3.5985021990577575e-06, "loss": 0.1363, "step": 15499 }, { "epoch": 2.188647274781135, "grad_norm": 2.960532549716206, "learning_rate": 3.5973311122671695e-06, "loss": 0.1575, "step": 15500 }, { "epoch": 2.188788477831121, "grad_norm": 2.556969891109087, "learning_rate": 3.596160174271841e-06, "loss": 0.1097, "step": 15501 }, { "epoch": 2.188929680881107, "grad_norm": 2.393406266888482, "learning_rate": 3.594989385098985e-06, "loss": 0.0893, "step": 15502 }, { "epoch": 2.189070883931093, "grad_norm": 2.87106077466674, "learning_rate": 3.5938187447758098e-06, "loss": 0.1418, "step": 15503 }, { "epoch": 2.1892120869810787, "grad_norm": 3.498501298338676, "learning_rate": 3.592648253329516e-06, "loss": 0.1455, "step": 15504 }, { "epoch": 2.1893532900310646, "grad_norm": 3.312113969650062, "learning_rate": 3.5914779107873145e-06, "loss": 0.1903, "step": 15505 }, { "epoch": 2.1894944930810505, "grad_norm": 3.3138531253366783, "learning_rate": 3.590307717176401e-06, "loss": 0.1548, "step": 15506 }, { "epoch": 2.1896356961310364, "grad_norm": 3.4241178725784516, "learning_rate": 3.5891376725239648e-06, "loss": 0.1396, "step": 15507 }, { "epoch": 2.1897768991810223, "grad_norm": 3.8812288976148923, "learning_rate": 3.587967776857201e-06, "loss": 0.1762, "step": 15508 }, { "epoch": 2.189918102231008, "grad_norm": 3.0153213031071555, "learning_rate": 3.5867980302032925e-06, "loss": 0.1231, "step": 15509 }, { "epoch": 2.190059305280994, "grad_norm": 3.689432467746225, "learning_rate": 3.585628432589432e-06, "loss": 0.1773, "step": 15510 }, { "epoch": 2.19020050833098, "grad_norm": 4.087967401312385, "learning_rate": 3.5844589840427968e-06, "loss": 0.1768, "step": 15511 }, { "epoch": 2.190341711380966, "grad_norm": 2.7355034519757053, "learning_rate": 3.583289684590564e-06, "loss": 0.1159, "step": 15512 }, { "epoch": 2.1904829144309517, "grad_norm": 3.32114138797245, "learning_rate": 3.5821205342599075e-06, "loss": 0.132, "step": 15513 }, { "epoch": 2.1906241174809375, "grad_norm": 3.3275525807685544, "learning_rate": 3.5809515330779985e-06, "loss": 0.1565, "step": 15514 }, { "epoch": 2.1907653205309234, "grad_norm": 3.401397357156667, "learning_rate": 3.579782681072004e-06, "loss": 0.1589, "step": 15515 }, { "epoch": 2.1909065235809093, "grad_norm": 3.896007963400888, "learning_rate": 3.5786139782690867e-06, "loss": 0.1544, "step": 15516 }, { "epoch": 2.191047726630895, "grad_norm": 3.5068807068076473, "learning_rate": 3.5774454246964075e-06, "loss": 0.15, "step": 15517 }, { "epoch": 2.191188929680881, "grad_norm": 2.346000358708473, "learning_rate": 3.5762770203811225e-06, "loss": 0.0986, "step": 15518 }, { "epoch": 2.191330132730867, "grad_norm": 4.108947430965978, "learning_rate": 3.5751087653503856e-06, "loss": 0.1797, "step": 15519 }, { "epoch": 2.191471335780853, "grad_norm": 3.210085783801295, "learning_rate": 3.5739406596313474e-06, "loss": 0.1647, "step": 15520 }, { "epoch": 2.1916125388308387, "grad_norm": 3.149979224664698, "learning_rate": 3.5727727032511483e-06, "loss": 0.1503, "step": 15521 }, { "epoch": 2.1917537418808246, "grad_norm": 3.0772933012260206, "learning_rate": 3.57160489623694e-06, "loss": 0.1483, "step": 15522 }, { "epoch": 2.1918949449308105, "grad_norm": 3.0396943831990257, "learning_rate": 3.5704372386158602e-06, "loss": 0.1241, "step": 15523 }, { "epoch": 2.1920361479807964, "grad_norm": 3.1724040118368615, "learning_rate": 3.56926973041504e-06, "loss": 0.1607, "step": 15524 }, { "epoch": 2.1921773510307823, "grad_norm": 3.56742572941602, "learning_rate": 3.5681023716616136e-06, "loss": 0.2228, "step": 15525 }, { "epoch": 2.192318554080768, "grad_norm": 3.046897843931735, "learning_rate": 3.5669351623827077e-06, "loss": 0.1642, "step": 15526 }, { "epoch": 2.192459757130754, "grad_norm": 2.889145676504674, "learning_rate": 3.5657681026054523e-06, "loss": 0.1663, "step": 15527 }, { "epoch": 2.19260096018074, "grad_norm": 3.598844002476634, "learning_rate": 3.564601192356968e-06, "loss": 0.1782, "step": 15528 }, { "epoch": 2.192742163230726, "grad_norm": 3.6396644539254535, "learning_rate": 3.5634344316643734e-06, "loss": 0.1486, "step": 15529 }, { "epoch": 2.1928833662807117, "grad_norm": 3.5003476093235104, "learning_rate": 3.5622678205547824e-06, "loss": 0.1812, "step": 15530 }, { "epoch": 2.1930245693306976, "grad_norm": 4.349272828361988, "learning_rate": 3.5611013590553066e-06, "loss": 0.2196, "step": 15531 }, { "epoch": 2.1931657723806834, "grad_norm": 3.1974056349920827, "learning_rate": 3.559935047193055e-06, "loss": 0.1188, "step": 15532 }, { "epoch": 2.1933069754306693, "grad_norm": 3.158794859379246, "learning_rate": 3.558768884995132e-06, "loss": 0.1648, "step": 15533 }, { "epoch": 2.193448178480655, "grad_norm": 2.9644594413014334, "learning_rate": 3.557602872488638e-06, "loss": 0.1459, "step": 15534 }, { "epoch": 2.193589381530641, "grad_norm": 3.203096089221971, "learning_rate": 3.5564370097006706e-06, "loss": 0.1641, "step": 15535 }, { "epoch": 2.193730584580627, "grad_norm": 3.29050921376092, "learning_rate": 3.5552712966583247e-06, "loss": 0.1559, "step": 15536 }, { "epoch": 2.193871787630613, "grad_norm": 2.928743308749797, "learning_rate": 3.5541057333886906e-06, "loss": 0.1482, "step": 15537 }, { "epoch": 2.1940129906805987, "grad_norm": 2.9213264167337436, "learning_rate": 3.5529403199188517e-06, "loss": 0.174, "step": 15538 }, { "epoch": 2.1941541937305846, "grad_norm": 4.008914285312878, "learning_rate": 3.5517750562759025e-06, "loss": 0.1611, "step": 15539 }, { "epoch": 2.1942953967805705, "grad_norm": 2.977424809507221, "learning_rate": 3.5506099424869133e-06, "loss": 0.1398, "step": 15540 }, { "epoch": 2.1944365998305564, "grad_norm": 2.7763184853018874, "learning_rate": 3.5494449785789633e-06, "loss": 0.1376, "step": 15541 }, { "epoch": 2.1945778028805423, "grad_norm": 2.6082195398341126, "learning_rate": 3.5482801645791266e-06, "loss": 0.1344, "step": 15542 }, { "epoch": 2.194719005930528, "grad_norm": 3.660159219676567, "learning_rate": 3.5471155005144685e-06, "loss": 0.1815, "step": 15543 }, { "epoch": 2.194860208980514, "grad_norm": 3.7277000069190387, "learning_rate": 3.545950986412063e-06, "loss": 0.1983, "step": 15544 }, { "epoch": 2.1950014120305, "grad_norm": 3.239905666572818, "learning_rate": 3.5447866222989703e-06, "loss": 0.1383, "step": 15545 }, { "epoch": 2.195142615080486, "grad_norm": 3.696004265027511, "learning_rate": 3.5436224082022476e-06, "loss": 0.1737, "step": 15546 }, { "epoch": 2.1952838181304717, "grad_norm": 3.6919058263885693, "learning_rate": 3.5424583441489525e-06, "loss": 0.1749, "step": 15547 }, { "epoch": 2.1954250211804576, "grad_norm": 2.9146650316629095, "learning_rate": 3.5412944301661356e-06, "loss": 0.1354, "step": 15548 }, { "epoch": 2.1955662242304435, "grad_norm": 2.8525384281847868, "learning_rate": 3.5401306662808476e-06, "loss": 0.1452, "step": 15549 }, { "epoch": 2.1957074272804293, "grad_norm": 3.4674071214332702, "learning_rate": 3.5389670525201335e-06, "loss": 0.1594, "step": 15550 }, { "epoch": 2.195848630330415, "grad_norm": 3.744334989956807, "learning_rate": 3.537803588911034e-06, "loss": 0.168, "step": 15551 }, { "epoch": 2.195989833380401, "grad_norm": 2.628210090421226, "learning_rate": 3.5366402754805885e-06, "loss": 0.111, "step": 15552 }, { "epoch": 2.196131036430387, "grad_norm": 3.6926593068564064, "learning_rate": 3.5354771122558317e-06, "loss": 0.1899, "step": 15553 }, { "epoch": 2.196272239480373, "grad_norm": 3.050958870320738, "learning_rate": 3.5343140992637946e-06, "loss": 0.1469, "step": 15554 }, { "epoch": 2.1964134425303588, "grad_norm": 4.066587017311011, "learning_rate": 3.5331512365315035e-06, "loss": 0.1865, "step": 15555 }, { "epoch": 2.1965546455803446, "grad_norm": 3.174462163781929, "learning_rate": 3.531988524085991e-06, "loss": 0.1781, "step": 15556 }, { "epoch": 2.1966958486303305, "grad_norm": 2.958540575444288, "learning_rate": 3.5308259619542675e-06, "loss": 0.1338, "step": 15557 }, { "epoch": 2.1968370516803164, "grad_norm": 3.4296430864089653, "learning_rate": 3.5296635501633558e-06, "loss": 0.1531, "step": 15558 }, { "epoch": 2.1969782547303023, "grad_norm": 3.420116708503005, "learning_rate": 3.5285012887402693e-06, "loss": 0.1668, "step": 15559 }, { "epoch": 2.197119457780288, "grad_norm": 2.604031384113819, "learning_rate": 3.5273391777120136e-06, "loss": 0.1446, "step": 15560 }, { "epoch": 2.197260660830274, "grad_norm": 3.5575521341827065, "learning_rate": 3.5261772171056043e-06, "loss": 0.165, "step": 15561 }, { "epoch": 2.19740186388026, "grad_norm": 2.5490296077341927, "learning_rate": 3.525015406948039e-06, "loss": 0.1243, "step": 15562 }, { "epoch": 2.197543066930246, "grad_norm": 2.9491939267605267, "learning_rate": 3.5238537472663214e-06, "loss": 0.1397, "step": 15563 }, { "epoch": 2.1976842699802317, "grad_norm": 2.79821879202331, "learning_rate": 3.522692238087445e-06, "loss": 0.1281, "step": 15564 }, { "epoch": 2.1978254730302176, "grad_norm": 4.1682155702094414, "learning_rate": 3.5215308794384073e-06, "loss": 0.221, "step": 15565 }, { "epoch": 2.1979666760802035, "grad_norm": 3.7080448116804634, "learning_rate": 3.5203696713461866e-06, "loss": 0.1559, "step": 15566 }, { "epoch": 2.1981078791301893, "grad_norm": 2.792999718789201, "learning_rate": 3.5192086138377803e-06, "loss": 0.1457, "step": 15567 }, { "epoch": 2.1982490821801752, "grad_norm": 3.6272383634385177, "learning_rate": 3.5180477069401664e-06, "loss": 0.1744, "step": 15568 }, { "epoch": 2.198390285230161, "grad_norm": 3.53276842630283, "learning_rate": 3.516886950680326e-06, "loss": 0.198, "step": 15569 }, { "epoch": 2.198531488280147, "grad_norm": 2.527188342883634, "learning_rate": 3.515726345085232e-06, "loss": 0.1339, "step": 15570 }, { "epoch": 2.198672691330133, "grad_norm": 2.9393858275727687, "learning_rate": 3.514565890181857e-06, "loss": 0.1429, "step": 15571 }, { "epoch": 2.1988138943801188, "grad_norm": 3.6430317408716233, "learning_rate": 3.5134055859971674e-06, "loss": 0.1757, "step": 15572 }, { "epoch": 2.1989550974301046, "grad_norm": 3.649663171762288, "learning_rate": 3.5122454325581368e-06, "loss": 0.1391, "step": 15573 }, { "epoch": 2.1990963004800905, "grad_norm": 3.5075618484218074, "learning_rate": 3.511085429891716e-06, "loss": 0.1453, "step": 15574 }, { "epoch": 2.1992375035300764, "grad_norm": 2.6316673569793068, "learning_rate": 3.5099255780248696e-06, "loss": 0.1384, "step": 15575 }, { "epoch": 2.1993787065800623, "grad_norm": 3.39901094578447, "learning_rate": 3.508765876984549e-06, "loss": 0.1402, "step": 15576 }, { "epoch": 2.199519909630048, "grad_norm": 3.047342225131142, "learning_rate": 3.5076063267977055e-06, "loss": 0.1509, "step": 15577 }, { "epoch": 2.199661112680034, "grad_norm": 3.932402688371889, "learning_rate": 3.5064469274912847e-06, "loss": 0.2017, "step": 15578 }, { "epoch": 2.19980231573002, "grad_norm": 2.6601033492229433, "learning_rate": 3.5052876790922364e-06, "loss": 0.144, "step": 15579 }, { "epoch": 2.199943518780006, "grad_norm": 3.4448177441183034, "learning_rate": 3.504128581627497e-06, "loss": 0.1826, "step": 15580 }, { "epoch": 2.2000847218299917, "grad_norm": 3.2706589267061683, "learning_rate": 3.5029696351240038e-06, "loss": 0.1807, "step": 15581 }, { "epoch": 2.2002259248799776, "grad_norm": 2.742753398924691, "learning_rate": 3.5018108396086945e-06, "loss": 0.174, "step": 15582 }, { "epoch": 2.2003671279299635, "grad_norm": 2.953193861342629, "learning_rate": 3.5006521951084873e-06, "loss": 0.1661, "step": 15583 }, { "epoch": 2.2005083309799494, "grad_norm": 2.526300017828238, "learning_rate": 3.4994937016503206e-06, "loss": 0.1097, "step": 15584 }, { "epoch": 2.2006495340299352, "grad_norm": 4.370387192762125, "learning_rate": 3.4983353592611124e-06, "loss": 0.2159, "step": 15585 }, { "epoch": 2.200790737079921, "grad_norm": 2.9884731553413197, "learning_rate": 3.4971771679677826e-06, "loss": 0.1593, "step": 15586 }, { "epoch": 2.200931940129907, "grad_norm": 3.4606977705397877, "learning_rate": 3.4960191277972466e-06, "loss": 0.1884, "step": 15587 }, { "epoch": 2.201073143179893, "grad_norm": 2.9784144373373094, "learning_rate": 3.494861238776418e-06, "loss": 0.1246, "step": 15588 }, { "epoch": 2.2012143462298788, "grad_norm": 3.3163326437687464, "learning_rate": 3.493703500932205e-06, "loss": 0.1451, "step": 15589 }, { "epoch": 2.2013555492798647, "grad_norm": 2.5299314026716004, "learning_rate": 3.492545914291512e-06, "loss": 0.1484, "step": 15590 }, { "epoch": 2.2014967523298505, "grad_norm": 3.720526483263536, "learning_rate": 3.4913884788812426e-06, "loss": 0.2066, "step": 15591 }, { "epoch": 2.2016379553798364, "grad_norm": 2.8546504844420375, "learning_rate": 3.490231194728293e-06, "loss": 0.1573, "step": 15592 }, { "epoch": 2.2017791584298223, "grad_norm": 3.145963293252708, "learning_rate": 3.4890740618595597e-06, "loss": 0.1495, "step": 15593 }, { "epoch": 2.201920361479808, "grad_norm": 2.6492356628309106, "learning_rate": 3.4879170803019336e-06, "loss": 0.1589, "step": 15594 }, { "epoch": 2.202061564529794, "grad_norm": 3.4177223058894612, "learning_rate": 3.4867602500822984e-06, "loss": 0.1905, "step": 15595 }, { "epoch": 2.20220276757978, "grad_norm": 3.2071947879860376, "learning_rate": 3.485603571227545e-06, "loss": 0.146, "step": 15596 }, { "epoch": 2.202343970629766, "grad_norm": 2.604289033682239, "learning_rate": 3.4844470437645515e-06, "loss": 0.1496, "step": 15597 }, { "epoch": 2.2024851736797517, "grad_norm": 3.3015462211953333, "learning_rate": 3.483290667720196e-06, "loss": 0.1804, "step": 15598 }, { "epoch": 2.202626376729737, "grad_norm": 3.141592440690824, "learning_rate": 3.4821344431213533e-06, "loss": 0.1309, "step": 15599 }, { "epoch": 2.202767579779723, "grad_norm": 3.2791581681809157, "learning_rate": 3.480978369994885e-06, "loss": 0.186, "step": 15600 }, { "epoch": 2.202908782829709, "grad_norm": 3.794432429743621, "learning_rate": 3.4798224483676667e-06, "loss": 0.2188, "step": 15601 }, { "epoch": 2.203049985879695, "grad_norm": 2.818365810982154, "learning_rate": 3.478666678266559e-06, "loss": 0.158, "step": 15602 }, { "epoch": 2.2031911889296807, "grad_norm": 2.877744586637356, "learning_rate": 3.4775110597184226e-06, "loss": 0.1725, "step": 15603 }, { "epoch": 2.2033323919796666, "grad_norm": 3.206000978097044, "learning_rate": 3.4763555927501113e-06, "loss": 0.153, "step": 15604 }, { "epoch": 2.2034735950296525, "grad_norm": 3.1125095190367604, "learning_rate": 3.4752002773884784e-06, "loss": 0.1737, "step": 15605 }, { "epoch": 2.2036147980796383, "grad_norm": 3.0557712796713203, "learning_rate": 3.4740451136603737e-06, "loss": 0.1476, "step": 15606 }, { "epoch": 2.203756001129624, "grad_norm": 3.5228812255998383, "learning_rate": 3.472890101592642e-06, "loss": 0.1831, "step": 15607 }, { "epoch": 2.20389720417961, "grad_norm": 2.4463382648254317, "learning_rate": 3.4717352412121254e-06, "loss": 0.1241, "step": 15608 }, { "epoch": 2.204038407229596, "grad_norm": 2.3047849335814425, "learning_rate": 3.4705805325456632e-06, "loss": 0.1304, "step": 15609 }, { "epoch": 2.204179610279582, "grad_norm": 2.7671663549180487, "learning_rate": 3.4694259756200888e-06, "loss": 0.1214, "step": 15610 }, { "epoch": 2.2043208133295678, "grad_norm": 3.154662128019658, "learning_rate": 3.468271570462235e-06, "loss": 0.1813, "step": 15611 }, { "epoch": 2.2044620163795536, "grad_norm": 3.921755315844086, "learning_rate": 3.467117317098925e-06, "loss": 0.1957, "step": 15612 }, { "epoch": 2.2046032194295395, "grad_norm": 3.4731848085552617, "learning_rate": 3.465963215556991e-06, "loss": 0.13, "step": 15613 }, { "epoch": 2.2047444224795254, "grad_norm": 3.136391809862563, "learning_rate": 3.4648092658632506e-06, "loss": 0.1492, "step": 15614 }, { "epoch": 2.2048856255295113, "grad_norm": 3.7541042780481337, "learning_rate": 3.463655468044519e-06, "loss": 0.178, "step": 15615 }, { "epoch": 2.205026828579497, "grad_norm": 2.9272664229772407, "learning_rate": 3.4625018221276165e-06, "loss": 0.1399, "step": 15616 }, { "epoch": 2.205168031629483, "grad_norm": 2.870549763141004, "learning_rate": 3.46134832813934e-06, "loss": 0.1595, "step": 15617 }, { "epoch": 2.205309234679469, "grad_norm": 2.544576975705254, "learning_rate": 3.4601949861065086e-06, "loss": 0.1311, "step": 15618 }, { "epoch": 2.205450437729455, "grad_norm": 3.069497534194778, "learning_rate": 3.459041796055922e-06, "loss": 0.1586, "step": 15619 }, { "epoch": 2.2055916407794407, "grad_norm": 3.1570581807545515, "learning_rate": 3.4578887580143793e-06, "loss": 0.1427, "step": 15620 }, { "epoch": 2.2057328438294266, "grad_norm": 3.286948367213691, "learning_rate": 3.4567358720086753e-06, "loss": 0.1467, "step": 15621 }, { "epoch": 2.2058740468794125, "grad_norm": 2.9849117498271327, "learning_rate": 3.4555831380656044e-06, "loss": 0.1221, "step": 15622 }, { "epoch": 2.2060152499293983, "grad_norm": 3.879846768865645, "learning_rate": 3.4544305562119562e-06, "loss": 0.1821, "step": 15623 }, { "epoch": 2.2061564529793842, "grad_norm": 3.679068931812458, "learning_rate": 3.453278126474513e-06, "loss": 0.1598, "step": 15624 }, { "epoch": 2.20629765602937, "grad_norm": 3.1669463318673126, "learning_rate": 3.45212584888006e-06, "loss": 0.1704, "step": 15625 }, { "epoch": 2.206438859079356, "grad_norm": 3.573749137795458, "learning_rate": 3.4509737234553752e-06, "loss": 0.2042, "step": 15626 }, { "epoch": 2.206580062129342, "grad_norm": 4.138196376238599, "learning_rate": 3.4498217502272313e-06, "loss": 0.2017, "step": 15627 }, { "epoch": 2.2067212651793278, "grad_norm": 3.1905621942805835, "learning_rate": 3.4486699292224022e-06, "loss": 0.1498, "step": 15628 }, { "epoch": 2.2068624682293136, "grad_norm": 3.091015484893001, "learning_rate": 3.4475182604676505e-06, "loss": 0.1624, "step": 15629 }, { "epoch": 2.2070036712792995, "grad_norm": 3.788176280245027, "learning_rate": 3.4463667439897486e-06, "loss": 0.1888, "step": 15630 }, { "epoch": 2.2071448743292854, "grad_norm": 4.032984696661167, "learning_rate": 3.445215379815453e-06, "loss": 0.2018, "step": 15631 }, { "epoch": 2.2072860773792713, "grad_norm": 3.1215908446261396, "learning_rate": 3.4440641679715204e-06, "loss": 0.1527, "step": 15632 }, { "epoch": 2.207427280429257, "grad_norm": 3.2332655314644745, "learning_rate": 3.44291310848471e-06, "loss": 0.1586, "step": 15633 }, { "epoch": 2.207568483479243, "grad_norm": 3.312279772390816, "learning_rate": 3.4417622013817597e-06, "loss": 0.1738, "step": 15634 }, { "epoch": 2.207709686529229, "grad_norm": 2.832093298271664, "learning_rate": 3.4406114466894267e-06, "loss": 0.1421, "step": 15635 }, { "epoch": 2.207850889579215, "grad_norm": 3.9741969377328177, "learning_rate": 3.4394608444344514e-06, "loss": 0.1664, "step": 15636 }, { "epoch": 2.2079920926292007, "grad_norm": 3.07307251897038, "learning_rate": 3.4383103946435727e-06, "loss": 0.1267, "step": 15637 }, { "epoch": 2.2081332956791866, "grad_norm": 3.050982191181699, "learning_rate": 3.437160097343526e-06, "loss": 0.1488, "step": 15638 }, { "epoch": 2.2082744987291725, "grad_norm": 2.5997733705230006, "learning_rate": 3.4360099525610457e-06, "loss": 0.1345, "step": 15639 }, { "epoch": 2.2084157017791584, "grad_norm": 3.001367584192087, "learning_rate": 3.4348599603228584e-06, "loss": 0.1721, "step": 15640 }, { "epoch": 2.2085569048291442, "grad_norm": 3.3351313907572355, "learning_rate": 3.43371012065569e-06, "loss": 0.1912, "step": 15641 }, { "epoch": 2.20869810787913, "grad_norm": 3.029817811029279, "learning_rate": 3.432560433586264e-06, "loss": 0.1428, "step": 15642 }, { "epoch": 2.208839310929116, "grad_norm": 3.6562105950354757, "learning_rate": 3.4314108991412962e-06, "loss": 0.1633, "step": 15643 }, { "epoch": 2.208980513979102, "grad_norm": 3.092036722756669, "learning_rate": 3.4302615173475017e-06, "loss": 0.1451, "step": 15644 }, { "epoch": 2.2091217170290878, "grad_norm": 3.9595077309896407, "learning_rate": 3.429112288231594e-06, "loss": 0.2064, "step": 15645 }, { "epoch": 2.2092629200790737, "grad_norm": 4.028040810081268, "learning_rate": 3.4279632118202744e-06, "loss": 0.1862, "step": 15646 }, { "epoch": 2.2094041231290595, "grad_norm": 3.350279285656922, "learning_rate": 3.4268142881402556e-06, "loss": 0.1603, "step": 15647 }, { "epoch": 2.2095453261790454, "grad_norm": 2.9739180107893053, "learning_rate": 3.4256655172182328e-06, "loss": 0.162, "step": 15648 }, { "epoch": 2.2096865292290313, "grad_norm": 3.262406257037821, "learning_rate": 3.424516899080905e-06, "loss": 0.195, "step": 15649 }, { "epoch": 2.209827732279017, "grad_norm": 3.388567344602636, "learning_rate": 3.423368433754969e-06, "loss": 0.1289, "step": 15650 }, { "epoch": 2.209968935329003, "grad_norm": 3.0903830180643848, "learning_rate": 3.4222201212671026e-06, "loss": 0.152, "step": 15651 }, { "epoch": 2.210110138378989, "grad_norm": 2.858756922274672, "learning_rate": 3.4210719616440025e-06, "loss": 0.1717, "step": 15652 }, { "epoch": 2.210251341428975, "grad_norm": 3.6556604304406863, "learning_rate": 3.4199239549123497e-06, "loss": 0.1778, "step": 15653 }, { "epoch": 2.2103925444789607, "grad_norm": 3.724922685326456, "learning_rate": 3.418776101098823e-06, "loss": 0.181, "step": 15654 }, { "epoch": 2.2105337475289466, "grad_norm": 3.537408361320555, "learning_rate": 3.4176284002300963e-06, "loss": 0.1876, "step": 15655 }, { "epoch": 2.2106749505789325, "grad_norm": 3.3643299438794307, "learning_rate": 3.416480852332844e-06, "loss": 0.1657, "step": 15656 }, { "epoch": 2.2108161536289184, "grad_norm": 2.5821043413242806, "learning_rate": 3.415333457433733e-06, "loss": 0.1603, "step": 15657 }, { "epoch": 2.2109573566789043, "grad_norm": 2.656925866407468, "learning_rate": 3.4141862155594283e-06, "loss": 0.1511, "step": 15658 }, { "epoch": 2.21109855972889, "grad_norm": 3.268806224645829, "learning_rate": 3.4130391267365926e-06, "loss": 0.1627, "step": 15659 }, { "epoch": 2.211239762778876, "grad_norm": 3.2957430994450516, "learning_rate": 3.411892190991882e-06, "loss": 0.1641, "step": 15660 }, { "epoch": 2.211380965828862, "grad_norm": 2.800099091748301, "learning_rate": 3.410745408351952e-06, "loss": 0.1089, "step": 15661 }, { "epoch": 2.211522168878848, "grad_norm": 2.611094992840617, "learning_rate": 3.4095987788434538e-06, "loss": 0.1506, "step": 15662 }, { "epoch": 2.2116633719288337, "grad_norm": 3.6397298474767545, "learning_rate": 3.4084523024930295e-06, "loss": 0.1734, "step": 15663 }, { "epoch": 2.2118045749788195, "grad_norm": 3.214676487361643, "learning_rate": 3.407305979327331e-06, "loss": 0.1821, "step": 15664 }, { "epoch": 2.2119457780288054, "grad_norm": 3.45248651732651, "learning_rate": 3.4061598093729942e-06, "loss": 0.1696, "step": 15665 }, { "epoch": 2.2120869810787913, "grad_norm": 3.562286855656202, "learning_rate": 3.405013792656655e-06, "loss": 0.2142, "step": 15666 }, { "epoch": 2.212228184128777, "grad_norm": 2.952491991553645, "learning_rate": 3.4038679292049516e-06, "loss": 0.1368, "step": 15667 }, { "epoch": 2.212369387178763, "grad_norm": 2.8960036063708365, "learning_rate": 3.402722219044505e-06, "loss": 0.1605, "step": 15668 }, { "epoch": 2.212510590228749, "grad_norm": 2.7518029862601128, "learning_rate": 3.401576662201942e-06, "loss": 0.1374, "step": 15669 }, { "epoch": 2.212651793278735, "grad_norm": 3.542001261614874, "learning_rate": 3.40043125870389e-06, "loss": 0.1816, "step": 15670 }, { "epoch": 2.2127929963287207, "grad_norm": 3.278195091539961, "learning_rate": 3.3992860085769665e-06, "loss": 0.1882, "step": 15671 }, { "epoch": 2.2129341993787066, "grad_norm": 3.670846777473275, "learning_rate": 3.3981409118477847e-06, "loss": 0.1549, "step": 15672 }, { "epoch": 2.2130754024286925, "grad_norm": 3.041137694025137, "learning_rate": 3.3969959685429575e-06, "loss": 0.1461, "step": 15673 }, { "epoch": 2.2132166054786784, "grad_norm": 2.596643663606384, "learning_rate": 3.3958511786890923e-06, "loss": 0.1394, "step": 15674 }, { "epoch": 2.2133578085286643, "grad_norm": 3.5255164401121224, "learning_rate": 3.3947065423127933e-06, "loss": 0.1966, "step": 15675 }, { "epoch": 2.21349901157865, "grad_norm": 2.8280480584871106, "learning_rate": 3.3935620594406614e-06, "loss": 0.1318, "step": 15676 }, { "epoch": 2.213640214628636, "grad_norm": 3.4198892023183824, "learning_rate": 3.3924177300992956e-06, "loss": 0.1669, "step": 15677 }, { "epoch": 2.213781417678622, "grad_norm": 3.9889423897436664, "learning_rate": 3.3912735543152864e-06, "loss": 0.2316, "step": 15678 }, { "epoch": 2.213922620728608, "grad_norm": 3.2195569890920184, "learning_rate": 3.3901295321152273e-06, "loss": 0.1557, "step": 15679 }, { "epoch": 2.2140638237785937, "grad_norm": 3.7673095821594615, "learning_rate": 3.3889856635257024e-06, "loss": 0.2016, "step": 15680 }, { "epoch": 2.2142050268285796, "grad_norm": 3.095199991575782, "learning_rate": 3.3878419485732915e-06, "loss": 0.1684, "step": 15681 }, { "epoch": 2.2143462298785654, "grad_norm": 2.6545902731109505, "learning_rate": 3.3866983872845826e-06, "loss": 0.1361, "step": 15682 }, { "epoch": 2.2144874329285513, "grad_norm": 2.308066488363653, "learning_rate": 3.3855549796861476e-06, "loss": 0.1236, "step": 15683 }, { "epoch": 2.214628635978537, "grad_norm": 3.6116324993092492, "learning_rate": 3.3844117258045604e-06, "loss": 0.1873, "step": 15684 }, { "epoch": 2.214769839028523, "grad_norm": 3.0545957303844244, "learning_rate": 3.383268625666385e-06, "loss": 0.1434, "step": 15685 }, { "epoch": 2.214911042078509, "grad_norm": 2.9313223858955197, "learning_rate": 3.382125679298185e-06, "loss": 0.128, "step": 15686 }, { "epoch": 2.215052245128495, "grad_norm": 3.3385319862182197, "learning_rate": 3.3809828867265305e-06, "loss": 0.1878, "step": 15687 }, { "epoch": 2.2151934481784807, "grad_norm": 3.245513740761717, "learning_rate": 3.3798402479779747e-06, "loss": 0.1969, "step": 15688 }, { "epoch": 2.2153346512284666, "grad_norm": 2.8316793996429515, "learning_rate": 3.378697763079073e-06, "loss": 0.1149, "step": 15689 }, { "epoch": 2.2154758542784525, "grad_norm": 3.1631492443029723, "learning_rate": 3.3775554320563753e-06, "loss": 0.1541, "step": 15690 }, { "epoch": 2.2156170573284384, "grad_norm": 3.693464537336146, "learning_rate": 3.3764132549364292e-06, "loss": 0.1553, "step": 15691 }, { "epoch": 2.2157582603784243, "grad_norm": 4.036733371291441, "learning_rate": 3.375271231745779e-06, "loss": 0.2444, "step": 15692 }, { "epoch": 2.21589946342841, "grad_norm": 3.6903047302979597, "learning_rate": 3.3741293625109637e-06, "loss": 0.1982, "step": 15693 }, { "epoch": 2.216040666478396, "grad_norm": 3.1248599482853967, "learning_rate": 3.372987647258521e-06, "loss": 0.1411, "step": 15694 }, { "epoch": 2.216181869528382, "grad_norm": 3.3021161750669927, "learning_rate": 3.3718460860149837e-06, "loss": 0.1436, "step": 15695 }, { "epoch": 2.216323072578368, "grad_norm": 2.9327442605610896, "learning_rate": 3.370704678806881e-06, "loss": 0.1363, "step": 15696 }, { "epoch": 2.2164642756283537, "grad_norm": 3.2576245442388636, "learning_rate": 3.369563425660738e-06, "loss": 0.1326, "step": 15697 }, { "epoch": 2.2166054786783396, "grad_norm": 2.608601636073404, "learning_rate": 3.368422326603075e-06, "loss": 0.1618, "step": 15698 }, { "epoch": 2.2167466817283255, "grad_norm": 2.9267272403692055, "learning_rate": 3.3672813816604155e-06, "loss": 0.1838, "step": 15699 }, { "epoch": 2.2168878847783113, "grad_norm": 2.6470409405898794, "learning_rate": 3.366140590859276e-06, "loss": 0.1233, "step": 15700 }, { "epoch": 2.217029087828297, "grad_norm": 3.188742773804492, "learning_rate": 3.3649999542261604e-06, "loss": 0.1515, "step": 15701 }, { "epoch": 2.217170290878283, "grad_norm": 2.4595490228561974, "learning_rate": 3.3638594717875807e-06, "loss": 0.1654, "step": 15702 }, { "epoch": 2.217311493928269, "grad_norm": 3.3076970065592373, "learning_rate": 3.362719143570038e-06, "loss": 0.1737, "step": 15703 }, { "epoch": 2.217452696978255, "grad_norm": 3.338391469243303, "learning_rate": 3.361578969600039e-06, "loss": 0.1788, "step": 15704 }, { "epoch": 2.2175939000282407, "grad_norm": 2.8281440340202875, "learning_rate": 3.360438949904078e-06, "loss": 0.1507, "step": 15705 }, { "epoch": 2.2177351030782266, "grad_norm": 4.294600393660219, "learning_rate": 3.359299084508648e-06, "loss": 0.2204, "step": 15706 }, { "epoch": 2.2178763061282125, "grad_norm": 3.4297215318704386, "learning_rate": 3.3581593734402397e-06, "loss": 0.1463, "step": 15707 }, { "epoch": 2.2180175091781984, "grad_norm": 2.6719295412169664, "learning_rate": 3.3570198167253386e-06, "loss": 0.1609, "step": 15708 }, { "epoch": 2.2181587122281843, "grad_norm": 2.660703693790063, "learning_rate": 3.355880414390429e-06, "loss": 0.1419, "step": 15709 }, { "epoch": 2.21829991527817, "grad_norm": 3.646976458465951, "learning_rate": 3.354741166461989e-06, "loss": 0.1764, "step": 15710 }, { "epoch": 2.218441118328156, "grad_norm": 3.9364138475962283, "learning_rate": 3.353602072966494e-06, "loss": 0.2237, "step": 15711 }, { "epoch": 2.218582321378142, "grad_norm": 3.4177395692243637, "learning_rate": 3.352463133930417e-06, "loss": 0.1595, "step": 15712 }, { "epoch": 2.218723524428128, "grad_norm": 2.3873253761224067, "learning_rate": 3.351324349380226e-06, "loss": 0.1001, "step": 15713 }, { "epoch": 2.2188647274781137, "grad_norm": 2.9148826388815126, "learning_rate": 3.350185719342385e-06, "loss": 0.1376, "step": 15714 }, { "epoch": 2.2190059305280996, "grad_norm": 2.405205692890868, "learning_rate": 3.349047243843353e-06, "loss": 0.1486, "step": 15715 }, { "epoch": 2.2191471335780855, "grad_norm": 2.616417958798766, "learning_rate": 3.347908922909594e-06, "loss": 0.1229, "step": 15716 }, { "epoch": 2.2192883366280713, "grad_norm": 2.885408835520429, "learning_rate": 3.346770756567562e-06, "loss": 0.1106, "step": 15717 }, { "epoch": 2.2194295396780572, "grad_norm": 3.181057840772467, "learning_rate": 3.345632744843702e-06, "loss": 0.1203, "step": 15718 }, { "epoch": 2.219570742728043, "grad_norm": 3.2419250683903758, "learning_rate": 3.344494887764462e-06, "loss": 0.1388, "step": 15719 }, { "epoch": 2.219711945778029, "grad_norm": 3.821736947692024, "learning_rate": 3.343357185356284e-06, "loss": 0.1585, "step": 15720 }, { "epoch": 2.219853148828015, "grad_norm": 3.3239355777948334, "learning_rate": 3.342219637645614e-06, "loss": 0.1676, "step": 15721 }, { "epoch": 2.2199943518780008, "grad_norm": 3.6430035692325418, "learning_rate": 3.3410822446588833e-06, "loss": 0.2172, "step": 15722 }, { "epoch": 2.2201355549279866, "grad_norm": 3.403442294379391, "learning_rate": 3.339945006422526e-06, "loss": 0.1393, "step": 15723 }, { "epoch": 2.2202767579779725, "grad_norm": 2.913081381127837, "learning_rate": 3.33880792296297e-06, "loss": 0.1571, "step": 15724 }, { "epoch": 2.2204179610279584, "grad_norm": 3.49595504738506, "learning_rate": 3.337670994306641e-06, "loss": 0.1607, "step": 15725 }, { "epoch": 2.220559164077944, "grad_norm": 2.8436860530913535, "learning_rate": 3.3365342204799613e-06, "loss": 0.1621, "step": 15726 }, { "epoch": 2.2207003671279297, "grad_norm": 3.4043610527765504, "learning_rate": 3.3353976015093492e-06, "loss": 0.1527, "step": 15727 }, { "epoch": 2.2208415701779156, "grad_norm": 3.2610050998026456, "learning_rate": 3.3342611374212176e-06, "loss": 0.1661, "step": 15728 }, { "epoch": 2.2209827732279015, "grad_norm": 3.401444687883845, "learning_rate": 3.333124828241978e-06, "loss": 0.1811, "step": 15729 }, { "epoch": 2.2211239762778874, "grad_norm": 3.466748811619931, "learning_rate": 3.33198867399804e-06, "loss": 0.1527, "step": 15730 }, { "epoch": 2.2212651793278733, "grad_norm": 2.641741944783832, "learning_rate": 3.3308526747158045e-06, "loss": 0.1417, "step": 15731 }, { "epoch": 2.221406382377859, "grad_norm": 3.371473758765488, "learning_rate": 3.3297168304216686e-06, "loss": 0.1445, "step": 15732 }, { "epoch": 2.221547585427845, "grad_norm": 2.6942970632325043, "learning_rate": 3.3285811411420376e-06, "loss": 0.1019, "step": 15733 }, { "epoch": 2.221688788477831, "grad_norm": 3.3082701614976147, "learning_rate": 3.3274456069033024e-06, "loss": 0.1399, "step": 15734 }, { "epoch": 2.221829991527817, "grad_norm": 4.015160064827532, "learning_rate": 3.326310227731846e-06, "loss": 0.2104, "step": 15735 }, { "epoch": 2.2219711945778027, "grad_norm": 3.613621652353988, "learning_rate": 3.3251750036540585e-06, "loss": 0.1947, "step": 15736 }, { "epoch": 2.2221123976277886, "grad_norm": 3.3326432832376613, "learning_rate": 3.324039934696317e-06, "loss": 0.1408, "step": 15737 }, { "epoch": 2.2222536006777744, "grad_norm": 2.876790017961229, "learning_rate": 3.322905020885009e-06, "loss": 0.136, "step": 15738 }, { "epoch": 2.2223948037277603, "grad_norm": 3.518112132181415, "learning_rate": 3.321770262246503e-06, "loss": 0.2031, "step": 15739 }, { "epoch": 2.222536006777746, "grad_norm": 2.7449825368553658, "learning_rate": 3.3206356588071733e-06, "loss": 0.1441, "step": 15740 }, { "epoch": 2.222677209827732, "grad_norm": 3.243134391336527, "learning_rate": 3.3195012105933856e-06, "loss": 0.1371, "step": 15741 }, { "epoch": 2.222818412877718, "grad_norm": 3.973782071016101, "learning_rate": 3.3183669176315046e-06, "loss": 0.2039, "step": 15742 }, { "epoch": 2.222959615927704, "grad_norm": 2.801072500632294, "learning_rate": 3.317232779947891e-06, "loss": 0.1478, "step": 15743 }, { "epoch": 2.2231008189776897, "grad_norm": 3.1966330183294853, "learning_rate": 3.3160987975689017e-06, "loss": 0.1631, "step": 15744 }, { "epoch": 2.2232420220276756, "grad_norm": 3.53213152390162, "learning_rate": 3.3149649705208908e-06, "loss": 0.1783, "step": 15745 }, { "epoch": 2.2233832250776615, "grad_norm": 3.245032997211984, "learning_rate": 3.3138312988302055e-06, "loss": 0.1614, "step": 15746 }, { "epoch": 2.2235244281276474, "grad_norm": 2.481262988513125, "learning_rate": 3.312697782523193e-06, "loss": 0.1006, "step": 15747 }, { "epoch": 2.2236656311776333, "grad_norm": 3.0742634060552145, "learning_rate": 3.3115644216261977e-06, "loss": 0.1925, "step": 15748 }, { "epoch": 2.223806834227619, "grad_norm": 2.656242117122283, "learning_rate": 3.3104312161655516e-06, "loss": 0.1176, "step": 15749 }, { "epoch": 2.223948037277605, "grad_norm": 3.1514652220339525, "learning_rate": 3.309298166167599e-06, "loss": 0.136, "step": 15750 }, { "epoch": 2.224089240327591, "grad_norm": 2.934619275953598, "learning_rate": 3.308165271658672e-06, "loss": 0.1342, "step": 15751 }, { "epoch": 2.224230443377577, "grad_norm": 3.9173100613234175, "learning_rate": 3.30703253266509e-06, "loss": 0.1944, "step": 15752 }, { "epoch": 2.2243716464275627, "grad_norm": 3.423566457899086, "learning_rate": 3.3058999492131805e-06, "loss": 0.1702, "step": 15753 }, { "epoch": 2.2245128494775486, "grad_norm": 2.827232034685807, "learning_rate": 3.304767521329263e-06, "loss": 0.1436, "step": 15754 }, { "epoch": 2.2246540525275345, "grad_norm": 3.3143109130086934, "learning_rate": 3.3036352490396596e-06, "loss": 0.1806, "step": 15755 }, { "epoch": 2.2247952555775203, "grad_norm": 3.9504949170327253, "learning_rate": 3.302503132370681e-06, "loss": 0.1892, "step": 15756 }, { "epoch": 2.224936458627506, "grad_norm": 3.2278424428399073, "learning_rate": 3.3013711713486383e-06, "loss": 0.165, "step": 15757 }, { "epoch": 2.225077661677492, "grad_norm": 4.162476806025594, "learning_rate": 3.3002393659998357e-06, "loss": 0.1947, "step": 15758 }, { "epoch": 2.225218864727478, "grad_norm": 2.5375440743812026, "learning_rate": 3.299107716350577e-06, "loss": 0.112, "step": 15759 }, { "epoch": 2.225360067777464, "grad_norm": 3.8191121919592335, "learning_rate": 3.2979762224271616e-06, "loss": 0.1647, "step": 15760 }, { "epoch": 2.2255012708274498, "grad_norm": 3.639624214346567, "learning_rate": 3.2968448842558833e-06, "loss": 0.1761, "step": 15761 }, { "epoch": 2.2256424738774356, "grad_norm": 2.5112926778753173, "learning_rate": 3.295713701863036e-06, "loss": 0.1379, "step": 15762 }, { "epoch": 2.2257836769274215, "grad_norm": 3.1773061138532532, "learning_rate": 3.2945826752749068e-06, "loss": 0.1446, "step": 15763 }, { "epoch": 2.2259248799774074, "grad_norm": 3.282223576742086, "learning_rate": 3.2934518045177798e-06, "loss": 0.1809, "step": 15764 }, { "epoch": 2.2260660830273933, "grad_norm": 3.1246810208531692, "learning_rate": 3.2923210896179368e-06, "loss": 0.1915, "step": 15765 }, { "epoch": 2.226207286077379, "grad_norm": 2.6996383947306413, "learning_rate": 3.291190530601651e-06, "loss": 0.1199, "step": 15766 }, { "epoch": 2.226348489127365, "grad_norm": 2.560053471344069, "learning_rate": 3.2900601274952038e-06, "loss": 0.1192, "step": 15767 }, { "epoch": 2.226489692177351, "grad_norm": 2.848529329842376, "learning_rate": 3.288929880324865e-06, "loss": 0.1539, "step": 15768 }, { "epoch": 2.226630895227337, "grad_norm": 3.370693737409179, "learning_rate": 3.2877997891168944e-06, "loss": 0.1623, "step": 15769 }, { "epoch": 2.2267720982773227, "grad_norm": 3.2796455126552164, "learning_rate": 3.2866698538975572e-06, "loss": 0.157, "step": 15770 }, { "epoch": 2.2269133013273086, "grad_norm": 3.553660648103173, "learning_rate": 3.285540074693113e-06, "loss": 0.2096, "step": 15771 }, { "epoch": 2.2270545043772945, "grad_norm": 3.614546115684095, "learning_rate": 3.284410451529816e-06, "loss": 0.1524, "step": 15772 }, { "epoch": 2.2271957074272803, "grad_norm": 3.574656513702258, "learning_rate": 3.283280984433922e-06, "loss": 0.1674, "step": 15773 }, { "epoch": 2.2273369104772662, "grad_norm": 3.1688333827297575, "learning_rate": 3.2821516734316772e-06, "loss": 0.1192, "step": 15774 }, { "epoch": 2.227478113527252, "grad_norm": 2.9402583307325436, "learning_rate": 3.281022518549326e-06, "loss": 0.1741, "step": 15775 }, { "epoch": 2.227619316577238, "grad_norm": 2.6930249940212367, "learning_rate": 3.279893519813111e-06, "loss": 0.1265, "step": 15776 }, { "epoch": 2.227760519627224, "grad_norm": 2.531940110547849, "learning_rate": 3.2787646772492678e-06, "loss": 0.1459, "step": 15777 }, { "epoch": 2.2279017226772098, "grad_norm": 2.9759601258806985, "learning_rate": 3.2776359908840306e-06, "loss": 0.1577, "step": 15778 }, { "epoch": 2.2280429257271956, "grad_norm": 2.446051303653981, "learning_rate": 3.27650746074363e-06, "loss": 0.1298, "step": 15779 }, { "epoch": 2.2281841287771815, "grad_norm": 2.9814563690726206, "learning_rate": 3.275379086854292e-06, "loss": 0.1364, "step": 15780 }, { "epoch": 2.2283253318271674, "grad_norm": 4.507686672899418, "learning_rate": 3.2742508692422405e-06, "loss": 0.2231, "step": 15781 }, { "epoch": 2.2284665348771533, "grad_norm": 3.2229993667142156, "learning_rate": 3.2731228079336928e-06, "loss": 0.1572, "step": 15782 }, { "epoch": 2.228607737927139, "grad_norm": 3.874126947162224, "learning_rate": 3.271994902954867e-06, "loss": 0.1598, "step": 15783 }, { "epoch": 2.228748940977125, "grad_norm": 3.1945813095760403, "learning_rate": 3.2708671543319712e-06, "loss": 0.1678, "step": 15784 }, { "epoch": 2.228890144027111, "grad_norm": 2.9808193684312334, "learning_rate": 3.2697395620912233e-06, "loss": 0.1405, "step": 15785 }, { "epoch": 2.229031347077097, "grad_norm": 3.4930336564464652, "learning_rate": 3.2686121262588165e-06, "loss": 0.1851, "step": 15786 }, { "epoch": 2.2291725501270827, "grad_norm": 2.576363944572552, "learning_rate": 3.267484846860959e-06, "loss": 0.1348, "step": 15787 }, { "epoch": 2.2293137531770686, "grad_norm": 3.299302836018291, "learning_rate": 3.2663577239238443e-06, "loss": 0.1475, "step": 15788 }, { "epoch": 2.2294549562270545, "grad_norm": 3.802476557226168, "learning_rate": 3.2652307574736652e-06, "loss": 0.1494, "step": 15789 }, { "epoch": 2.2295961592770404, "grad_norm": 3.0891643024726365, "learning_rate": 3.264103947536619e-06, "loss": 0.162, "step": 15790 }, { "epoch": 2.2297373623270262, "grad_norm": 3.220751587552492, "learning_rate": 3.262977294138887e-06, "loss": 0.1764, "step": 15791 }, { "epoch": 2.229878565377012, "grad_norm": 3.600283631454464, "learning_rate": 3.2618507973066536e-06, "loss": 0.2034, "step": 15792 }, { "epoch": 2.230019768426998, "grad_norm": 3.832411048974628, "learning_rate": 3.2607244570660966e-06, "loss": 0.1606, "step": 15793 }, { "epoch": 2.230160971476984, "grad_norm": 3.452655645634177, "learning_rate": 3.259598273443394e-06, "loss": 0.1431, "step": 15794 }, { "epoch": 2.2303021745269698, "grad_norm": 3.3750096297459704, "learning_rate": 3.2584722464647167e-06, "loss": 0.1548, "step": 15795 }, { "epoch": 2.2304433775769557, "grad_norm": 3.876893663678782, "learning_rate": 3.257346376156233e-06, "loss": 0.2217, "step": 15796 }, { "epoch": 2.2305845806269415, "grad_norm": 3.516298596902472, "learning_rate": 3.2562206625441073e-06, "loss": 0.1586, "step": 15797 }, { "epoch": 2.2307257836769274, "grad_norm": 5.100071104683038, "learning_rate": 3.2550951056545e-06, "loss": 0.2107, "step": 15798 }, { "epoch": 2.2308669867269133, "grad_norm": 3.0635435297438267, "learning_rate": 3.253969705513571e-06, "loss": 0.1619, "step": 15799 }, { "epoch": 2.231008189776899, "grad_norm": 3.290102134201919, "learning_rate": 3.252844462147472e-06, "loss": 0.1503, "step": 15800 }, { "epoch": 2.231149392826885, "grad_norm": 3.2213203841735387, "learning_rate": 3.2517193755823506e-06, "loss": 0.1785, "step": 15801 }, { "epoch": 2.231290595876871, "grad_norm": 3.4559718305140263, "learning_rate": 3.2505944458443626e-06, "loss": 0.1921, "step": 15802 }, { "epoch": 2.231431798926857, "grad_norm": 3.043782857459016, "learning_rate": 3.249469672959642e-06, "loss": 0.1291, "step": 15803 }, { "epoch": 2.2315730019768427, "grad_norm": 3.476721674804354, "learning_rate": 3.248345056954332e-06, "loss": 0.1599, "step": 15804 }, { "epoch": 2.2317142050268286, "grad_norm": 3.541982370811625, "learning_rate": 3.2472205978545657e-06, "loss": 0.2124, "step": 15805 }, { "epoch": 2.2318554080768145, "grad_norm": 2.2358291536121007, "learning_rate": 3.2460962956864727e-06, "loss": 0.1296, "step": 15806 }, { "epoch": 2.2319966111268004, "grad_norm": 3.1897054814495296, "learning_rate": 3.24497215047619e-06, "loss": 0.1411, "step": 15807 }, { "epoch": 2.2321378141767863, "grad_norm": 3.792611424106291, "learning_rate": 3.243848162249835e-06, "loss": 0.1887, "step": 15808 }, { "epoch": 2.232279017226772, "grad_norm": 3.3794472879664452, "learning_rate": 3.2427243310335333e-06, "loss": 0.1672, "step": 15809 }, { "epoch": 2.232420220276758, "grad_norm": 3.806036255943824, "learning_rate": 3.2416006568533987e-06, "loss": 0.1769, "step": 15810 }, { "epoch": 2.232561423326744, "grad_norm": 2.6768019808929924, "learning_rate": 3.240477139735546e-06, "loss": 0.1371, "step": 15811 }, { "epoch": 2.23270262637673, "grad_norm": 2.689263077890952, "learning_rate": 3.239353779706086e-06, "loss": 0.1479, "step": 15812 }, { "epoch": 2.2328438294267157, "grad_norm": 3.0010048157067715, "learning_rate": 3.238230576791124e-06, "loss": 0.141, "step": 15813 }, { "epoch": 2.2329850324767015, "grad_norm": 3.008188105246033, "learning_rate": 3.2371075310167634e-06, "loss": 0.1622, "step": 15814 }, { "epoch": 2.2331262355266874, "grad_norm": 3.830703862528595, "learning_rate": 3.2359846424091025e-06, "loss": 0.1755, "step": 15815 }, { "epoch": 2.2332674385766733, "grad_norm": 2.394735671310006, "learning_rate": 3.234861910994238e-06, "loss": 0.1174, "step": 15816 }, { "epoch": 2.233408641626659, "grad_norm": 3.199872236203357, "learning_rate": 3.2337393367982604e-06, "loss": 0.1567, "step": 15817 }, { "epoch": 2.233549844676645, "grad_norm": 3.119621940053606, "learning_rate": 3.2326169198472555e-06, "loss": 0.136, "step": 15818 }, { "epoch": 2.233691047726631, "grad_norm": 3.541852655813336, "learning_rate": 3.2314946601673182e-06, "loss": 0.2019, "step": 15819 }, { "epoch": 2.233832250776617, "grad_norm": 2.822158488349907, "learning_rate": 3.230372557784518e-06, "loss": 0.1494, "step": 15820 }, { "epoch": 2.2339734538266027, "grad_norm": 3.0618241337314602, "learning_rate": 3.229250612724936e-06, "loss": 0.1732, "step": 15821 }, { "epoch": 2.2341146568765886, "grad_norm": 2.9978500780495603, "learning_rate": 3.2281288250146447e-06, "loss": 0.1627, "step": 15822 }, { "epoch": 2.2342558599265745, "grad_norm": 2.718509759523067, "learning_rate": 3.2270071946797133e-06, "loss": 0.1194, "step": 15823 }, { "epoch": 2.2343970629765604, "grad_norm": 2.903898698654555, "learning_rate": 3.2258857217462115e-06, "loss": 0.1357, "step": 15824 }, { "epoch": 2.2345382660265463, "grad_norm": 3.209175156048876, "learning_rate": 3.2247644062402006e-06, "loss": 0.1678, "step": 15825 }, { "epoch": 2.234679469076532, "grad_norm": 3.0851691796309133, "learning_rate": 3.22364324818774e-06, "loss": 0.1597, "step": 15826 }, { "epoch": 2.234820672126518, "grad_norm": 3.1755785771990475, "learning_rate": 3.222522247614882e-06, "loss": 0.1214, "step": 15827 }, { "epoch": 2.234961875176504, "grad_norm": 2.978499115420236, "learning_rate": 3.2214014045476815e-06, "loss": 0.1444, "step": 15828 }, { "epoch": 2.23510307822649, "grad_norm": 2.722058577816949, "learning_rate": 3.2202807190121845e-06, "loss": 0.1455, "step": 15829 }, { "epoch": 2.2352442812764757, "grad_norm": 2.9257897412642007, "learning_rate": 3.219160191034435e-06, "loss": 0.1611, "step": 15830 }, { "epoch": 2.2353854843264616, "grad_norm": 3.1998483477656205, "learning_rate": 3.218039820640475e-06, "loss": 0.1486, "step": 15831 }, { "epoch": 2.2355266873764474, "grad_norm": 2.6025966837140926, "learning_rate": 3.2169196078563403e-06, "loss": 0.1406, "step": 15832 }, { "epoch": 2.2356678904264333, "grad_norm": 2.8673524021998356, "learning_rate": 3.2157995527080643e-06, "loss": 0.1319, "step": 15833 }, { "epoch": 2.235809093476419, "grad_norm": 3.07294829306463, "learning_rate": 3.2146796552216773e-06, "loss": 0.1709, "step": 15834 }, { "epoch": 2.235950296526405, "grad_norm": 3.84976571884867, "learning_rate": 3.213559915423201e-06, "loss": 0.1822, "step": 15835 }, { "epoch": 2.236091499576391, "grad_norm": 2.662954528506825, "learning_rate": 3.212440333338668e-06, "loss": 0.1354, "step": 15836 }, { "epoch": 2.236232702626377, "grad_norm": 2.758285163170542, "learning_rate": 3.2113209089940877e-06, "loss": 0.1187, "step": 15837 }, { "epoch": 2.2363739056763627, "grad_norm": 3.097287271170392, "learning_rate": 3.210201642415477e-06, "loss": 0.1322, "step": 15838 }, { "epoch": 2.2365151087263486, "grad_norm": 3.8483336559767096, "learning_rate": 3.209082533628848e-06, "loss": 0.1818, "step": 15839 }, { "epoch": 2.2366563117763345, "grad_norm": 3.5069573421109856, "learning_rate": 3.2079635826602053e-06, "loss": 0.1837, "step": 15840 }, { "epoch": 2.2367975148263204, "grad_norm": 3.392051636355483, "learning_rate": 3.2068447895355583e-06, "loss": 0.1193, "step": 15841 }, { "epoch": 2.2369387178763063, "grad_norm": 3.1196356479350125, "learning_rate": 3.205726154280905e-06, "loss": 0.1255, "step": 15842 }, { "epoch": 2.237079920926292, "grad_norm": 3.3370370270753327, "learning_rate": 3.2046076769222424e-06, "loss": 0.1579, "step": 15843 }, { "epoch": 2.237221123976278, "grad_norm": 3.3159125755142957, "learning_rate": 3.203489357485562e-06, "loss": 0.1346, "step": 15844 }, { "epoch": 2.237362327026264, "grad_norm": 3.2045150794590387, "learning_rate": 3.2023711959968564e-06, "loss": 0.145, "step": 15845 }, { "epoch": 2.23750353007625, "grad_norm": 3.749461074359461, "learning_rate": 3.201253192482102e-06, "loss": 0.1758, "step": 15846 }, { "epoch": 2.2376447331262357, "grad_norm": 2.781934547777391, "learning_rate": 3.2001353469672915e-06, "loss": 0.1207, "step": 15847 }, { "epoch": 2.2377859361762216, "grad_norm": 3.1923845545123077, "learning_rate": 3.1990176594783983e-06, "loss": 0.1899, "step": 15848 }, { "epoch": 2.2379271392262075, "grad_norm": 3.2078944156858484, "learning_rate": 3.197900130041398e-06, "loss": 0.139, "step": 15849 }, { "epoch": 2.2380683422761933, "grad_norm": 4.0143361773549, "learning_rate": 3.196782758682261e-06, "loss": 0.1908, "step": 15850 }, { "epoch": 2.238209545326179, "grad_norm": 2.3972979350786545, "learning_rate": 3.1956655454269546e-06, "loss": 0.1161, "step": 15851 }, { "epoch": 2.238350748376165, "grad_norm": 2.8943112271287945, "learning_rate": 3.194548490301439e-06, "loss": 0.1606, "step": 15852 }, { "epoch": 2.238491951426151, "grad_norm": 3.334731102435365, "learning_rate": 3.193431593331684e-06, "loss": 0.17, "step": 15853 }, { "epoch": 2.238633154476137, "grad_norm": 2.7965698448827427, "learning_rate": 3.1923148545436357e-06, "loss": 0.13, "step": 15854 }, { "epoch": 2.2387743575261227, "grad_norm": 3.9895113855189606, "learning_rate": 3.1911982739632497e-06, "loss": 0.2183, "step": 15855 }, { "epoch": 2.2389155605761086, "grad_norm": 3.210887499823668, "learning_rate": 3.1900818516164766e-06, "loss": 0.192, "step": 15856 }, { "epoch": 2.2390567636260945, "grad_norm": 3.257334984010536, "learning_rate": 3.1889655875292593e-06, "loss": 0.1381, "step": 15857 }, { "epoch": 2.2391979666760804, "grad_norm": 2.9906469517929506, "learning_rate": 3.1878494817275374e-06, "loss": 0.11, "step": 15858 }, { "epoch": 2.2393391697260663, "grad_norm": 3.811559738745294, "learning_rate": 3.186733534237255e-06, "loss": 0.1829, "step": 15859 }, { "epoch": 2.239480372776052, "grad_norm": 3.8492309941615224, "learning_rate": 3.185617745084343e-06, "loss": 0.1815, "step": 15860 }, { "epoch": 2.239621575826038, "grad_norm": 3.461277293738979, "learning_rate": 3.184502114294734e-06, "loss": 0.127, "step": 15861 }, { "epoch": 2.239762778876024, "grad_norm": 3.1402572340250834, "learning_rate": 3.1833866418943503e-06, "loss": 0.1462, "step": 15862 }, { "epoch": 2.23990398192601, "grad_norm": 3.2871019226399145, "learning_rate": 3.182271327909113e-06, "loss": 0.1333, "step": 15863 }, { "epoch": 2.2400451849759957, "grad_norm": 3.4959349004449773, "learning_rate": 3.1811561723649496e-06, "loss": 0.1747, "step": 15864 }, { "epoch": 2.2401863880259816, "grad_norm": 4.445077663584838, "learning_rate": 3.1800411752877714e-06, "loss": 0.1761, "step": 15865 }, { "epoch": 2.2403275910759675, "grad_norm": 3.0816792748174855, "learning_rate": 3.1789263367034918e-06, "loss": 0.1652, "step": 15866 }, { "epoch": 2.2404687941259533, "grad_norm": 4.434447591311851, "learning_rate": 3.177811656638018e-06, "loss": 0.2141, "step": 15867 }, { "epoch": 2.2406099971759392, "grad_norm": 4.966880514867716, "learning_rate": 3.1766971351172547e-06, "loss": 0.1809, "step": 15868 }, { "epoch": 2.240751200225925, "grad_norm": 3.423051853674513, "learning_rate": 3.1755827721670996e-06, "loss": 0.1367, "step": 15869 }, { "epoch": 2.240892403275911, "grad_norm": 3.1112951251115755, "learning_rate": 3.174468567813461e-06, "loss": 0.1604, "step": 15870 }, { "epoch": 2.2410336063258964, "grad_norm": 3.233901127699001, "learning_rate": 3.1733545220822215e-06, "loss": 0.1897, "step": 15871 }, { "epoch": 2.2411748093758823, "grad_norm": 2.9739009789151507, "learning_rate": 3.172240634999275e-06, "loss": 0.1371, "step": 15872 }, { "epoch": 2.241316012425868, "grad_norm": 3.064408607564881, "learning_rate": 3.171126906590507e-06, "loss": 0.1674, "step": 15873 }, { "epoch": 2.241457215475854, "grad_norm": 3.3950684455346467, "learning_rate": 3.170013336881801e-06, "loss": 0.155, "step": 15874 }, { "epoch": 2.24159841852584, "grad_norm": 3.809174389804828, "learning_rate": 3.1688999258990318e-06, "loss": 0.164, "step": 15875 }, { "epoch": 2.241739621575826, "grad_norm": 2.74010217457674, "learning_rate": 3.167786673668082e-06, "loss": 0.1476, "step": 15876 }, { "epoch": 2.2418808246258117, "grad_norm": 3.215284281306833, "learning_rate": 3.1666735802148185e-06, "loss": 0.1566, "step": 15877 }, { "epoch": 2.2420220276757976, "grad_norm": 3.7626124595794437, "learning_rate": 3.1655606455651134e-06, "loss": 0.1963, "step": 15878 }, { "epoch": 2.2421632307257835, "grad_norm": 3.3043046952199657, "learning_rate": 3.1644478697448245e-06, "loss": 0.1443, "step": 15879 }, { "epoch": 2.2423044337757694, "grad_norm": 3.152287193813122, "learning_rate": 3.163335252779811e-06, "loss": 0.17, "step": 15880 }, { "epoch": 2.2424456368257553, "grad_norm": 3.0468035283569805, "learning_rate": 3.1622227946959374e-06, "loss": 0.1416, "step": 15881 }, { "epoch": 2.242586839875741, "grad_norm": 3.1232788711366166, "learning_rate": 3.161110495519053e-06, "loss": 0.1328, "step": 15882 }, { "epoch": 2.242728042925727, "grad_norm": 2.7699712923709914, "learning_rate": 3.159998355275008e-06, "loss": 0.1397, "step": 15883 }, { "epoch": 2.242869245975713, "grad_norm": 2.9541531176428526, "learning_rate": 3.1588863739896457e-06, "loss": 0.1604, "step": 15884 }, { "epoch": 2.243010449025699, "grad_norm": 5.173710067010855, "learning_rate": 3.1577745516888103e-06, "loss": 0.2172, "step": 15885 }, { "epoch": 2.2431516520756847, "grad_norm": 3.1815845762067734, "learning_rate": 3.1566628883983395e-06, "loss": 0.1577, "step": 15886 }, { "epoch": 2.2432928551256706, "grad_norm": 3.302840927747817, "learning_rate": 3.1555513841440686e-06, "loss": 0.1287, "step": 15887 }, { "epoch": 2.2434340581756564, "grad_norm": 3.012746810947023, "learning_rate": 3.154440038951827e-06, "loss": 0.1243, "step": 15888 }, { "epoch": 2.2435752612256423, "grad_norm": 3.041528129081475, "learning_rate": 3.1533288528474416e-06, "loss": 0.1277, "step": 15889 }, { "epoch": 2.243716464275628, "grad_norm": 3.0470392290795747, "learning_rate": 3.152217825856738e-06, "loss": 0.1717, "step": 15890 }, { "epoch": 2.243857667325614, "grad_norm": 3.2505469831367506, "learning_rate": 3.1511069580055343e-06, "loss": 0.1781, "step": 15891 }, { "epoch": 2.2439988703756, "grad_norm": 3.3793950873713072, "learning_rate": 3.1499962493196446e-06, "loss": 0.1635, "step": 15892 }, { "epoch": 2.244140073425586, "grad_norm": 3.7653232216057524, "learning_rate": 3.1488856998248864e-06, "loss": 0.1676, "step": 15893 }, { "epoch": 2.2442812764755717, "grad_norm": 2.8224492854927288, "learning_rate": 3.1477753095470654e-06, "loss": 0.1236, "step": 15894 }, { "epoch": 2.2444224795255576, "grad_norm": 3.2063279978494497, "learning_rate": 3.14666507851199e-06, "loss": 0.1603, "step": 15895 }, { "epoch": 2.2445636825755435, "grad_norm": 2.424023335894876, "learning_rate": 3.1455550067454555e-06, "loss": 0.1352, "step": 15896 }, { "epoch": 2.2447048856255294, "grad_norm": 3.7022919323429315, "learning_rate": 3.1444450942732594e-06, "loss": 0.1466, "step": 15897 }, { "epoch": 2.2448460886755153, "grad_norm": 3.4593736347396633, "learning_rate": 3.143335341121202e-06, "loss": 0.1435, "step": 15898 }, { "epoch": 2.244987291725501, "grad_norm": 3.0199088302280175, "learning_rate": 3.142225747315071e-06, "loss": 0.1321, "step": 15899 }, { "epoch": 2.245128494775487, "grad_norm": 3.184749326140049, "learning_rate": 3.1411163128806497e-06, "loss": 0.1628, "step": 15900 }, { "epoch": 2.245269697825473, "grad_norm": 3.295091311281202, "learning_rate": 3.1400070378437253e-06, "loss": 0.1658, "step": 15901 }, { "epoch": 2.245410900875459, "grad_norm": 4.093782775114767, "learning_rate": 3.138897922230074e-06, "loss": 0.1932, "step": 15902 }, { "epoch": 2.2455521039254447, "grad_norm": 2.405948643867988, "learning_rate": 3.1377889660654712e-06, "loss": 0.1042, "step": 15903 }, { "epoch": 2.2456933069754306, "grad_norm": 3.016171103988904, "learning_rate": 3.13668016937569e-06, "loss": 0.1745, "step": 15904 }, { "epoch": 2.2458345100254165, "grad_norm": 4.092768364385749, "learning_rate": 3.1355715321864978e-06, "loss": 0.1805, "step": 15905 }, { "epoch": 2.2459757130754023, "grad_norm": 3.941052728568223, "learning_rate": 3.1344630545236576e-06, "loss": 0.1824, "step": 15906 }, { "epoch": 2.246116916125388, "grad_norm": 3.3009733451825936, "learning_rate": 3.1333547364129324e-06, "loss": 0.1525, "step": 15907 }, { "epoch": 2.246258119175374, "grad_norm": 3.6683328966442095, "learning_rate": 3.132246577880077e-06, "loss": 0.1555, "step": 15908 }, { "epoch": 2.24639932222536, "grad_norm": 2.71014464828207, "learning_rate": 3.131138578950842e-06, "loss": 0.1253, "step": 15909 }, { "epoch": 2.246540525275346, "grad_norm": 3.0675011168195123, "learning_rate": 3.1300307396509833e-06, "loss": 0.1323, "step": 15910 }, { "epoch": 2.2466817283253318, "grad_norm": 4.337292705587969, "learning_rate": 3.1289230600062427e-06, "loss": 0.1847, "step": 15911 }, { "epoch": 2.2468229313753176, "grad_norm": 3.284005147548238, "learning_rate": 3.1278155400423673e-06, "loss": 0.1471, "step": 15912 }, { "epoch": 2.2469641344253035, "grad_norm": 3.425738761719479, "learning_rate": 3.1267081797850862e-06, "loss": 0.1798, "step": 15913 }, { "epoch": 2.2471053374752894, "grad_norm": 3.465761075890443, "learning_rate": 3.125600979260136e-06, "loss": 0.179, "step": 15914 }, { "epoch": 2.2472465405252753, "grad_norm": 3.289471314331126, "learning_rate": 3.1244939384932537e-06, "loss": 0.1636, "step": 15915 }, { "epoch": 2.247387743575261, "grad_norm": 2.852613123414985, "learning_rate": 3.123387057510162e-06, "loss": 0.1335, "step": 15916 }, { "epoch": 2.247528946625247, "grad_norm": 2.9120573353869497, "learning_rate": 3.122280336336587e-06, "loss": 0.1565, "step": 15917 }, { "epoch": 2.247670149675233, "grad_norm": 3.239016036494344, "learning_rate": 3.121173774998245e-06, "loss": 0.1511, "step": 15918 }, { "epoch": 2.247811352725219, "grad_norm": 3.6861611062135835, "learning_rate": 3.1200673735208555e-06, "loss": 0.1654, "step": 15919 }, { "epoch": 2.2479525557752047, "grad_norm": 3.604016945906634, "learning_rate": 3.118961131930127e-06, "loss": 0.1529, "step": 15920 }, { "epoch": 2.2480937588251906, "grad_norm": 3.418576965347991, "learning_rate": 3.1178550502517725e-06, "loss": 0.1689, "step": 15921 }, { "epoch": 2.2482349618751765, "grad_norm": 3.1445663175799012, "learning_rate": 3.1167491285114928e-06, "loss": 0.1435, "step": 15922 }, { "epoch": 2.2483761649251623, "grad_norm": 3.5229814517803804, "learning_rate": 3.1156433667349907e-06, "loss": 0.1661, "step": 15923 }, { "epoch": 2.2485173679751482, "grad_norm": 3.6752757964804696, "learning_rate": 3.1145377649479635e-06, "loss": 0.1609, "step": 15924 }, { "epoch": 2.248658571025134, "grad_norm": 2.680679589476101, "learning_rate": 3.1134323231761064e-06, "loss": 0.148, "step": 15925 }, { "epoch": 2.24879977407512, "grad_norm": 2.8394880152347746, "learning_rate": 3.1123270414451035e-06, "loss": 0.154, "step": 15926 }, { "epoch": 2.248940977125106, "grad_norm": 2.5030502874232106, "learning_rate": 3.1112219197806492e-06, "loss": 0.1417, "step": 15927 }, { "epoch": 2.2490821801750918, "grad_norm": 3.408623693516801, "learning_rate": 3.110116958208422e-06, "loss": 0.1409, "step": 15928 }, { "epoch": 2.2492233832250776, "grad_norm": 2.725913603722745, "learning_rate": 3.1090121567541052e-06, "loss": 0.1295, "step": 15929 }, { "epoch": 2.2493645862750635, "grad_norm": 3.1812211474243535, "learning_rate": 3.107907515443367e-06, "loss": 0.2012, "step": 15930 }, { "epoch": 2.2495057893250494, "grad_norm": 2.7095059986685874, "learning_rate": 3.1068030343018773e-06, "loss": 0.1511, "step": 15931 }, { "epoch": 2.2496469923750353, "grad_norm": 3.730795415951922, "learning_rate": 3.1056987133553118e-06, "loss": 0.1667, "step": 15932 }, { "epoch": 2.249788195425021, "grad_norm": 2.9546025696526916, "learning_rate": 3.1045945526293307e-06, "loss": 0.143, "step": 15933 }, { "epoch": 2.249929398475007, "grad_norm": 2.9821731714340096, "learning_rate": 3.103490552149595e-06, "loss": 0.1519, "step": 15934 }, { "epoch": 2.250070601524993, "grad_norm": 2.7749955406618474, "learning_rate": 3.1023867119417595e-06, "loss": 0.1658, "step": 15935 }, { "epoch": 2.250211804574979, "grad_norm": 2.719883160818092, "learning_rate": 3.1012830320314793e-06, "loss": 0.1338, "step": 15936 }, { "epoch": 2.2503530076249647, "grad_norm": 2.877843803498858, "learning_rate": 3.1001795124444003e-06, "loss": 0.1459, "step": 15937 }, { "epoch": 2.2504942106749506, "grad_norm": 3.1247153215527987, "learning_rate": 3.0990761532061707e-06, "loss": 0.1487, "step": 15938 }, { "epoch": 2.2506354137249365, "grad_norm": 3.5857319961021603, "learning_rate": 3.097972954342431e-06, "loss": 0.1488, "step": 15939 }, { "epoch": 2.2507766167749224, "grad_norm": 2.777287054092524, "learning_rate": 3.0968699158788185e-06, "loss": 0.1348, "step": 15940 }, { "epoch": 2.2509178198249082, "grad_norm": 2.5823167660859796, "learning_rate": 3.095767037840969e-06, "loss": 0.1207, "step": 15941 }, { "epoch": 2.251059022874894, "grad_norm": 2.662811348252803, "learning_rate": 3.0946643202545113e-06, "loss": 0.1344, "step": 15942 }, { "epoch": 2.25120022592488, "grad_norm": 3.0392760124487426, "learning_rate": 3.0935617631450686e-06, "loss": 0.1548, "step": 15943 }, { "epoch": 2.251341428974866, "grad_norm": 3.3129323799772803, "learning_rate": 3.092459366538272e-06, "loss": 0.1369, "step": 15944 }, { "epoch": 2.2514826320248518, "grad_norm": 4.3771175647428295, "learning_rate": 3.0913571304597367e-06, "loss": 0.2273, "step": 15945 }, { "epoch": 2.2516238350748377, "grad_norm": 3.6558673535206707, "learning_rate": 3.090255054935081e-06, "loss": 0.1701, "step": 15946 }, { "epoch": 2.2517650381248235, "grad_norm": 3.3488803535084783, "learning_rate": 3.08915313998991e-06, "loss": 0.197, "step": 15947 }, { "epoch": 2.2519062411748094, "grad_norm": 3.0106710633937874, "learning_rate": 3.0880513856498363e-06, "loss": 0.1539, "step": 15948 }, { "epoch": 2.2520474442247953, "grad_norm": 2.9668269133519654, "learning_rate": 3.08694979194046e-06, "loss": 0.1347, "step": 15949 }, { "epoch": 2.252188647274781, "grad_norm": 2.6103102428897524, "learning_rate": 3.0858483588873878e-06, "loss": 0.1256, "step": 15950 }, { "epoch": 2.252329850324767, "grad_norm": 2.6959023154672925, "learning_rate": 3.0847470865162143e-06, "loss": 0.1128, "step": 15951 }, { "epoch": 2.252471053374753, "grad_norm": 4.2879187031012975, "learning_rate": 3.0836459748525316e-06, "loss": 0.1874, "step": 15952 }, { "epoch": 2.252612256424739, "grad_norm": 2.710821606168807, "learning_rate": 3.082545023921929e-06, "loss": 0.1333, "step": 15953 }, { "epoch": 2.2527534594747247, "grad_norm": 2.8568121927839476, "learning_rate": 3.081444233749994e-06, "loss": 0.1517, "step": 15954 }, { "epoch": 2.2528946625247106, "grad_norm": 3.3319661943869834, "learning_rate": 3.080343604362306e-06, "loss": 0.1604, "step": 15955 }, { "epoch": 2.2530358655746965, "grad_norm": 2.71091420696974, "learning_rate": 3.0792431357844444e-06, "loss": 0.1147, "step": 15956 }, { "epoch": 2.2531770686246824, "grad_norm": 3.0946893320311526, "learning_rate": 3.0781428280419833e-06, "loss": 0.1484, "step": 15957 }, { "epoch": 2.2533182716746682, "grad_norm": 3.6209029125809593, "learning_rate": 3.0770426811604946e-06, "loss": 0.1724, "step": 15958 }, { "epoch": 2.253459474724654, "grad_norm": 2.8521644815799223, "learning_rate": 3.0759426951655437e-06, "loss": 0.1521, "step": 15959 }, { "epoch": 2.25360067777464, "grad_norm": 2.9331253847008862, "learning_rate": 3.0748428700826938e-06, "loss": 0.161, "step": 15960 }, { "epoch": 2.253741880824626, "grad_norm": 3.1128675442047142, "learning_rate": 3.073743205937502e-06, "loss": 0.1423, "step": 15961 }, { "epoch": 2.253883083874612, "grad_norm": 3.4116175772612287, "learning_rate": 3.07264370275553e-06, "loss": 0.2127, "step": 15962 }, { "epoch": 2.2540242869245977, "grad_norm": 3.1708613284402842, "learning_rate": 3.0715443605623296e-06, "loss": 0.1744, "step": 15963 }, { "epoch": 2.2541654899745835, "grad_norm": 2.8340557324158064, "learning_rate": 3.0704451793834433e-06, "loss": 0.1197, "step": 15964 }, { "epoch": 2.2543066930245694, "grad_norm": 2.5715013298222256, "learning_rate": 3.0693461592444184e-06, "loss": 0.1084, "step": 15965 }, { "epoch": 2.2544478960745553, "grad_norm": 3.155852206362035, "learning_rate": 3.0682473001707925e-06, "loss": 0.0962, "step": 15966 }, { "epoch": 2.254589099124541, "grad_norm": 2.854515415717756, "learning_rate": 3.067148602188108e-06, "loss": 0.1431, "step": 15967 }, { "epoch": 2.254730302174527, "grad_norm": 3.3971794135987157, "learning_rate": 3.0660500653218973e-06, "loss": 0.1616, "step": 15968 }, { "epoch": 2.254871505224513, "grad_norm": 4.052321120595911, "learning_rate": 3.0649516895976883e-06, "loss": 0.1879, "step": 15969 }, { "epoch": 2.255012708274499, "grad_norm": 3.6848999717705944, "learning_rate": 3.0638534750410065e-06, "loss": 0.2059, "step": 15970 }, { "epoch": 2.2551539113244847, "grad_norm": 3.84793696262174, "learning_rate": 3.0627554216773736e-06, "loss": 0.1728, "step": 15971 }, { "epoch": 2.2552951143744706, "grad_norm": 3.3204669781264378, "learning_rate": 3.0616575295323105e-06, "loss": 0.148, "step": 15972 }, { "epoch": 2.2554363174244565, "grad_norm": 3.2646171201971397, "learning_rate": 3.0605597986313284e-06, "loss": 0.1766, "step": 15973 }, { "epoch": 2.2555775204744424, "grad_norm": 3.613147587267159, "learning_rate": 3.059462228999941e-06, "loss": 0.2013, "step": 15974 }, { "epoch": 2.2557187235244283, "grad_norm": 3.076365644579705, "learning_rate": 3.0583648206636542e-06, "loss": 0.1365, "step": 15975 }, { "epoch": 2.255859926574414, "grad_norm": 2.7974674810364033, "learning_rate": 3.0572675736479696e-06, "loss": 0.1454, "step": 15976 }, { "epoch": 2.2560011296244, "grad_norm": 2.910538353965002, "learning_rate": 3.0561704879783894e-06, "loss": 0.1601, "step": 15977 }, { "epoch": 2.256142332674386, "grad_norm": 2.6287449440057298, "learning_rate": 3.055073563680404e-06, "loss": 0.1075, "step": 15978 }, { "epoch": 2.256283535724372, "grad_norm": 3.4962777920009436, "learning_rate": 3.0539768007795134e-06, "loss": 0.1837, "step": 15979 }, { "epoch": 2.2564247387743577, "grad_norm": 3.42997305081233, "learning_rate": 3.0528801993012056e-06, "loss": 0.1689, "step": 15980 }, { "epoch": 2.2565659418243436, "grad_norm": 2.904836379274577, "learning_rate": 3.051783759270959e-06, "loss": 0.1229, "step": 15981 }, { "epoch": 2.2567071448743294, "grad_norm": 2.7457922712241647, "learning_rate": 3.050687480714256e-06, "loss": 0.1234, "step": 15982 }, { "epoch": 2.2568483479243153, "grad_norm": 2.9450437037687798, "learning_rate": 3.0495913636565735e-06, "loss": 0.1468, "step": 15983 }, { "epoch": 2.256989550974301, "grad_norm": 2.9528265666902103, "learning_rate": 3.0484954081233877e-06, "loss": 0.1237, "step": 15984 }, { "epoch": 2.257130754024287, "grad_norm": 3.4168451118246352, "learning_rate": 3.047399614140166e-06, "loss": 0.1464, "step": 15985 }, { "epoch": 2.257271957074273, "grad_norm": 3.022705208641973, "learning_rate": 3.046303981732376e-06, "loss": 0.1638, "step": 15986 }, { "epoch": 2.257413160124259, "grad_norm": 3.036554577486767, "learning_rate": 3.045208510925478e-06, "loss": 0.1434, "step": 15987 }, { "epoch": 2.2575543631742447, "grad_norm": 3.3519187953596234, "learning_rate": 3.0441132017449305e-06, "loss": 0.1558, "step": 15988 }, { "epoch": 2.2576955662242306, "grad_norm": 2.9712221247839974, "learning_rate": 3.043018054216188e-06, "loss": 0.0979, "step": 15989 }, { "epoch": 2.2578367692742165, "grad_norm": 3.447719653517833, "learning_rate": 3.0419230683647018e-06, "loss": 0.2329, "step": 15990 }, { "epoch": 2.2579779723242024, "grad_norm": 2.791136456275009, "learning_rate": 3.0408282442159177e-06, "loss": 0.1322, "step": 15991 }, { "epoch": 2.2581191753741883, "grad_norm": 3.0448515643531775, "learning_rate": 3.03973358179528e-06, "loss": 0.1283, "step": 15992 }, { "epoch": 2.258260378424174, "grad_norm": 2.6388983330895877, "learning_rate": 3.0386390811282283e-06, "loss": 0.1278, "step": 15993 }, { "epoch": 2.25840158147416, "grad_norm": 3.2585495685342636, "learning_rate": 3.0375447422401982e-06, "loss": 0.1451, "step": 15994 }, { "epoch": 2.2585427845241455, "grad_norm": 3.434081807522444, "learning_rate": 3.036450565156618e-06, "loss": 0.1619, "step": 15995 }, { "epoch": 2.2586839875741314, "grad_norm": 3.10284723456881, "learning_rate": 3.0353565499029223e-06, "loss": 0.1309, "step": 15996 }, { "epoch": 2.2588251906241172, "grad_norm": 2.8422981321280574, "learning_rate": 3.034262696504536e-06, "loss": 0.1551, "step": 15997 }, { "epoch": 2.258966393674103, "grad_norm": 3.3110039711558574, "learning_rate": 3.0331690049868733e-06, "loss": 0.1623, "step": 15998 }, { "epoch": 2.259107596724089, "grad_norm": 3.51784801673797, "learning_rate": 3.0320754753753544e-06, "loss": 0.1737, "step": 15999 }, { "epoch": 2.259248799774075, "grad_norm": 3.1596735455445706, "learning_rate": 3.0309821076953893e-06, "loss": 0.1686, "step": 16000 }, { "epoch": 2.2593900028240608, "grad_norm": 4.713380853169764, "learning_rate": 3.0298889019723933e-06, "loss": 0.2414, "step": 16001 }, { "epoch": 2.2595312058740467, "grad_norm": 3.5702066039163656, "learning_rate": 3.028795858231768e-06, "loss": 0.1509, "step": 16002 }, { "epoch": 2.2596724089240325, "grad_norm": 3.3120204009703107, "learning_rate": 3.0277029764989173e-06, "loss": 0.1672, "step": 16003 }, { "epoch": 2.2598136119740184, "grad_norm": 3.4051309893706168, "learning_rate": 3.026610256799238e-06, "loss": 0.135, "step": 16004 }, { "epoch": 2.2599548150240043, "grad_norm": 3.90973031788945, "learning_rate": 3.0255176991581246e-06, "loss": 0.2069, "step": 16005 }, { "epoch": 2.26009601807399, "grad_norm": 2.217134446012374, "learning_rate": 3.0244253036009684e-06, "loss": 0.0909, "step": 16006 }, { "epoch": 2.260237221123976, "grad_norm": 2.6364313137531474, "learning_rate": 3.023333070153155e-06, "loss": 0.141, "step": 16007 }, { "epoch": 2.260378424173962, "grad_norm": 4.030734919948177, "learning_rate": 3.022240998840068e-06, "loss": 0.2134, "step": 16008 }, { "epoch": 2.260519627223948, "grad_norm": 2.8905871539988444, "learning_rate": 3.0211490896870876e-06, "loss": 0.1122, "step": 16009 }, { "epoch": 2.2606608302739337, "grad_norm": 3.388308306586784, "learning_rate": 3.0200573427195877e-06, "loss": 0.1787, "step": 16010 }, { "epoch": 2.2608020333239196, "grad_norm": 3.0451652989550575, "learning_rate": 3.0189657579629405e-06, "loss": 0.1239, "step": 16011 }, { "epoch": 2.2609432363739055, "grad_norm": 3.8794496482929537, "learning_rate": 3.017874335442512e-06, "loss": 0.165, "step": 16012 }, { "epoch": 2.2610844394238914, "grad_norm": 3.5655350910800716, "learning_rate": 3.0167830751836712e-06, "loss": 0.1798, "step": 16013 }, { "epoch": 2.2612256424738773, "grad_norm": 3.1560702071672377, "learning_rate": 3.0156919772117788e-06, "loss": 0.1423, "step": 16014 }, { "epoch": 2.261366845523863, "grad_norm": 3.542024086845946, "learning_rate": 3.0146010415521865e-06, "loss": 0.2055, "step": 16015 }, { "epoch": 2.261508048573849, "grad_norm": 3.6945103169119884, "learning_rate": 3.0135102682302477e-06, "loss": 0.1634, "step": 16016 }, { "epoch": 2.261649251623835, "grad_norm": 3.3382928728700856, "learning_rate": 3.0124196572713104e-06, "loss": 0.1598, "step": 16017 }, { "epoch": 2.261790454673821, "grad_norm": 2.912432573371749, "learning_rate": 3.011329208700726e-06, "loss": 0.127, "step": 16018 }, { "epoch": 2.2619316577238067, "grad_norm": 2.41699042314935, "learning_rate": 3.010238922543833e-06, "loss": 0.1123, "step": 16019 }, { "epoch": 2.2620728607737925, "grad_norm": 3.4726001818702765, "learning_rate": 3.0091487988259684e-06, "loss": 0.1711, "step": 16020 }, { "epoch": 2.2622140638237784, "grad_norm": 3.3626638423332387, "learning_rate": 3.008058837572466e-06, "loss": 0.1754, "step": 16021 }, { "epoch": 2.2623552668737643, "grad_norm": 3.5004705995564014, "learning_rate": 3.006969038808658e-06, "loss": 0.1453, "step": 16022 }, { "epoch": 2.26249646992375, "grad_norm": 2.452390999853178, "learning_rate": 3.005879402559868e-06, "loss": 0.1088, "step": 16023 }, { "epoch": 2.262637672973736, "grad_norm": 2.573946230773676, "learning_rate": 3.0047899288514213e-06, "loss": 0.1136, "step": 16024 }, { "epoch": 2.262778876023722, "grad_norm": 3.9620008065560968, "learning_rate": 3.0037006177086347e-06, "loss": 0.1945, "step": 16025 }, { "epoch": 2.262920079073708, "grad_norm": 3.2743056978481144, "learning_rate": 3.0026114691568255e-06, "loss": 0.1994, "step": 16026 }, { "epoch": 2.2630612821236937, "grad_norm": 2.4687385589606774, "learning_rate": 3.001522483221302e-06, "loss": 0.1078, "step": 16027 }, { "epoch": 2.2632024851736796, "grad_norm": 3.5934979011028947, "learning_rate": 3.000433659927375e-06, "loss": 0.2042, "step": 16028 }, { "epoch": 2.2633436882236655, "grad_norm": 3.1195366067675887, "learning_rate": 2.999344999300343e-06, "loss": 0.131, "step": 16029 }, { "epoch": 2.2634848912736514, "grad_norm": 3.1727138783561717, "learning_rate": 2.998256501365514e-06, "loss": 0.1542, "step": 16030 }, { "epoch": 2.2636260943236373, "grad_norm": 3.2748814209678923, "learning_rate": 2.9971681661481823e-06, "loss": 0.181, "step": 16031 }, { "epoch": 2.263767297373623, "grad_norm": 2.935185447841041, "learning_rate": 2.9960799936736353e-06, "loss": 0.1354, "step": 16032 }, { "epoch": 2.263908500423609, "grad_norm": 2.6563256465441665, "learning_rate": 2.994991983967165e-06, "loss": 0.1323, "step": 16033 }, { "epoch": 2.264049703473595, "grad_norm": 2.458422506112852, "learning_rate": 2.993904137054051e-06, "loss": 0.1259, "step": 16034 }, { "epoch": 2.264190906523581, "grad_norm": 2.5679818980366176, "learning_rate": 2.9928164529595836e-06, "loss": 0.1203, "step": 16035 }, { "epoch": 2.2643321095735667, "grad_norm": 3.593043971331139, "learning_rate": 2.9917289317090357e-06, "loss": 0.1544, "step": 16036 }, { "epoch": 2.2644733126235526, "grad_norm": 3.851631795913417, "learning_rate": 2.9906415733276808e-06, "loss": 0.1807, "step": 16037 }, { "epoch": 2.2646145156735384, "grad_norm": 3.6202498583359075, "learning_rate": 2.9895543778407875e-06, "loss": 0.1829, "step": 16038 }, { "epoch": 2.2647557187235243, "grad_norm": 2.6222753276268254, "learning_rate": 2.988467345273628e-06, "loss": 0.1373, "step": 16039 }, { "epoch": 2.26489692177351, "grad_norm": 2.809072727836093, "learning_rate": 2.9873804756514513e-06, "loss": 0.15, "step": 16040 }, { "epoch": 2.265038124823496, "grad_norm": 4.489722921360118, "learning_rate": 2.9862937689995276e-06, "loss": 0.2504, "step": 16041 }, { "epoch": 2.265179327873482, "grad_norm": 3.5371003871130546, "learning_rate": 2.9852072253431073e-06, "loss": 0.1892, "step": 16042 }, { "epoch": 2.265320530923468, "grad_norm": 3.2655261010044736, "learning_rate": 2.984120844707442e-06, "loss": 0.177, "step": 16043 }, { "epoch": 2.2654617339734537, "grad_norm": 3.8855359691683047, "learning_rate": 2.983034627117779e-06, "loss": 0.1842, "step": 16044 }, { "epoch": 2.2656029370234396, "grad_norm": 3.367519937580669, "learning_rate": 2.9819485725993603e-06, "loss": 0.1632, "step": 16045 }, { "epoch": 2.2657441400734255, "grad_norm": 2.7435414540425658, "learning_rate": 2.9808626811774222e-06, "loss": 0.1455, "step": 16046 }, { "epoch": 2.2658853431234114, "grad_norm": 3.297177136591904, "learning_rate": 2.979776952877208e-06, "loss": 0.1707, "step": 16047 }, { "epoch": 2.2660265461733973, "grad_norm": 3.0626618114999964, "learning_rate": 2.9786913877239486e-06, "loss": 0.1593, "step": 16048 }, { "epoch": 2.266167749223383, "grad_norm": 2.857778522701081, "learning_rate": 2.977605985742866e-06, "loss": 0.1298, "step": 16049 }, { "epoch": 2.266308952273369, "grad_norm": 2.8512928199918735, "learning_rate": 2.976520746959187e-06, "loss": 0.1599, "step": 16050 }, { "epoch": 2.266450155323355, "grad_norm": 3.317713400738987, "learning_rate": 2.9754356713981337e-06, "loss": 0.1367, "step": 16051 }, { "epoch": 2.266591358373341, "grad_norm": 2.956077434625852, "learning_rate": 2.9743507590849176e-06, "loss": 0.1595, "step": 16052 }, { "epoch": 2.2667325614233267, "grad_norm": 4.050857510296772, "learning_rate": 2.9732660100447586e-06, "loss": 0.2144, "step": 16053 }, { "epoch": 2.2668737644733126, "grad_norm": 2.8097234141020127, "learning_rate": 2.9721814243028635e-06, "loss": 0.112, "step": 16054 }, { "epoch": 2.2670149675232985, "grad_norm": 3.3288259867311454, "learning_rate": 2.9710970018844378e-06, "loss": 0.1917, "step": 16055 }, { "epoch": 2.2671561705732843, "grad_norm": 3.0252208567989007, "learning_rate": 2.970012742814684e-06, "loss": 0.1426, "step": 16056 }, { "epoch": 2.26729737362327, "grad_norm": 3.074976023278469, "learning_rate": 2.968928647118793e-06, "loss": 0.1334, "step": 16057 }, { "epoch": 2.267438576673256, "grad_norm": 3.6919051641301737, "learning_rate": 2.967844714821966e-06, "loss": 0.1564, "step": 16058 }, { "epoch": 2.267579779723242, "grad_norm": 3.6800699964147117, "learning_rate": 2.9667609459493907e-06, "loss": 0.185, "step": 16059 }, { "epoch": 2.267720982773228, "grad_norm": 3.608491671446746, "learning_rate": 2.965677340526254e-06, "loss": 0.1512, "step": 16060 }, { "epoch": 2.2678621858232137, "grad_norm": 3.454797445263629, "learning_rate": 2.964593898577738e-06, "loss": 0.1614, "step": 16061 }, { "epoch": 2.2680033888731996, "grad_norm": 3.6555995891092756, "learning_rate": 2.963510620129021e-06, "loss": 0.1706, "step": 16062 }, { "epoch": 2.2681445919231855, "grad_norm": 3.2079649262013783, "learning_rate": 2.962427505205279e-06, "loss": 0.1596, "step": 16063 }, { "epoch": 2.2682857949731714, "grad_norm": 2.8725066713181024, "learning_rate": 2.961344553831679e-06, "loss": 0.1209, "step": 16064 }, { "epoch": 2.2684269980231573, "grad_norm": 3.3632100792599777, "learning_rate": 2.9602617660333988e-06, "loss": 0.1564, "step": 16065 }, { "epoch": 2.268568201073143, "grad_norm": 2.73124856029468, "learning_rate": 2.959179141835591e-06, "loss": 0.1284, "step": 16066 }, { "epoch": 2.268709404123129, "grad_norm": 3.188947229162904, "learning_rate": 2.9580966812634194e-06, "loss": 0.1451, "step": 16067 }, { "epoch": 2.268850607173115, "grad_norm": 2.9520718210608226, "learning_rate": 2.9570143843420394e-06, "loss": 0.1447, "step": 16068 }, { "epoch": 2.268991810223101, "grad_norm": 2.6766851072878044, "learning_rate": 2.9559322510966004e-06, "loss": 0.1382, "step": 16069 }, { "epoch": 2.2691330132730867, "grad_norm": 2.731130354208133, "learning_rate": 2.9548502815522573e-06, "loss": 0.1553, "step": 16070 }, { "epoch": 2.2692742163230726, "grad_norm": 3.1233032121382065, "learning_rate": 2.953768475734151e-06, "loss": 0.145, "step": 16071 }, { "epoch": 2.2694154193730585, "grad_norm": 2.895813151024847, "learning_rate": 2.952686833667423e-06, "loss": 0.1397, "step": 16072 }, { "epoch": 2.2695566224230443, "grad_norm": 3.668831065355843, "learning_rate": 2.9516053553772116e-06, "loss": 0.1747, "step": 16073 }, { "epoch": 2.2696978254730302, "grad_norm": 3.283275066842753, "learning_rate": 2.9505240408886417e-06, "loss": 0.155, "step": 16074 }, { "epoch": 2.269839028523016, "grad_norm": 3.4158401549923516, "learning_rate": 2.9494428902268524e-06, "loss": 0.1539, "step": 16075 }, { "epoch": 2.269980231573002, "grad_norm": 2.951928272704684, "learning_rate": 2.948361903416965e-06, "loss": 0.1324, "step": 16076 }, { "epoch": 2.270121434622988, "grad_norm": 3.580675371716236, "learning_rate": 2.947281080484101e-06, "loss": 0.1703, "step": 16077 }, { "epoch": 2.2702626376729738, "grad_norm": 3.046173921325846, "learning_rate": 2.9462004214533803e-06, "loss": 0.1514, "step": 16078 }, { "epoch": 2.2704038407229596, "grad_norm": 3.975863160832691, "learning_rate": 2.945119926349914e-06, "loss": 0.207, "step": 16079 }, { "epoch": 2.2705450437729455, "grad_norm": 3.450429182877458, "learning_rate": 2.944039595198814e-06, "loss": 0.1848, "step": 16080 }, { "epoch": 2.2706862468229314, "grad_norm": 3.085378082509213, "learning_rate": 2.942959428025185e-06, "loss": 0.128, "step": 16081 }, { "epoch": 2.2708274498729173, "grad_norm": 3.080534411275803, "learning_rate": 2.9418794248541362e-06, "loss": 0.1445, "step": 16082 }, { "epoch": 2.270968652922903, "grad_norm": 3.2780679950187217, "learning_rate": 2.9407995857107584e-06, "loss": 0.1601, "step": 16083 }, { "epoch": 2.271109855972889, "grad_norm": 3.0218570548605226, "learning_rate": 2.9397199106201492e-06, "loss": 0.1452, "step": 16084 }, { "epoch": 2.271251059022875, "grad_norm": 3.47739250889305, "learning_rate": 2.938640399607401e-06, "loss": 0.1787, "step": 16085 }, { "epoch": 2.271392262072861, "grad_norm": 2.9404597187305446, "learning_rate": 2.937561052697597e-06, "loss": 0.1245, "step": 16086 }, { "epoch": 2.2715334651228467, "grad_norm": 3.9411792189755137, "learning_rate": 2.9364818699158272e-06, "loss": 0.1651, "step": 16087 }, { "epoch": 2.2716746681728326, "grad_norm": 2.857453763521429, "learning_rate": 2.935402851287168e-06, "loss": 0.1127, "step": 16088 }, { "epoch": 2.2718158712228185, "grad_norm": 3.4317992471579917, "learning_rate": 2.9343239968366956e-06, "loss": 0.1798, "step": 16089 }, { "epoch": 2.2719570742728044, "grad_norm": 2.886906852786769, "learning_rate": 2.933245306589485e-06, "loss": 0.1357, "step": 16090 }, { "epoch": 2.2720982773227902, "grad_norm": 3.2961275322561137, "learning_rate": 2.9321667805705955e-06, "loss": 0.1787, "step": 16091 }, { "epoch": 2.272239480372776, "grad_norm": 3.6216246093294986, "learning_rate": 2.9310884188051013e-06, "loss": 0.1803, "step": 16092 }, { "epoch": 2.272380683422762, "grad_norm": 3.143553354790983, "learning_rate": 2.930010221318059e-06, "loss": 0.1554, "step": 16093 }, { "epoch": 2.272521886472748, "grad_norm": 3.3982342576423448, "learning_rate": 2.9289321881345257e-06, "loss": 0.1354, "step": 16094 }, { "epoch": 2.2726630895227338, "grad_norm": 3.658347651914011, "learning_rate": 2.927854319279555e-06, "loss": 0.2104, "step": 16095 }, { "epoch": 2.2728042925727197, "grad_norm": 4.0475246092893755, "learning_rate": 2.926776614778195e-06, "loss": 0.174, "step": 16096 }, { "epoch": 2.2729454956227055, "grad_norm": 2.7303802434421045, "learning_rate": 2.9256990746554926e-06, "loss": 0.1163, "step": 16097 }, { "epoch": 2.2730866986726914, "grad_norm": 3.008462571478294, "learning_rate": 2.924621698936485e-06, "loss": 0.1274, "step": 16098 }, { "epoch": 2.2732279017226773, "grad_norm": 3.2946756691883494, "learning_rate": 2.9235444876462194e-06, "loss": 0.1736, "step": 16099 }, { "epoch": 2.273369104772663, "grad_norm": 3.4466583571208234, "learning_rate": 2.9224674408097207e-06, "loss": 0.1541, "step": 16100 }, { "epoch": 2.273510307822649, "grad_norm": 3.509852167462246, "learning_rate": 2.921390558452023e-06, "loss": 0.1716, "step": 16101 }, { "epoch": 2.273651510872635, "grad_norm": 2.9591048243140103, "learning_rate": 2.9203138405981514e-06, "loss": 0.136, "step": 16102 }, { "epoch": 2.273792713922621, "grad_norm": 3.7624845468218338, "learning_rate": 2.9192372872731255e-06, "loss": 0.1906, "step": 16103 }, { "epoch": 2.2739339169726067, "grad_norm": 3.332172015065085, "learning_rate": 2.9181608985019705e-06, "loss": 0.1636, "step": 16104 }, { "epoch": 2.2740751200225926, "grad_norm": 3.806964540947606, "learning_rate": 2.917084674309697e-06, "loss": 0.1646, "step": 16105 }, { "epoch": 2.2742163230725785, "grad_norm": 3.0832064187893486, "learning_rate": 2.9160086147213174e-06, "loss": 0.1408, "step": 16106 }, { "epoch": 2.2743575261225644, "grad_norm": 3.5271772292147423, "learning_rate": 2.9149327197618405e-06, "loss": 0.139, "step": 16107 }, { "epoch": 2.2744987291725502, "grad_norm": 3.267779605261069, "learning_rate": 2.913856989456262e-06, "loss": 0.142, "step": 16108 }, { "epoch": 2.274639932222536, "grad_norm": 3.213130036633731, "learning_rate": 2.9127814238295904e-06, "loss": 0.1281, "step": 16109 }, { "epoch": 2.274781135272522, "grad_norm": 3.2771299997528454, "learning_rate": 2.911706022906816e-06, "loss": 0.1527, "step": 16110 }, { "epoch": 2.274922338322508, "grad_norm": 3.281075162465656, "learning_rate": 2.9106307867129347e-06, "loss": 0.1357, "step": 16111 }, { "epoch": 2.275063541372494, "grad_norm": 3.995887019693327, "learning_rate": 2.90955571527293e-06, "loss": 0.1807, "step": 16112 }, { "epoch": 2.2752047444224797, "grad_norm": 2.519750536824236, "learning_rate": 2.90848080861179e-06, "loss": 0.128, "step": 16113 }, { "epoch": 2.2753459474724655, "grad_norm": 3.0061302800307006, "learning_rate": 2.907406066754492e-06, "loss": 0.1212, "step": 16114 }, { "epoch": 2.2754871505224514, "grad_norm": 3.201032334456864, "learning_rate": 2.906331489726012e-06, "loss": 0.1468, "step": 16115 }, { "epoch": 2.2756283535724373, "grad_norm": 3.323340927858849, "learning_rate": 2.905257077551331e-06, "loss": 0.1444, "step": 16116 }, { "epoch": 2.275769556622423, "grad_norm": 3.2159702675212642, "learning_rate": 2.904182830255408e-06, "loss": 0.1591, "step": 16117 }, { "epoch": 2.275910759672409, "grad_norm": 3.2233240608632845, "learning_rate": 2.9031087478632116e-06, "loss": 0.1308, "step": 16118 }, { "epoch": 2.276051962722395, "grad_norm": 3.1842154198087758, "learning_rate": 2.9020348303997024e-06, "loss": 0.1604, "step": 16119 }, { "epoch": 2.276193165772381, "grad_norm": 2.852307533738364, "learning_rate": 2.900961077889837e-06, "loss": 0.1508, "step": 16120 }, { "epoch": 2.2763343688223667, "grad_norm": 2.8394724809355885, "learning_rate": 2.8998874903585716e-06, "loss": 0.1441, "step": 16121 }, { "epoch": 2.2764755718723526, "grad_norm": 3.833017519579199, "learning_rate": 2.898814067830855e-06, "loss": 0.1797, "step": 16122 }, { "epoch": 2.2766167749223385, "grad_norm": 3.641567866132091, "learning_rate": 2.8977408103316327e-06, "loss": 0.2046, "step": 16123 }, { "epoch": 2.2767579779723244, "grad_norm": 3.7412715231506417, "learning_rate": 2.8966677178858506e-06, "loss": 0.1671, "step": 16124 }, { "epoch": 2.2768991810223103, "grad_norm": 3.186999532013299, "learning_rate": 2.895594790518437e-06, "loss": 0.167, "step": 16125 }, { "epoch": 2.277040384072296, "grad_norm": 3.499351900208958, "learning_rate": 2.894522028254334e-06, "loss": 0.1419, "step": 16126 }, { "epoch": 2.277181587122282, "grad_norm": 3.1364647995039565, "learning_rate": 2.8934494311184715e-06, "loss": 0.1695, "step": 16127 }, { "epoch": 2.277322790172268, "grad_norm": 3.1022172338303706, "learning_rate": 2.892376999135774e-06, "loss": 0.1569, "step": 16128 }, { "epoch": 2.277463993222254, "grad_norm": 3.251660456231205, "learning_rate": 2.891304732331167e-06, "loss": 0.1788, "step": 16129 }, { "epoch": 2.2776051962722397, "grad_norm": 2.525042373940704, "learning_rate": 2.890232630729567e-06, "loss": 0.1148, "step": 16130 }, { "epoch": 2.2777463993222256, "grad_norm": 2.8033016117591925, "learning_rate": 2.8891606943558904e-06, "loss": 0.1487, "step": 16131 }, { "epoch": 2.2778876023722114, "grad_norm": 2.6325624485514627, "learning_rate": 2.888088923235045e-06, "loss": 0.134, "step": 16132 }, { "epoch": 2.2780288054221973, "grad_norm": 3.5994094114302717, "learning_rate": 2.8870173173919493e-06, "loss": 0.1856, "step": 16133 }, { "epoch": 2.278170008472183, "grad_norm": 3.678760229406727, "learning_rate": 2.885945876851495e-06, "loss": 0.1696, "step": 16134 }, { "epoch": 2.278311211522169, "grad_norm": 2.6138023410968447, "learning_rate": 2.8848746016385855e-06, "loss": 0.0902, "step": 16135 }, { "epoch": 2.278452414572155, "grad_norm": 2.9002269368750477, "learning_rate": 2.8838034917781187e-06, "loss": 0.1308, "step": 16136 }, { "epoch": 2.278593617622141, "grad_norm": 2.579318566007883, "learning_rate": 2.8827325472949817e-06, "loss": 0.1105, "step": 16137 }, { "epoch": 2.2787348206721267, "grad_norm": 2.95355286746537, "learning_rate": 2.8816617682140703e-06, "loss": 0.1441, "step": 16138 }, { "epoch": 2.2788760237221126, "grad_norm": 3.2984793360567832, "learning_rate": 2.8805911545602647e-06, "loss": 0.169, "step": 16139 }, { "epoch": 2.2790172267720985, "grad_norm": 3.220457821942883, "learning_rate": 2.879520706358446e-06, "loss": 0.1592, "step": 16140 }, { "epoch": 2.2791584298220844, "grad_norm": 2.9911521821162403, "learning_rate": 2.878450423633494e-06, "loss": 0.1566, "step": 16141 }, { "epoch": 2.2792996328720703, "grad_norm": 4.387026578602758, "learning_rate": 2.8773803064102758e-06, "loss": 0.2511, "step": 16142 }, { "epoch": 2.279440835922056, "grad_norm": 2.978360052431601, "learning_rate": 2.876310354713661e-06, "loss": 0.1453, "step": 16143 }, { "epoch": 2.279582038972042, "grad_norm": 3.8948114173134987, "learning_rate": 2.875240568568518e-06, "loss": 0.1868, "step": 16144 }, { "epoch": 2.279723242022028, "grad_norm": 4.071909341580717, "learning_rate": 2.8741709479997095e-06, "loss": 0.1901, "step": 16145 }, { "epoch": 2.279864445072014, "grad_norm": 2.7004379865966626, "learning_rate": 2.873101493032089e-06, "loss": 0.1322, "step": 16146 }, { "epoch": 2.2800056481219997, "grad_norm": 2.9351653660375887, "learning_rate": 2.8720322036905133e-06, "loss": 0.1469, "step": 16147 }, { "epoch": 2.280146851171985, "grad_norm": 2.8043995509722213, "learning_rate": 2.87096307999983e-06, "loss": 0.1312, "step": 16148 }, { "epoch": 2.280288054221971, "grad_norm": 3.2627653682644833, "learning_rate": 2.869894121984883e-06, "loss": 0.1867, "step": 16149 }, { "epoch": 2.280429257271957, "grad_norm": 4.495830857893143, "learning_rate": 2.868825329670524e-06, "loss": 0.2592, "step": 16150 }, { "epoch": 2.2805704603219428, "grad_norm": 2.89965539871878, "learning_rate": 2.867756703081581e-06, "loss": 0.1415, "step": 16151 }, { "epoch": 2.2807116633719287, "grad_norm": 3.0679773705895528, "learning_rate": 2.866688242242892e-06, "loss": 0.1259, "step": 16152 }, { "epoch": 2.2808528664219145, "grad_norm": 4.149630647793469, "learning_rate": 2.865619947179288e-06, "loss": 0.2112, "step": 16153 }, { "epoch": 2.2809940694719004, "grad_norm": 2.2882413855194024, "learning_rate": 2.8645518179155953e-06, "loss": 0.1054, "step": 16154 }, { "epoch": 2.2811352725218863, "grad_norm": 3.115698132113545, "learning_rate": 2.863483854476633e-06, "loss": 0.157, "step": 16155 }, { "epoch": 2.281276475571872, "grad_norm": 4.14475923061119, "learning_rate": 2.8624160568872273e-06, "loss": 0.1857, "step": 16156 }, { "epoch": 2.281417678621858, "grad_norm": 3.236885607514476, "learning_rate": 2.86134842517219e-06, "loss": 0.1408, "step": 16157 }, { "epoch": 2.281558881671844, "grad_norm": 4.0075885026696225, "learning_rate": 2.860280959356336e-06, "loss": 0.164, "step": 16158 }, { "epoch": 2.28170008472183, "grad_norm": 3.128480883071816, "learning_rate": 2.859213659464466e-06, "loss": 0.1194, "step": 16159 }, { "epoch": 2.2818412877718157, "grad_norm": 2.8884005836271274, "learning_rate": 2.8581465255213834e-06, "loss": 0.1406, "step": 16160 }, { "epoch": 2.2819824908218016, "grad_norm": 3.126582247591714, "learning_rate": 2.857079557551894e-06, "loss": 0.1195, "step": 16161 }, { "epoch": 2.2821236938717875, "grad_norm": 2.97370140365119, "learning_rate": 2.8560127555807902e-06, "loss": 0.1145, "step": 16162 }, { "epoch": 2.2822648969217734, "grad_norm": 2.5379760695646945, "learning_rate": 2.8549461196328667e-06, "loss": 0.137, "step": 16163 }, { "epoch": 2.2824060999717593, "grad_norm": 3.639556872888662, "learning_rate": 2.853879649732908e-06, "loss": 0.1644, "step": 16164 }, { "epoch": 2.282547303021745, "grad_norm": 2.770458636117854, "learning_rate": 2.8528133459057006e-06, "loss": 0.1104, "step": 16165 }, { "epoch": 2.282688506071731, "grad_norm": 4.157126343238318, "learning_rate": 2.8517472081760243e-06, "loss": 0.1556, "step": 16166 }, { "epoch": 2.282829709121717, "grad_norm": 2.8682976525914614, "learning_rate": 2.8506812365686553e-06, "loss": 0.1219, "step": 16167 }, { "epoch": 2.282970912171703, "grad_norm": 3.561886145752162, "learning_rate": 2.849615431108368e-06, "loss": 0.1743, "step": 16168 }, { "epoch": 2.2831121152216887, "grad_norm": 3.5630927191255477, "learning_rate": 2.848549791819929e-06, "loss": 0.1933, "step": 16169 }, { "epoch": 2.2832533182716745, "grad_norm": 2.8654721228769215, "learning_rate": 2.847484318728105e-06, "loss": 0.1106, "step": 16170 }, { "epoch": 2.2833945213216604, "grad_norm": 3.1760998089957746, "learning_rate": 2.8464190118576564e-06, "loss": 0.1552, "step": 16171 }, { "epoch": 2.2835357243716463, "grad_norm": 2.5939127100281323, "learning_rate": 2.845353871233337e-06, "loss": 0.1194, "step": 16172 }, { "epoch": 2.283676927421632, "grad_norm": 3.27282576643028, "learning_rate": 2.8442888968799075e-06, "loss": 0.1521, "step": 16173 }, { "epoch": 2.283818130471618, "grad_norm": 3.520623297717128, "learning_rate": 2.843224088822113e-06, "loss": 0.2081, "step": 16174 }, { "epoch": 2.283959333521604, "grad_norm": 2.849044914525224, "learning_rate": 2.8421594470847038e-06, "loss": 0.1487, "step": 16175 }, { "epoch": 2.28410053657159, "grad_norm": 3.016861071385987, "learning_rate": 2.841094971692414e-06, "loss": 0.1173, "step": 16176 }, { "epoch": 2.2842417396215757, "grad_norm": 3.552710721577386, "learning_rate": 2.8400306626699835e-06, "loss": 0.199, "step": 16177 }, { "epoch": 2.2843829426715616, "grad_norm": 2.914693611275363, "learning_rate": 2.83896652004215e-06, "loss": 0.1307, "step": 16178 }, { "epoch": 2.2845241457215475, "grad_norm": 2.7661374613893632, "learning_rate": 2.8379025438336426e-06, "loss": 0.1538, "step": 16179 }, { "epoch": 2.2846653487715334, "grad_norm": 3.1976316180120254, "learning_rate": 2.836838734069187e-06, "loss": 0.1879, "step": 16180 }, { "epoch": 2.2848065518215193, "grad_norm": 3.2321613043896313, "learning_rate": 2.835775090773506e-06, "loss": 0.198, "step": 16181 }, { "epoch": 2.284947754871505, "grad_norm": 2.4183054306725276, "learning_rate": 2.834711613971317e-06, "loss": 0.1037, "step": 16182 }, { "epoch": 2.285088957921491, "grad_norm": 3.3455648607745108, "learning_rate": 2.833648303687336e-06, "loss": 0.1771, "step": 16183 }, { "epoch": 2.285230160971477, "grad_norm": 3.233269220970347, "learning_rate": 2.832585159946274e-06, "loss": 0.1446, "step": 16184 }, { "epoch": 2.285371364021463, "grad_norm": 2.788893748542876, "learning_rate": 2.831522182772837e-06, "loss": 0.1216, "step": 16185 }, { "epoch": 2.2855125670714487, "grad_norm": 3.364026722353922, "learning_rate": 2.8304593721917283e-06, "loss": 0.1396, "step": 16186 }, { "epoch": 2.2856537701214346, "grad_norm": 3.4594601238355303, "learning_rate": 2.829396728227648e-06, "loss": 0.1564, "step": 16187 }, { "epoch": 2.2857949731714204, "grad_norm": 3.4510983797044315, "learning_rate": 2.8283342509052915e-06, "loss": 0.1907, "step": 16188 }, { "epoch": 2.2859361762214063, "grad_norm": 2.9210346073321425, "learning_rate": 2.827271940249345e-06, "loss": 0.1144, "step": 16189 }, { "epoch": 2.286077379271392, "grad_norm": 2.8289541045240956, "learning_rate": 2.8262097962845058e-06, "loss": 0.1514, "step": 16190 }, { "epoch": 2.286218582321378, "grad_norm": 3.4686969302543944, "learning_rate": 2.825147819035452e-06, "loss": 0.1227, "step": 16191 }, { "epoch": 2.286359785371364, "grad_norm": 3.001410351566293, "learning_rate": 2.8240860085268683e-06, "loss": 0.113, "step": 16192 }, { "epoch": 2.28650098842135, "grad_norm": 3.697146904381123, "learning_rate": 2.8230243647834222e-06, "loss": 0.1682, "step": 16193 }, { "epoch": 2.2866421914713357, "grad_norm": 3.0825384812152845, "learning_rate": 2.821962887829789e-06, "loss": 0.145, "step": 16194 }, { "epoch": 2.2867833945213216, "grad_norm": 2.5799916707628, "learning_rate": 2.8209015776906402e-06, "loss": 0.1394, "step": 16195 }, { "epoch": 2.2869245975713075, "grad_norm": 3.5460090896044565, "learning_rate": 2.819840434390638e-06, "loss": 0.1884, "step": 16196 }, { "epoch": 2.2870658006212934, "grad_norm": 2.8847101321946496, "learning_rate": 2.8187794579544434e-06, "loss": 0.1251, "step": 16197 }, { "epoch": 2.2872070036712793, "grad_norm": 2.762101768516108, "learning_rate": 2.8177186484067143e-06, "loss": 0.1392, "step": 16198 }, { "epoch": 2.287348206721265, "grad_norm": 2.8149064006393956, "learning_rate": 2.8166580057721003e-06, "loss": 0.158, "step": 16199 }, { "epoch": 2.287489409771251, "grad_norm": 2.7569289490056432, "learning_rate": 2.8155975300752524e-06, "loss": 0.1333, "step": 16200 }, { "epoch": 2.287630612821237, "grad_norm": 2.6819643345600896, "learning_rate": 2.814537221340816e-06, "loss": 0.1474, "step": 16201 }, { "epoch": 2.287771815871223, "grad_norm": 3.171087646834457, "learning_rate": 2.8134770795934307e-06, "loss": 0.16, "step": 16202 }, { "epoch": 2.2879130189212087, "grad_norm": 3.0825494978607906, "learning_rate": 2.8124171048577354e-06, "loss": 0.1421, "step": 16203 }, { "epoch": 2.2880542219711946, "grad_norm": 3.246109040319658, "learning_rate": 2.811357297158361e-06, "loss": 0.1304, "step": 16204 }, { "epoch": 2.2881954250211805, "grad_norm": 3.319768485926407, "learning_rate": 2.8102976565199393e-06, "loss": 0.1414, "step": 16205 }, { "epoch": 2.2883366280711663, "grad_norm": 2.935836236378914, "learning_rate": 2.809238182967092e-06, "loss": 0.16, "step": 16206 }, { "epoch": 2.288477831121152, "grad_norm": 2.958639881425088, "learning_rate": 2.8081788765244465e-06, "loss": 0.1209, "step": 16207 }, { "epoch": 2.288619034171138, "grad_norm": 3.0518288070660393, "learning_rate": 2.807119737216619e-06, "loss": 0.1567, "step": 16208 }, { "epoch": 2.288760237221124, "grad_norm": 3.6768568197538345, "learning_rate": 2.8060607650682247e-06, "loss": 0.192, "step": 16209 }, { "epoch": 2.28890144027111, "grad_norm": 2.6113234338183853, "learning_rate": 2.805001960103868e-06, "loss": 0.1339, "step": 16210 }, { "epoch": 2.2890426433210957, "grad_norm": 4.210042933919132, "learning_rate": 2.803943322348156e-06, "loss": 0.1661, "step": 16211 }, { "epoch": 2.2891838463710816, "grad_norm": 4.987218242996483, "learning_rate": 2.8028848518256967e-06, "loss": 0.2178, "step": 16212 }, { "epoch": 2.2893250494210675, "grad_norm": 3.3982624818812934, "learning_rate": 2.801826548561085e-06, "loss": 0.1817, "step": 16213 }, { "epoch": 2.2894662524710534, "grad_norm": 2.859248315879725, "learning_rate": 2.800768412578916e-06, "loss": 0.14, "step": 16214 }, { "epoch": 2.2896074555210393, "grad_norm": 2.950914118197667, "learning_rate": 2.7997104439037794e-06, "loss": 0.1423, "step": 16215 }, { "epoch": 2.289748658571025, "grad_norm": 3.735334797187053, "learning_rate": 2.7986526425602623e-06, "loss": 0.1682, "step": 16216 }, { "epoch": 2.289889861621011, "grad_norm": 3.146115289519885, "learning_rate": 2.7975950085729486e-06, "loss": 0.1595, "step": 16217 }, { "epoch": 2.290031064670997, "grad_norm": 2.791920861933825, "learning_rate": 2.7965375419664154e-06, "loss": 0.0993, "step": 16218 }, { "epoch": 2.290172267720983, "grad_norm": 3.6471308111055114, "learning_rate": 2.7954802427652395e-06, "loss": 0.1891, "step": 16219 }, { "epoch": 2.2903134707709687, "grad_norm": 2.9640458175510402, "learning_rate": 2.794423110993991e-06, "loss": 0.1427, "step": 16220 }, { "epoch": 2.2904546738209546, "grad_norm": 3.3113590054874766, "learning_rate": 2.793366146677239e-06, "loss": 0.1483, "step": 16221 }, { "epoch": 2.2905958768709405, "grad_norm": 3.221075178050502, "learning_rate": 2.7923093498395438e-06, "loss": 0.1811, "step": 16222 }, { "epoch": 2.2907370799209263, "grad_norm": 3.3861933172694814, "learning_rate": 2.7912527205054642e-06, "loss": 0.1421, "step": 16223 }, { "epoch": 2.2908782829709122, "grad_norm": 2.9485036054415708, "learning_rate": 2.79019625869956e-06, "loss": 0.1453, "step": 16224 }, { "epoch": 2.291019486020898, "grad_norm": 3.591660777868365, "learning_rate": 2.7891399644463824e-06, "loss": 0.1713, "step": 16225 }, { "epoch": 2.291160689070884, "grad_norm": 3.630841380610836, "learning_rate": 2.7880838377704812e-06, "loss": 0.1705, "step": 16226 }, { "epoch": 2.29130189212087, "grad_norm": 3.6032448309651817, "learning_rate": 2.7870278786963935e-06, "loss": 0.1852, "step": 16227 }, { "epoch": 2.2914430951708558, "grad_norm": 3.2432023962765677, "learning_rate": 2.7859720872486585e-06, "loss": 0.1471, "step": 16228 }, { "epoch": 2.2915842982208416, "grad_norm": 3.3863363074664043, "learning_rate": 2.7849164634518213e-06, "loss": 0.1993, "step": 16229 }, { "epoch": 2.2917255012708275, "grad_norm": 3.212856447307632, "learning_rate": 2.7838610073304096e-06, "loss": 0.1318, "step": 16230 }, { "epoch": 2.2918667043208134, "grad_norm": 3.0287469825254223, "learning_rate": 2.7828057189089507e-06, "loss": 0.1553, "step": 16231 }, { "epoch": 2.2920079073707993, "grad_norm": 3.643180666768194, "learning_rate": 2.7817505982119708e-06, "loss": 0.171, "step": 16232 }, { "epoch": 2.292149110420785, "grad_norm": 3.021587489548783, "learning_rate": 2.780695645263989e-06, "loss": 0.1886, "step": 16233 }, { "epoch": 2.292290313470771, "grad_norm": 3.225652954703523, "learning_rate": 2.779640860089523e-06, "loss": 0.1385, "step": 16234 }, { "epoch": 2.292431516520757, "grad_norm": 2.5161752015034544, "learning_rate": 2.778586242713085e-06, "loss": 0.1333, "step": 16235 }, { "epoch": 2.292572719570743, "grad_norm": 3.122896634932714, "learning_rate": 2.777531793159185e-06, "loss": 0.1178, "step": 16236 }, { "epoch": 2.2927139226207287, "grad_norm": 2.776338681183214, "learning_rate": 2.7764775114523256e-06, "loss": 0.1477, "step": 16237 }, { "epoch": 2.2928551256707146, "grad_norm": 3.641924703841348, "learning_rate": 2.77542339761701e-06, "loss": 0.2117, "step": 16238 }, { "epoch": 2.2929963287207005, "grad_norm": 3.173805425520337, "learning_rate": 2.7743694516777342e-06, "loss": 0.174, "step": 16239 }, { "epoch": 2.2931375317706864, "grad_norm": 3.6353883220133194, "learning_rate": 2.7733156736589893e-06, "loss": 0.1883, "step": 16240 }, { "epoch": 2.2932787348206722, "grad_norm": 3.132734921490453, "learning_rate": 2.7722620635852704e-06, "loss": 0.1565, "step": 16241 }, { "epoch": 2.293419937870658, "grad_norm": 3.0375963631721055, "learning_rate": 2.7712086214810585e-06, "loss": 0.1911, "step": 16242 }, { "epoch": 2.293561140920644, "grad_norm": 3.038182211738905, "learning_rate": 2.770155347370841e-06, "loss": 0.1352, "step": 16243 }, { "epoch": 2.29370234397063, "grad_norm": 2.7768297029408027, "learning_rate": 2.769102241279087e-06, "loss": 0.1464, "step": 16244 }, { "epoch": 2.2938435470206158, "grad_norm": 2.905165919023434, "learning_rate": 2.7680493032302733e-06, "loss": 0.1682, "step": 16245 }, { "epoch": 2.2939847500706017, "grad_norm": 2.846355514219431, "learning_rate": 2.7669965332488692e-06, "loss": 0.1599, "step": 16246 }, { "epoch": 2.2941259531205875, "grad_norm": 2.961876815715632, "learning_rate": 2.765943931359344e-06, "loss": 0.1595, "step": 16247 }, { "epoch": 2.2942671561705734, "grad_norm": 3.4345788982104843, "learning_rate": 2.7648914975861573e-06, "loss": 0.1758, "step": 16248 }, { "epoch": 2.2944083592205593, "grad_norm": 2.8097561282570744, "learning_rate": 2.7638392319537686e-06, "loss": 0.1266, "step": 16249 }, { "epoch": 2.294549562270545, "grad_norm": 2.595316219740321, "learning_rate": 2.7627871344866308e-06, "loss": 0.1308, "step": 16250 }, { "epoch": 2.294690765320531, "grad_norm": 3.00192667841996, "learning_rate": 2.7617352052091937e-06, "loss": 0.1235, "step": 16251 }, { "epoch": 2.294831968370517, "grad_norm": 3.5349008696812043, "learning_rate": 2.760683444145905e-06, "loss": 0.1464, "step": 16252 }, { "epoch": 2.294973171420503, "grad_norm": 3.3592649697834243, "learning_rate": 2.7596318513212074e-06, "loss": 0.1644, "step": 16253 }, { "epoch": 2.2951143744704887, "grad_norm": 3.2780522286438214, "learning_rate": 2.7585804267595383e-06, "loss": 0.1714, "step": 16254 }, { "epoch": 2.2952555775204746, "grad_norm": 3.264252020012359, "learning_rate": 2.7575291704853325e-06, "loss": 0.1848, "step": 16255 }, { "epoch": 2.2953967805704605, "grad_norm": 2.8814363551555573, "learning_rate": 2.756478082523021e-06, "loss": 0.1094, "step": 16256 }, { "epoch": 2.2955379836204464, "grad_norm": 3.645822491894983, "learning_rate": 2.7554271628970307e-06, "loss": 0.175, "step": 16257 }, { "epoch": 2.2956791866704322, "grad_norm": 3.318181780189427, "learning_rate": 2.7543764116317806e-06, "loss": 0.1603, "step": 16258 }, { "epoch": 2.295820389720418, "grad_norm": 3.7858457571820465, "learning_rate": 2.7533258287516973e-06, "loss": 0.1911, "step": 16259 }, { "epoch": 2.295961592770404, "grad_norm": 3.289525056025436, "learning_rate": 2.7522754142811957e-06, "loss": 0.163, "step": 16260 }, { "epoch": 2.29610279582039, "grad_norm": 3.1204496424769936, "learning_rate": 2.75122516824468e-06, "loss": 0.1424, "step": 16261 }, { "epoch": 2.296243998870376, "grad_norm": 3.511784848089861, "learning_rate": 2.7501750906665603e-06, "loss": 0.2062, "step": 16262 }, { "epoch": 2.2963852019203617, "grad_norm": 2.928057958495144, "learning_rate": 2.7491251815712384e-06, "loss": 0.1425, "step": 16263 }, { "epoch": 2.2965264049703475, "grad_norm": 3.409434601237091, "learning_rate": 2.748075440983118e-06, "loss": 0.1765, "step": 16264 }, { "epoch": 2.2966676080203334, "grad_norm": 2.6922074610309847, "learning_rate": 2.7470258689265926e-06, "loss": 0.1253, "step": 16265 }, { "epoch": 2.2968088110703193, "grad_norm": 3.631354629322686, "learning_rate": 2.7459764654260547e-06, "loss": 0.1995, "step": 16266 }, { "epoch": 2.2969500141203048, "grad_norm": 3.2786380031309426, "learning_rate": 2.7449272305058905e-06, "loss": 0.1506, "step": 16267 }, { "epoch": 2.2970912171702906, "grad_norm": 3.5203519772641925, "learning_rate": 2.7438781641904834e-06, "loss": 0.1791, "step": 16268 }, { "epoch": 2.2972324202202765, "grad_norm": 4.0852382638972164, "learning_rate": 2.7428292665042157e-06, "loss": 0.1934, "step": 16269 }, { "epoch": 2.2973736232702624, "grad_norm": 3.8643927156582074, "learning_rate": 2.74178053747146e-06, "loss": 0.1833, "step": 16270 }, { "epoch": 2.2975148263202483, "grad_norm": 2.717198597242836, "learning_rate": 2.740731977116591e-06, "loss": 0.1086, "step": 16271 }, { "epoch": 2.297656029370234, "grad_norm": 3.1282643788994196, "learning_rate": 2.7396835854639757e-06, "loss": 0.169, "step": 16272 }, { "epoch": 2.29779723242022, "grad_norm": 3.7127748942819907, "learning_rate": 2.738635362537978e-06, "loss": 0.1717, "step": 16273 }, { "epoch": 2.297938435470206, "grad_norm": 2.771145098707405, "learning_rate": 2.737587308362959e-06, "loss": 0.1335, "step": 16274 }, { "epoch": 2.298079638520192, "grad_norm": 3.569860196995526, "learning_rate": 2.7365394229632713e-06, "loss": 0.1821, "step": 16275 }, { "epoch": 2.2982208415701777, "grad_norm": 3.2776296963707687, "learning_rate": 2.7354917063632735e-06, "loss": 0.1124, "step": 16276 }, { "epoch": 2.2983620446201636, "grad_norm": 2.6134271563913654, "learning_rate": 2.734444158587314e-06, "loss": 0.1057, "step": 16277 }, { "epoch": 2.2985032476701495, "grad_norm": 3.56736506256394, "learning_rate": 2.7333967796597317e-06, "loss": 0.1717, "step": 16278 }, { "epoch": 2.2986444507201353, "grad_norm": 3.3650762612681406, "learning_rate": 2.7323495696048707e-06, "loss": 0.1496, "step": 16279 }, { "epoch": 2.2987856537701212, "grad_norm": 2.614744930968377, "learning_rate": 2.731302528447063e-06, "loss": 0.1546, "step": 16280 }, { "epoch": 2.298926856820107, "grad_norm": 2.7313616923878286, "learning_rate": 2.7302556562106486e-06, "loss": 0.1227, "step": 16281 }, { "epoch": 2.299068059870093, "grad_norm": 3.4847204558159808, "learning_rate": 2.729208952919954e-06, "loss": 0.1706, "step": 16282 }, { "epoch": 2.299209262920079, "grad_norm": 3.072803162106676, "learning_rate": 2.7281624185993027e-06, "loss": 0.1354, "step": 16283 }, { "epoch": 2.2993504659700648, "grad_norm": 3.138761572374022, "learning_rate": 2.7271160532730167e-06, "loss": 0.1611, "step": 16284 }, { "epoch": 2.2994916690200506, "grad_norm": 2.9447509517912183, "learning_rate": 2.726069856965412e-06, "loss": 0.1467, "step": 16285 }, { "epoch": 2.2996328720700365, "grad_norm": 3.587023868792806, "learning_rate": 2.7250238297008026e-06, "loss": 0.1716, "step": 16286 }, { "epoch": 2.2997740751200224, "grad_norm": 3.1407759517379614, "learning_rate": 2.7239779715034975e-06, "loss": 0.1718, "step": 16287 }, { "epoch": 2.2999152781700083, "grad_norm": 2.8478034919897426, "learning_rate": 2.7229322823978033e-06, "loss": 0.1225, "step": 16288 }, { "epoch": 2.300056481219994, "grad_norm": 4.119813650171776, "learning_rate": 2.7218867624080194e-06, "loss": 0.1769, "step": 16289 }, { "epoch": 2.30019768426998, "grad_norm": 3.3842406824849682, "learning_rate": 2.7208414115584436e-06, "loss": 0.1702, "step": 16290 }, { "epoch": 2.300338887319966, "grad_norm": 2.969951724504971, "learning_rate": 2.7197962298733693e-06, "loss": 0.1548, "step": 16291 }, { "epoch": 2.300480090369952, "grad_norm": 2.6665778173761328, "learning_rate": 2.7187512173770848e-06, "loss": 0.1074, "step": 16292 }, { "epoch": 2.3006212934199377, "grad_norm": 3.433733803316779, "learning_rate": 2.717706374093879e-06, "loss": 0.1984, "step": 16293 }, { "epoch": 2.3007624964699236, "grad_norm": 2.12409339169575, "learning_rate": 2.7166617000480367e-06, "loss": 0.0762, "step": 16294 }, { "epoch": 2.3009036995199095, "grad_norm": 2.4254720284762197, "learning_rate": 2.7156171952638276e-06, "loss": 0.1191, "step": 16295 }, { "epoch": 2.3010449025698954, "grad_norm": 3.7335704045318163, "learning_rate": 2.7145728597655286e-06, "loss": 0.2066, "step": 16296 }, { "epoch": 2.3011861056198812, "grad_norm": 2.2341773433082954, "learning_rate": 2.7135286935774073e-06, "loss": 0.1297, "step": 16297 }, { "epoch": 2.301327308669867, "grad_norm": 2.676988747762443, "learning_rate": 2.712484696723735e-06, "loss": 0.136, "step": 16298 }, { "epoch": 2.301468511719853, "grad_norm": 2.9861638112652846, "learning_rate": 2.711440869228771e-06, "loss": 0.1518, "step": 16299 }, { "epoch": 2.301609714769839, "grad_norm": 2.9600746544913425, "learning_rate": 2.710397211116774e-06, "loss": 0.1251, "step": 16300 }, { "epoch": 2.3017509178198248, "grad_norm": 3.5393407764573883, "learning_rate": 2.709353722411997e-06, "loss": 0.159, "step": 16301 }, { "epoch": 2.3018921208698107, "grad_norm": 2.8569265373089388, "learning_rate": 2.708310403138692e-06, "loss": 0.1709, "step": 16302 }, { "epoch": 2.3020333239197965, "grad_norm": 2.440281759810754, "learning_rate": 2.707267253321103e-06, "loss": 0.1286, "step": 16303 }, { "epoch": 2.3021745269697824, "grad_norm": 2.9857277043414796, "learning_rate": 2.7062242729834743e-06, "loss": 0.1308, "step": 16304 }, { "epoch": 2.3023157300197683, "grad_norm": 3.006710440637879, "learning_rate": 2.7051814621500437e-06, "loss": 0.1266, "step": 16305 }, { "epoch": 2.302456933069754, "grad_norm": 2.4296758329853962, "learning_rate": 2.7041388208450457e-06, "loss": 0.1205, "step": 16306 }, { "epoch": 2.30259813611974, "grad_norm": 3.4514507236900904, "learning_rate": 2.7030963490927097e-06, "loss": 0.1277, "step": 16307 }, { "epoch": 2.302739339169726, "grad_norm": 2.37006144530992, "learning_rate": 2.702054046917264e-06, "loss": 0.0931, "step": 16308 }, { "epoch": 2.302880542219712, "grad_norm": 2.9361927340809264, "learning_rate": 2.701011914342927e-06, "loss": 0.1512, "step": 16309 }, { "epoch": 2.3030217452696977, "grad_norm": 3.025583355391224, "learning_rate": 2.699969951393925e-06, "loss": 0.1628, "step": 16310 }, { "epoch": 2.3031629483196836, "grad_norm": 2.839336924166147, "learning_rate": 2.6989281580944704e-06, "loss": 0.137, "step": 16311 }, { "epoch": 2.3033041513696695, "grad_norm": 3.0108482487677737, "learning_rate": 2.6978865344687697e-06, "loss": 0.1446, "step": 16312 }, { "epoch": 2.3034453544196554, "grad_norm": 3.2634224405247627, "learning_rate": 2.6968450805410328e-06, "loss": 0.135, "step": 16313 }, { "epoch": 2.3035865574696412, "grad_norm": 2.984902039875968, "learning_rate": 2.695803796335459e-06, "loss": 0.1359, "step": 16314 }, { "epoch": 2.303727760519627, "grad_norm": 3.232220112539098, "learning_rate": 2.694762681876253e-06, "loss": 0.1742, "step": 16315 }, { "epoch": 2.303868963569613, "grad_norm": 2.5719603607150887, "learning_rate": 2.6937217371876077e-06, "loss": 0.118, "step": 16316 }, { "epoch": 2.304010166619599, "grad_norm": 2.9971678122443444, "learning_rate": 2.6926809622937144e-06, "loss": 0.177, "step": 16317 }, { "epoch": 2.304151369669585, "grad_norm": 3.7108348805070484, "learning_rate": 2.691640357218759e-06, "loss": 0.1727, "step": 16318 }, { "epoch": 2.3042925727195707, "grad_norm": 3.6008631641139357, "learning_rate": 2.6905999219869295e-06, "loss": 0.1751, "step": 16319 }, { "epoch": 2.3044337757695565, "grad_norm": 3.271888543225656, "learning_rate": 2.6895596566223937e-06, "loss": 0.142, "step": 16320 }, { "epoch": 2.3045749788195424, "grad_norm": 4.237449796420313, "learning_rate": 2.6885195611493386e-06, "loss": 0.1974, "step": 16321 }, { "epoch": 2.3047161818695283, "grad_norm": 3.4041579421590926, "learning_rate": 2.687479635591931e-06, "loss": 0.1921, "step": 16322 }, { "epoch": 2.304857384919514, "grad_norm": 4.276518103815932, "learning_rate": 2.6864398799743383e-06, "loss": 0.1964, "step": 16323 }, { "epoch": 2.3049985879695, "grad_norm": 3.079307241495362, "learning_rate": 2.6854002943207245e-06, "loss": 0.1667, "step": 16324 }, { "epoch": 2.305139791019486, "grad_norm": 3.265125951603585, "learning_rate": 2.684360878655249e-06, "loss": 0.143, "step": 16325 }, { "epoch": 2.305280994069472, "grad_norm": 2.2690044428292286, "learning_rate": 2.683321633002064e-06, "loss": 0.1401, "step": 16326 }, { "epoch": 2.3054221971194577, "grad_norm": 2.867190300571336, "learning_rate": 2.6822825573853274e-06, "loss": 0.1218, "step": 16327 }, { "epoch": 2.3055634001694436, "grad_norm": 3.140173919278724, "learning_rate": 2.681243651829187e-06, "loss": 0.1482, "step": 16328 }, { "epoch": 2.3057046032194295, "grad_norm": 2.9888965276828277, "learning_rate": 2.68020491635778e-06, "loss": 0.1371, "step": 16329 }, { "epoch": 2.3058458062694154, "grad_norm": 3.282351357703986, "learning_rate": 2.6791663509952504e-06, "loss": 0.1373, "step": 16330 }, { "epoch": 2.3059870093194013, "grad_norm": 3.3488256157947833, "learning_rate": 2.67812795576573e-06, "loss": 0.1531, "step": 16331 }, { "epoch": 2.306128212369387, "grad_norm": 3.0436834687399714, "learning_rate": 2.677089730693356e-06, "loss": 0.1342, "step": 16332 }, { "epoch": 2.306269415419373, "grad_norm": 2.74116989589514, "learning_rate": 2.676051675802256e-06, "loss": 0.1313, "step": 16333 }, { "epoch": 2.306410618469359, "grad_norm": 3.8512713728237924, "learning_rate": 2.675013791116551e-06, "loss": 0.1552, "step": 16334 }, { "epoch": 2.306551821519345, "grad_norm": 2.719487535085552, "learning_rate": 2.673976076660362e-06, "loss": 0.1326, "step": 16335 }, { "epoch": 2.3066930245693307, "grad_norm": 3.1976942336979284, "learning_rate": 2.672938532457807e-06, "loss": 0.1579, "step": 16336 }, { "epoch": 2.3068342276193166, "grad_norm": 2.880000793837095, "learning_rate": 2.671901158532991e-06, "loss": 0.1426, "step": 16337 }, { "epoch": 2.3069754306693024, "grad_norm": 3.0831687425643004, "learning_rate": 2.670863954910031e-06, "loss": 0.1498, "step": 16338 }, { "epoch": 2.3071166337192883, "grad_norm": 2.4369189496723775, "learning_rate": 2.6698269216130278e-06, "loss": 0.1205, "step": 16339 }, { "epoch": 2.307257836769274, "grad_norm": 3.397082851110855, "learning_rate": 2.66879005866608e-06, "loss": 0.1457, "step": 16340 }, { "epoch": 2.30739903981926, "grad_norm": 3.4232342652321748, "learning_rate": 2.667753366093285e-06, "loss": 0.1677, "step": 16341 }, { "epoch": 2.307540242869246, "grad_norm": 2.5533226066482007, "learning_rate": 2.6667168439187364e-06, "loss": 0.0921, "step": 16342 }, { "epoch": 2.307681445919232, "grad_norm": 3.2384776434456195, "learning_rate": 2.665680492166518e-06, "loss": 0.1451, "step": 16343 }, { "epoch": 2.3078226489692177, "grad_norm": 3.721855309065821, "learning_rate": 2.6646443108607234e-06, "loss": 0.1699, "step": 16344 }, { "epoch": 2.3079638520192036, "grad_norm": 3.306363863911269, "learning_rate": 2.6636083000254244e-06, "loss": 0.1208, "step": 16345 }, { "epoch": 2.3081050550691895, "grad_norm": 3.3923634535983704, "learning_rate": 2.662572459684699e-06, "loss": 0.1492, "step": 16346 }, { "epoch": 2.3082462581191754, "grad_norm": 3.9518864569435106, "learning_rate": 2.661536789862622e-06, "loss": 0.2081, "step": 16347 }, { "epoch": 2.3083874611691613, "grad_norm": 3.4427703625110966, "learning_rate": 2.6605012905832605e-06, "loss": 0.1158, "step": 16348 }, { "epoch": 2.308528664219147, "grad_norm": 3.050607564513119, "learning_rate": 2.6594659618706764e-06, "loss": 0.1288, "step": 16349 }, { "epoch": 2.308669867269133, "grad_norm": 3.637359562099576, "learning_rate": 2.658430803748936e-06, "loss": 0.158, "step": 16350 }, { "epoch": 2.308811070319119, "grad_norm": 3.1845754867021143, "learning_rate": 2.6573958162420933e-06, "loss": 0.1442, "step": 16351 }, { "epoch": 2.308952273369105, "grad_norm": 3.1031455724272203, "learning_rate": 2.656360999374201e-06, "loss": 0.1285, "step": 16352 }, { "epoch": 2.3090934764190907, "grad_norm": 3.184603584345711, "learning_rate": 2.6553263531693096e-06, "loss": 0.1458, "step": 16353 }, { "epoch": 2.3092346794690766, "grad_norm": 3.9334057563022498, "learning_rate": 2.654291877651457e-06, "loss": 0.1942, "step": 16354 }, { "epoch": 2.3093758825190625, "grad_norm": 2.616577803580007, "learning_rate": 2.653257572844692e-06, "loss": 0.1052, "step": 16355 }, { "epoch": 2.3095170855690483, "grad_norm": 2.996515919509376, "learning_rate": 2.652223438773047e-06, "loss": 0.1271, "step": 16356 }, { "epoch": 2.309658288619034, "grad_norm": 3.252248724070849, "learning_rate": 2.651189475460556e-06, "loss": 0.1542, "step": 16357 }, { "epoch": 2.30979949166902, "grad_norm": 3.0989639011684393, "learning_rate": 2.6501556829312492e-06, "loss": 0.1742, "step": 16358 }, { "epoch": 2.309940694719006, "grad_norm": 2.8644413977787155, "learning_rate": 2.6491220612091494e-06, "loss": 0.1185, "step": 16359 }, { "epoch": 2.310081897768992, "grad_norm": 3.6178228549283613, "learning_rate": 2.648088610318278e-06, "loss": 0.1788, "step": 16360 }, { "epoch": 2.3102231008189777, "grad_norm": 3.614776071568069, "learning_rate": 2.6470553302826528e-06, "loss": 0.1711, "step": 16361 }, { "epoch": 2.3103643038689636, "grad_norm": 3.598478003392561, "learning_rate": 2.646022221126285e-06, "loss": 0.1943, "step": 16362 }, { "epoch": 2.3105055069189495, "grad_norm": 2.9720049986047874, "learning_rate": 2.644989282873187e-06, "loss": 0.119, "step": 16363 }, { "epoch": 2.3106467099689354, "grad_norm": 2.8199790678096277, "learning_rate": 2.6439565155473602e-06, "loss": 0.1204, "step": 16364 }, { "epoch": 2.3107879130189213, "grad_norm": 2.851508641450763, "learning_rate": 2.642923919172807e-06, "loss": 0.1451, "step": 16365 }, { "epoch": 2.310929116068907, "grad_norm": 2.9949301656832734, "learning_rate": 2.6418914937735228e-06, "loss": 0.1526, "step": 16366 }, { "epoch": 2.311070319118893, "grad_norm": 3.5990676047779084, "learning_rate": 2.6408592393735043e-06, "loss": 0.1832, "step": 16367 }, { "epoch": 2.311211522168879, "grad_norm": 3.507330424449203, "learning_rate": 2.63982715599674e-06, "loss": 0.177, "step": 16368 }, { "epoch": 2.311352725218865, "grad_norm": 3.3929005113402115, "learning_rate": 2.6387952436672136e-06, "loss": 0.1651, "step": 16369 }, { "epoch": 2.3114939282688507, "grad_norm": 3.813598094774005, "learning_rate": 2.637763502408909e-06, "loss": 0.1855, "step": 16370 }, { "epoch": 2.3116351313188366, "grad_norm": 3.0349285210696713, "learning_rate": 2.636731932245796e-06, "loss": 0.1391, "step": 16371 }, { "epoch": 2.3117763343688225, "grad_norm": 2.647653023374486, "learning_rate": 2.6357005332018557e-06, "loss": 0.1465, "step": 16372 }, { "epoch": 2.3119175374188083, "grad_norm": 3.4645334532934298, "learning_rate": 2.634669305301054e-06, "loss": 0.1359, "step": 16373 }, { "epoch": 2.3120587404687942, "grad_norm": 3.131557614636249, "learning_rate": 2.6336382485673574e-06, "loss": 0.1568, "step": 16374 }, { "epoch": 2.31219994351878, "grad_norm": 2.6967363805764215, "learning_rate": 2.6326073630247263e-06, "loss": 0.1371, "step": 16375 }, { "epoch": 2.312341146568766, "grad_norm": 3.2815187881778245, "learning_rate": 2.631576648697118e-06, "loss": 0.153, "step": 16376 }, { "epoch": 2.312482349618752, "grad_norm": 4.144321720768993, "learning_rate": 2.630546105608488e-06, "loss": 0.1657, "step": 16377 }, { "epoch": 2.3126235526687378, "grad_norm": 2.550874788071806, "learning_rate": 2.6295157337827827e-06, "loss": 0.1401, "step": 16378 }, { "epoch": 2.3127647557187236, "grad_norm": 3.2807492469931834, "learning_rate": 2.6284855332439487e-06, "loss": 0.2081, "step": 16379 }, { "epoch": 2.3129059587687095, "grad_norm": 2.778662753952818, "learning_rate": 2.6274555040159265e-06, "loss": 0.1407, "step": 16380 }, { "epoch": 2.3130471618186954, "grad_norm": 4.453177977532432, "learning_rate": 2.6264256461226555e-06, "loss": 0.2164, "step": 16381 }, { "epoch": 2.3131883648686813, "grad_norm": 3.1399122716964722, "learning_rate": 2.625395959588067e-06, "loss": 0.1471, "step": 16382 }, { "epoch": 2.313329567918667, "grad_norm": 3.9735541025727983, "learning_rate": 2.6243664444360907e-06, "loss": 0.1597, "step": 16383 }, { "epoch": 2.313470770968653, "grad_norm": 3.4836870807083966, "learning_rate": 2.623337100690654e-06, "loss": 0.1516, "step": 16384 }, { "epoch": 2.313611974018639, "grad_norm": 3.6462400979621896, "learning_rate": 2.622307928375678e-06, "loss": 0.1854, "step": 16385 }, { "epoch": 2.313753177068625, "grad_norm": 3.6793319647404914, "learning_rate": 2.6212789275150796e-06, "loss": 0.1995, "step": 16386 }, { "epoch": 2.3138943801186107, "grad_norm": 4.009696522176874, "learning_rate": 2.620250098132775e-06, "loss": 0.177, "step": 16387 }, { "epoch": 2.3140355831685966, "grad_norm": 2.7956257095416848, "learning_rate": 2.6192214402526662e-06, "loss": 0.1627, "step": 16388 }, { "epoch": 2.3141767862185825, "grad_norm": 3.2535714573962413, "learning_rate": 2.618192953898665e-06, "loss": 0.1332, "step": 16389 }, { "epoch": 2.3143179892685684, "grad_norm": 3.023711359002308, "learning_rate": 2.6171646390946727e-06, "loss": 0.1582, "step": 16390 }, { "epoch": 2.3144591923185542, "grad_norm": 3.4527189551117887, "learning_rate": 2.6161364958645853e-06, "loss": 0.1684, "step": 16391 }, { "epoch": 2.31460039536854, "grad_norm": 3.753535233111927, "learning_rate": 2.6151085242322973e-06, "loss": 0.2014, "step": 16392 }, { "epoch": 2.314741598418526, "grad_norm": 3.22188541255434, "learning_rate": 2.614080724221697e-06, "loss": 0.1949, "step": 16393 }, { "epoch": 2.314882801468512, "grad_norm": 3.4738336985084084, "learning_rate": 2.613053095856671e-06, "loss": 0.1777, "step": 16394 }, { "epoch": 2.3150240045184978, "grad_norm": 3.362570328077147, "learning_rate": 2.612025639161102e-06, "loss": 0.1529, "step": 16395 }, { "epoch": 2.3151652075684837, "grad_norm": 3.776681570408574, "learning_rate": 2.6109983541588655e-06, "loss": 0.213, "step": 16396 }, { "epoch": 2.3153064106184695, "grad_norm": 3.240059536412947, "learning_rate": 2.6099712408738363e-06, "loss": 0.1491, "step": 16397 }, { "epoch": 2.3154476136684554, "grad_norm": 2.8735924747500934, "learning_rate": 2.6089442993298854e-06, "loss": 0.1523, "step": 16398 }, { "epoch": 2.3155888167184413, "grad_norm": 3.694435909146389, "learning_rate": 2.607917529550875e-06, "loss": 0.1964, "step": 16399 }, { "epoch": 2.315730019768427, "grad_norm": 4.150072716422877, "learning_rate": 2.606890931560667e-06, "loss": 0.139, "step": 16400 }, { "epoch": 2.315871222818413, "grad_norm": 3.3472483519326555, "learning_rate": 2.6058645053831234e-06, "loss": 0.1777, "step": 16401 }, { "epoch": 2.316012425868399, "grad_norm": 3.146305988925464, "learning_rate": 2.6048382510420954e-06, "loss": 0.144, "step": 16402 }, { "epoch": 2.316153628918385, "grad_norm": 2.8640163148330595, "learning_rate": 2.603812168561434e-06, "loss": 0.1541, "step": 16403 }, { "epoch": 2.3162948319683707, "grad_norm": 2.890223631466561, "learning_rate": 2.6027862579649856e-06, "loss": 0.1385, "step": 16404 }, { "epoch": 2.3164360350183566, "grad_norm": 3.1677013109681966, "learning_rate": 2.6017605192765828e-06, "loss": 0.1884, "step": 16405 }, { "epoch": 2.3165772380683425, "grad_norm": 3.385777501407841, "learning_rate": 2.6007349525200754e-06, "loss": 0.1604, "step": 16406 }, { "epoch": 2.3167184411183284, "grad_norm": 2.7142619248731417, "learning_rate": 2.599709557719291e-06, "loss": 0.0979, "step": 16407 }, { "epoch": 2.3168596441683142, "grad_norm": 3.228369039886004, "learning_rate": 2.5986843348980607e-06, "loss": 0.1339, "step": 16408 }, { "epoch": 2.3170008472183, "grad_norm": 2.5327960897650073, "learning_rate": 2.5976592840802105e-06, "loss": 0.1301, "step": 16409 }, { "epoch": 2.317142050268286, "grad_norm": 3.0167833220466056, "learning_rate": 2.596634405289562e-06, "loss": 0.1266, "step": 16410 }, { "epoch": 2.317283253318272, "grad_norm": 3.625084750290672, "learning_rate": 2.5956096985499315e-06, "loss": 0.1585, "step": 16411 }, { "epoch": 2.317424456368258, "grad_norm": 2.62455777127269, "learning_rate": 2.594585163885135e-06, "loss": 0.128, "step": 16412 }, { "epoch": 2.3175656594182437, "grad_norm": 3.5956733710535684, "learning_rate": 2.5935608013189808e-06, "loss": 0.1799, "step": 16413 }, { "epoch": 2.3177068624682295, "grad_norm": 2.9012610323332164, "learning_rate": 2.592536610875275e-06, "loss": 0.1445, "step": 16414 }, { "epoch": 2.3178480655182154, "grad_norm": 3.8016298296309103, "learning_rate": 2.5915125925778184e-06, "loss": 0.1891, "step": 16415 }, { "epoch": 2.3179892685682013, "grad_norm": 3.3366905535707434, "learning_rate": 2.5904887464504115e-06, "loss": 0.1598, "step": 16416 }, { "epoch": 2.318130471618187, "grad_norm": 2.940637547529869, "learning_rate": 2.589465072516841e-06, "loss": 0.1724, "step": 16417 }, { "epoch": 2.318271674668173, "grad_norm": 3.6323651311567464, "learning_rate": 2.588441570800907e-06, "loss": 0.1899, "step": 16418 }, { "epoch": 2.318412877718159, "grad_norm": 2.6642314341877187, "learning_rate": 2.5874182413263893e-06, "loss": 0.1421, "step": 16419 }, { "epoch": 2.3185540807681444, "grad_norm": 3.1254573362157534, "learning_rate": 2.5863950841170704e-06, "loss": 0.1647, "step": 16420 }, { "epoch": 2.3186952838181303, "grad_norm": 3.8476599094236823, "learning_rate": 2.58537209919673e-06, "loss": 0.1751, "step": 16421 }, { "epoch": 2.318836486868116, "grad_norm": 4.523103611399567, "learning_rate": 2.5843492865891383e-06, "loss": 0.1728, "step": 16422 }, { "epoch": 2.318977689918102, "grad_norm": 3.4659438701668273, "learning_rate": 2.5833266463180628e-06, "loss": 0.1749, "step": 16423 }, { "epoch": 2.319118892968088, "grad_norm": 3.2818464367596203, "learning_rate": 2.5823041784072767e-06, "loss": 0.1382, "step": 16424 }, { "epoch": 2.319260096018074, "grad_norm": 3.684851044143204, "learning_rate": 2.5812818828805376e-06, "loss": 0.1871, "step": 16425 }, { "epoch": 2.3194012990680597, "grad_norm": 3.9861144513427647, "learning_rate": 2.580259759761604e-06, "loss": 0.1837, "step": 16426 }, { "epoch": 2.3195425021180456, "grad_norm": 3.898380848062007, "learning_rate": 2.5792378090742285e-06, "loss": 0.2097, "step": 16427 }, { "epoch": 2.3196837051680315, "grad_norm": 2.8749080304804995, "learning_rate": 2.578216030842162e-06, "loss": 0.153, "step": 16428 }, { "epoch": 2.3198249082180173, "grad_norm": 2.949391624724403, "learning_rate": 2.57719442508915e-06, "loss": 0.1485, "step": 16429 }, { "epoch": 2.3199661112680032, "grad_norm": 3.1638270022944304, "learning_rate": 2.576172991838933e-06, "loss": 0.1977, "step": 16430 }, { "epoch": 2.320107314317989, "grad_norm": 3.2471614146865013, "learning_rate": 2.5751517311152505e-06, "loss": 0.1212, "step": 16431 }, { "epoch": 2.320248517367975, "grad_norm": 4.160561755448681, "learning_rate": 2.5741306429418355e-06, "loss": 0.2176, "step": 16432 }, { "epoch": 2.320389720417961, "grad_norm": 2.6986422340256615, "learning_rate": 2.5731097273424167e-06, "loss": 0.1257, "step": 16433 }, { "epoch": 2.3205309234679468, "grad_norm": 3.790095881024734, "learning_rate": 2.5720889843407205e-06, "loss": 0.2, "step": 16434 }, { "epoch": 2.3206721265179326, "grad_norm": 2.7970784709338323, "learning_rate": 2.5710684139604645e-06, "loss": 0.1536, "step": 16435 }, { "epoch": 2.3208133295679185, "grad_norm": 4.033082085654044, "learning_rate": 2.5700480162253748e-06, "loss": 0.1957, "step": 16436 }, { "epoch": 2.3209545326179044, "grad_norm": 3.162844010322644, "learning_rate": 2.56902779115916e-06, "loss": 0.1314, "step": 16437 }, { "epoch": 2.3210957356678903, "grad_norm": 2.983024541893772, "learning_rate": 2.568007738785533e-06, "loss": 0.1517, "step": 16438 }, { "epoch": 2.321236938717876, "grad_norm": 2.380366618519906, "learning_rate": 2.5669878591281928e-06, "loss": 0.1101, "step": 16439 }, { "epoch": 2.321378141767862, "grad_norm": 3.225538374023024, "learning_rate": 2.5659681522108428e-06, "loss": 0.1566, "step": 16440 }, { "epoch": 2.321519344817848, "grad_norm": 3.518298506648807, "learning_rate": 2.5649486180571846e-06, "loss": 0.1487, "step": 16441 }, { "epoch": 2.321660547867834, "grad_norm": 3.3667398050820747, "learning_rate": 2.5639292566909103e-06, "loss": 0.1437, "step": 16442 }, { "epoch": 2.3218017509178197, "grad_norm": 2.8638485510038825, "learning_rate": 2.562910068135709e-06, "loss": 0.1623, "step": 16443 }, { "epoch": 2.3219429539678056, "grad_norm": 2.6500912379172767, "learning_rate": 2.5618910524152652e-06, "loss": 0.1308, "step": 16444 }, { "epoch": 2.3220841570177915, "grad_norm": 3.7097068185901145, "learning_rate": 2.56087220955326e-06, "loss": 0.196, "step": 16445 }, { "epoch": 2.3222253600677774, "grad_norm": 2.8254862257494016, "learning_rate": 2.5598535395733735e-06, "loss": 0.1247, "step": 16446 }, { "epoch": 2.3223665631177632, "grad_norm": 2.79060646066435, "learning_rate": 2.558835042499277e-06, "loss": 0.1237, "step": 16447 }, { "epoch": 2.322507766167749, "grad_norm": 3.241697220954079, "learning_rate": 2.557816718354641e-06, "loss": 0.1532, "step": 16448 }, { "epoch": 2.322648969217735, "grad_norm": 4.078071514505492, "learning_rate": 2.5567985671631303e-06, "loss": 0.1734, "step": 16449 }, { "epoch": 2.322790172267721, "grad_norm": 3.0657060507610114, "learning_rate": 2.5557805889484055e-06, "loss": 0.1332, "step": 16450 }, { "epoch": 2.3229313753177068, "grad_norm": 2.6627494299789003, "learning_rate": 2.554762783734126e-06, "loss": 0.1168, "step": 16451 }, { "epoch": 2.3230725783676927, "grad_norm": 3.107679582905108, "learning_rate": 2.5537451515439406e-06, "loss": 0.1409, "step": 16452 }, { "epoch": 2.3232137814176785, "grad_norm": 3.28681615917673, "learning_rate": 2.5527276924015053e-06, "loss": 0.153, "step": 16453 }, { "epoch": 2.3233549844676644, "grad_norm": 3.0292876274254485, "learning_rate": 2.551710406330462e-06, "loss": 0.1707, "step": 16454 }, { "epoch": 2.3234961875176503, "grad_norm": 2.8708502362951087, "learning_rate": 2.5506932933544546e-06, "loss": 0.136, "step": 16455 }, { "epoch": 2.323637390567636, "grad_norm": 2.9434675061479125, "learning_rate": 2.549676353497116e-06, "loss": 0.1298, "step": 16456 }, { "epoch": 2.323778593617622, "grad_norm": 3.2042831825592835, "learning_rate": 2.5486595867820786e-06, "loss": 0.1336, "step": 16457 }, { "epoch": 2.323919796667608, "grad_norm": 3.34863361236074, "learning_rate": 2.547642993232976e-06, "loss": 0.1396, "step": 16458 }, { "epoch": 2.324060999717594, "grad_norm": 2.9851152675092276, "learning_rate": 2.546626572873433e-06, "loss": 0.1528, "step": 16459 }, { "epoch": 2.3242022027675797, "grad_norm": 4.272883973913513, "learning_rate": 2.5456103257270693e-06, "loss": 0.1722, "step": 16460 }, { "epoch": 2.3243434058175656, "grad_norm": 2.96529372617313, "learning_rate": 2.5445942518175017e-06, "loss": 0.1602, "step": 16461 }, { "epoch": 2.3244846088675515, "grad_norm": 3.594112981138169, "learning_rate": 2.5435783511683444e-06, "loss": 0.1755, "step": 16462 }, { "epoch": 2.3246258119175374, "grad_norm": 3.081157238779159, "learning_rate": 2.5425626238032063e-06, "loss": 0.1657, "step": 16463 }, { "epoch": 2.3247670149675232, "grad_norm": 3.2102579707230596, "learning_rate": 2.5415470697456923e-06, "loss": 0.1482, "step": 16464 }, { "epoch": 2.324908218017509, "grad_norm": 2.673610361620119, "learning_rate": 2.540531689019403e-06, "loss": 0.133, "step": 16465 }, { "epoch": 2.325049421067495, "grad_norm": 3.1159168194896023, "learning_rate": 2.5395164816479357e-06, "loss": 0.1396, "step": 16466 }, { "epoch": 2.325190624117481, "grad_norm": 3.0107994753628664, "learning_rate": 2.538501447654883e-06, "loss": 0.1868, "step": 16467 }, { "epoch": 2.325331827167467, "grad_norm": 3.1920579968288916, "learning_rate": 2.5374865870638354e-06, "loss": 0.1687, "step": 16468 }, { "epoch": 2.3254730302174527, "grad_norm": 3.387583164670588, "learning_rate": 2.536471899898373e-06, "loss": 0.1791, "step": 16469 }, { "epoch": 2.3256142332674385, "grad_norm": 2.751476675097827, "learning_rate": 2.535457386182083e-06, "loss": 0.1203, "step": 16470 }, { "epoch": 2.3257554363174244, "grad_norm": 3.091689074845375, "learning_rate": 2.5344430459385405e-06, "loss": 0.1725, "step": 16471 }, { "epoch": 2.3258966393674103, "grad_norm": 3.6892545230713383, "learning_rate": 2.533428879191321e-06, "loss": 0.1524, "step": 16472 }, { "epoch": 2.326037842417396, "grad_norm": 3.6783665610010967, "learning_rate": 2.532414885963985e-06, "loss": 0.1609, "step": 16473 }, { "epoch": 2.326179045467382, "grad_norm": 3.837406288610304, "learning_rate": 2.5314010662801e-06, "loss": 0.1754, "step": 16474 }, { "epoch": 2.326320248517368, "grad_norm": 3.092336931697847, "learning_rate": 2.5303874201632318e-06, "loss": 0.1229, "step": 16475 }, { "epoch": 2.326461451567354, "grad_norm": 2.9974326411848833, "learning_rate": 2.529373947636934e-06, "loss": 0.1337, "step": 16476 }, { "epoch": 2.3266026546173397, "grad_norm": 3.457607005198991, "learning_rate": 2.5283606487247593e-06, "loss": 0.1454, "step": 16477 }, { "epoch": 2.3267438576673256, "grad_norm": 2.8847055228465246, "learning_rate": 2.5273475234502565e-06, "loss": 0.1266, "step": 16478 }, { "epoch": 2.3268850607173115, "grad_norm": 2.5110811497891703, "learning_rate": 2.52633457183697e-06, "loss": 0.1392, "step": 16479 }, { "epoch": 2.3270262637672974, "grad_norm": 3.8968157889469235, "learning_rate": 2.5253217939084407e-06, "loss": 0.2032, "step": 16480 }, { "epoch": 2.3271674668172833, "grad_norm": 2.351836597252122, "learning_rate": 2.5243091896882044e-06, "loss": 0.1291, "step": 16481 }, { "epoch": 2.327308669867269, "grad_norm": 3.4933974672248334, "learning_rate": 2.5232967591997946e-06, "loss": 0.1727, "step": 16482 }, { "epoch": 2.327449872917255, "grad_norm": 2.9178322037775355, "learning_rate": 2.5222845024667387e-06, "loss": 0.1246, "step": 16483 }, { "epoch": 2.327591075967241, "grad_norm": 2.859289921747814, "learning_rate": 2.5212724195125616e-06, "loss": 0.127, "step": 16484 }, { "epoch": 2.327732279017227, "grad_norm": 2.882985425477126, "learning_rate": 2.5202605103607835e-06, "loss": 0.1214, "step": 16485 }, { "epoch": 2.3278734820672127, "grad_norm": 2.6793884444513014, "learning_rate": 2.519248775034918e-06, "loss": 0.1402, "step": 16486 }, { "epoch": 2.3280146851171986, "grad_norm": 2.911587990592806, "learning_rate": 2.5182372135584845e-06, "loss": 0.1396, "step": 16487 }, { "epoch": 2.3281558881671844, "grad_norm": 2.69294614063943, "learning_rate": 2.5172258259549854e-06, "loss": 0.1456, "step": 16488 }, { "epoch": 2.3282970912171703, "grad_norm": 3.0806633235901044, "learning_rate": 2.51621461224793e-06, "loss": 0.1615, "step": 16489 }, { "epoch": 2.328438294267156, "grad_norm": 3.289429309932711, "learning_rate": 2.5152035724608117e-06, "loss": 0.1418, "step": 16490 }, { "epoch": 2.328579497317142, "grad_norm": 2.959839271503791, "learning_rate": 2.514192706617128e-06, "loss": 0.1293, "step": 16491 }, { "epoch": 2.328720700367128, "grad_norm": 2.7496895741671237, "learning_rate": 2.513182014740375e-06, "loss": 0.1421, "step": 16492 }, { "epoch": 2.328861903417114, "grad_norm": 3.9452892724020496, "learning_rate": 2.512171496854039e-06, "loss": 0.1837, "step": 16493 }, { "epoch": 2.3290031064670997, "grad_norm": 2.654354611518729, "learning_rate": 2.511161152981604e-06, "loss": 0.1506, "step": 16494 }, { "epoch": 2.3291443095170856, "grad_norm": 3.0328830689270228, "learning_rate": 2.510150983146549e-06, "loss": 0.1308, "step": 16495 }, { "epoch": 2.3292855125670715, "grad_norm": 3.8549278202495048, "learning_rate": 2.5091409873723506e-06, "loss": 0.1786, "step": 16496 }, { "epoch": 2.3294267156170574, "grad_norm": 2.907983529307993, "learning_rate": 2.5081311656824803e-06, "loss": 0.1259, "step": 16497 }, { "epoch": 2.3295679186670433, "grad_norm": 3.1004713950456737, "learning_rate": 2.5071215181004073e-06, "loss": 0.1702, "step": 16498 }, { "epoch": 2.329709121717029, "grad_norm": 3.4572335442927677, "learning_rate": 2.5061120446495935e-06, "loss": 0.1421, "step": 16499 }, { "epoch": 2.329850324767015, "grad_norm": 2.534071190538496, "learning_rate": 2.505102745353499e-06, "loss": 0.0994, "step": 16500 }, { "epoch": 2.329991527817001, "grad_norm": 3.021538377141395, "learning_rate": 2.5040936202355802e-06, "loss": 0.1567, "step": 16501 }, { "epoch": 2.330132730866987, "grad_norm": 3.8478691041602144, "learning_rate": 2.503084669319289e-06, "loss": 0.165, "step": 16502 }, { "epoch": 2.3302739339169727, "grad_norm": 3.3804615949282666, "learning_rate": 2.50207589262807e-06, "loss": 0.1078, "step": 16503 }, { "epoch": 2.3304151369669586, "grad_norm": 2.9548918299353555, "learning_rate": 2.5010672901853704e-06, "loss": 0.1657, "step": 16504 }, { "epoch": 2.3305563400169444, "grad_norm": 2.736421115640989, "learning_rate": 2.5000588620146326e-06, "loss": 0.1236, "step": 16505 }, { "epoch": 2.3306975430669303, "grad_norm": 3.6564320649823805, "learning_rate": 2.499050608139284e-06, "loss": 0.1718, "step": 16506 }, { "epoch": 2.330838746116916, "grad_norm": 3.0949297295631504, "learning_rate": 2.4980425285827614e-06, "loss": 0.1457, "step": 16507 }, { "epoch": 2.330979949166902, "grad_norm": 4.118557755510834, "learning_rate": 2.4970346233684863e-06, "loss": 0.1803, "step": 16508 }, { "epoch": 2.331121152216888, "grad_norm": 3.050454004761832, "learning_rate": 2.4960268925198894e-06, "loss": 0.1441, "step": 16509 }, { "epoch": 2.331262355266874, "grad_norm": 2.8739019298655193, "learning_rate": 2.4950193360603868e-06, "loss": 0.1612, "step": 16510 }, { "epoch": 2.3314035583168597, "grad_norm": 2.8547235004220872, "learning_rate": 2.4940119540133943e-06, "loss": 0.1349, "step": 16511 }, { "epoch": 2.3315447613668456, "grad_norm": 2.5900418194657155, "learning_rate": 2.493004746402322e-06, "loss": 0.087, "step": 16512 }, { "epoch": 2.3316859644168315, "grad_norm": 2.8248535712875356, "learning_rate": 2.491997713250577e-06, "loss": 0.1206, "step": 16513 }, { "epoch": 2.3318271674668174, "grad_norm": 3.1426631052514953, "learning_rate": 2.490990854581563e-06, "loss": 0.1478, "step": 16514 }, { "epoch": 2.3319683705168033, "grad_norm": 3.289241403571953, "learning_rate": 2.489984170418679e-06, "loss": 0.1756, "step": 16515 }, { "epoch": 2.332109573566789, "grad_norm": 3.694447586799479, "learning_rate": 2.488977660785319e-06, "loss": 0.1589, "step": 16516 }, { "epoch": 2.332250776616775, "grad_norm": 3.2179673842811334, "learning_rate": 2.4879713257048743e-06, "loss": 0.1423, "step": 16517 }, { "epoch": 2.332391979666761, "grad_norm": 3.284648967000756, "learning_rate": 2.486965165200733e-06, "loss": 0.1859, "step": 16518 }, { "epoch": 2.332533182716747, "grad_norm": 2.658455622729808, "learning_rate": 2.4859591792962754e-06, "loss": 0.1272, "step": 16519 }, { "epoch": 2.3326743857667327, "grad_norm": 2.9458215247635215, "learning_rate": 2.4849533680148787e-06, "loss": 0.1203, "step": 16520 }, { "epoch": 2.3328155888167186, "grad_norm": 4.041647663048573, "learning_rate": 2.483947731379923e-06, "loss": 0.1815, "step": 16521 }, { "epoch": 2.3329567918667045, "grad_norm": 3.0737443440292047, "learning_rate": 2.4829422694147796e-06, "loss": 0.1423, "step": 16522 }, { "epoch": 2.3330979949166903, "grad_norm": 2.667578138844263, "learning_rate": 2.4819369821428085e-06, "loss": 0.1215, "step": 16523 }, { "epoch": 2.3332391979666762, "grad_norm": 3.80941077009797, "learning_rate": 2.480931869587375e-06, "loss": 0.1935, "step": 16524 }, { "epoch": 2.333380401016662, "grad_norm": 3.529374039841733, "learning_rate": 2.4799269317718377e-06, "loss": 0.1684, "step": 16525 }, { "epoch": 2.333521604066648, "grad_norm": 2.873313247022884, "learning_rate": 2.4789221687195473e-06, "loss": 0.167, "step": 16526 }, { "epoch": 2.333662807116634, "grad_norm": 3.487601642856519, "learning_rate": 2.4779175804538613e-06, "loss": 0.1686, "step": 16527 }, { "epoch": 2.3338040101666198, "grad_norm": 2.6357737909256738, "learning_rate": 2.4769131669981217e-06, "loss": 0.1116, "step": 16528 }, { "epoch": 2.3339452132166056, "grad_norm": 3.827574220568226, "learning_rate": 2.475908928375671e-06, "loss": 0.1844, "step": 16529 }, { "epoch": 2.3340864162665915, "grad_norm": 3.2102102557773753, "learning_rate": 2.4749048646098486e-06, "loss": 0.1495, "step": 16530 }, { "epoch": 2.3342276193165774, "grad_norm": 3.4073848972601106, "learning_rate": 2.4739009757239853e-06, "loss": 0.1685, "step": 16531 }, { "epoch": 2.3343688223665633, "grad_norm": 4.186464274163126, "learning_rate": 2.472897261741415e-06, "loss": 0.1876, "step": 16532 }, { "epoch": 2.334510025416549, "grad_norm": 3.6508384041255906, "learning_rate": 2.47189372268546e-06, "loss": 0.1527, "step": 16533 }, { "epoch": 2.334651228466535, "grad_norm": 2.869233319162469, "learning_rate": 2.470890358579444e-06, "loss": 0.1179, "step": 16534 }, { "epoch": 2.334792431516521, "grad_norm": 4.240485557982178, "learning_rate": 2.469887169446685e-06, "loss": 0.2037, "step": 16535 }, { "epoch": 2.334933634566507, "grad_norm": 3.3180778477754758, "learning_rate": 2.468884155310497e-06, "loss": 0.1422, "step": 16536 }, { "epoch": 2.3350748376164927, "grad_norm": 3.916534792935674, "learning_rate": 2.4678813161941883e-06, "loss": 0.1948, "step": 16537 }, { "epoch": 2.3352160406664786, "grad_norm": 3.2030305782821324, "learning_rate": 2.466878652121061e-06, "loss": 0.1434, "step": 16538 }, { "epoch": 2.335357243716464, "grad_norm": 2.5607439757271755, "learning_rate": 2.465876163114427e-06, "loss": 0.1315, "step": 16539 }, { "epoch": 2.33549844676645, "grad_norm": 3.343455180028635, "learning_rate": 2.4648738491975745e-06, "loss": 0.1568, "step": 16540 }, { "epoch": 2.335639649816436, "grad_norm": 3.1983977486863475, "learning_rate": 2.463871710393799e-06, "loss": 0.1524, "step": 16541 }, { "epoch": 2.3357808528664217, "grad_norm": 2.989297112298677, "learning_rate": 2.4628697467263916e-06, "loss": 0.1437, "step": 16542 }, { "epoch": 2.3359220559164076, "grad_norm": 3.3647711591525056, "learning_rate": 2.461867958218632e-06, "loss": 0.1628, "step": 16543 }, { "epoch": 2.3360632589663934, "grad_norm": 3.0044997569299166, "learning_rate": 2.46086634489381e-06, "loss": 0.1535, "step": 16544 }, { "epoch": 2.3362044620163793, "grad_norm": 3.2693348997836735, "learning_rate": 2.459864906775197e-06, "loss": 0.2016, "step": 16545 }, { "epoch": 2.336345665066365, "grad_norm": 2.981277535063776, "learning_rate": 2.458863643886067e-06, "loss": 0.1456, "step": 16546 }, { "epoch": 2.336486868116351, "grad_norm": 3.3900866101662372, "learning_rate": 2.4578625562496896e-06, "loss": 0.1555, "step": 16547 }, { "epoch": 2.336628071166337, "grad_norm": 3.4991319543809443, "learning_rate": 2.4568616438893287e-06, "loss": 0.1643, "step": 16548 }, { "epoch": 2.336769274216323, "grad_norm": 3.4018544580317003, "learning_rate": 2.455860906828247e-06, "loss": 0.1747, "step": 16549 }, { "epoch": 2.3369104772663087, "grad_norm": 2.9110265415148864, "learning_rate": 2.454860345089698e-06, "loss": 0.1514, "step": 16550 }, { "epoch": 2.3370516803162946, "grad_norm": 3.1134507919253096, "learning_rate": 2.4538599586969367e-06, "loss": 0.1831, "step": 16551 }, { "epoch": 2.3371928833662805, "grad_norm": 3.1770611669860194, "learning_rate": 2.452859747673212e-06, "loss": 0.1674, "step": 16552 }, { "epoch": 2.3373340864162664, "grad_norm": 3.1066391961167534, "learning_rate": 2.4518597120417665e-06, "loss": 0.166, "step": 16553 }, { "epoch": 2.3374752894662523, "grad_norm": 4.481824898006849, "learning_rate": 2.450859851825842e-06, "loss": 0.2016, "step": 16554 }, { "epoch": 2.337616492516238, "grad_norm": 2.829831441655758, "learning_rate": 2.449860167048671e-06, "loss": 0.1297, "step": 16555 }, { "epoch": 2.337757695566224, "grad_norm": 3.188729150078145, "learning_rate": 2.448860657733495e-06, "loss": 0.1734, "step": 16556 }, { "epoch": 2.33789889861621, "grad_norm": 3.593987130151595, "learning_rate": 2.4478613239035333e-06, "loss": 0.1492, "step": 16557 }, { "epoch": 2.338040101666196, "grad_norm": 3.217982043014439, "learning_rate": 2.4468621655820125e-06, "loss": 0.1315, "step": 16558 }, { "epoch": 2.3381813047161817, "grad_norm": 3.320377481625558, "learning_rate": 2.4458631827921543e-06, "loss": 0.1249, "step": 16559 }, { "epoch": 2.3383225077661676, "grad_norm": 3.8329020272087906, "learning_rate": 2.4448643755571687e-06, "loss": 0.1982, "step": 16560 }, { "epoch": 2.3384637108161535, "grad_norm": 4.039504830310931, "learning_rate": 2.4438657439002765e-06, "loss": 0.1842, "step": 16561 }, { "epoch": 2.3386049138661393, "grad_norm": 3.5339873136152304, "learning_rate": 2.4428672878446803e-06, "loss": 0.1757, "step": 16562 }, { "epoch": 2.338746116916125, "grad_norm": 3.166911841498467, "learning_rate": 2.441869007413585e-06, "loss": 0.1484, "step": 16563 }, { "epoch": 2.338887319966111, "grad_norm": 3.439817063640929, "learning_rate": 2.4408709026301903e-06, "loss": 0.1261, "step": 16564 }, { "epoch": 2.339028523016097, "grad_norm": 3.078681715728318, "learning_rate": 2.4398729735176907e-06, "loss": 0.1652, "step": 16565 }, { "epoch": 2.339169726066083, "grad_norm": 3.06932197485248, "learning_rate": 2.438875220099278e-06, "loss": 0.1739, "step": 16566 }, { "epoch": 2.3393109291160687, "grad_norm": 4.662164107228458, "learning_rate": 2.4378776423981398e-06, "loss": 0.1685, "step": 16567 }, { "epoch": 2.3394521321660546, "grad_norm": 2.92490531564976, "learning_rate": 2.43688024043746e-06, "loss": 0.1399, "step": 16568 }, { "epoch": 2.3395933352160405, "grad_norm": 3.500153577893397, "learning_rate": 2.4358830142404166e-06, "loss": 0.1716, "step": 16569 }, { "epoch": 2.3397345382660264, "grad_norm": 2.9972560004876425, "learning_rate": 2.4348859638301857e-06, "loss": 0.1281, "step": 16570 }, { "epoch": 2.3398757413160123, "grad_norm": 4.310056007915561, "learning_rate": 2.4338890892299383e-06, "loss": 0.1762, "step": 16571 }, { "epoch": 2.340016944365998, "grad_norm": 3.098136607355675, "learning_rate": 2.4328923904628376e-06, "loss": 0.156, "step": 16572 }, { "epoch": 2.340158147415984, "grad_norm": 3.3087486635333203, "learning_rate": 2.4318958675520553e-06, "loss": 0.1322, "step": 16573 }, { "epoch": 2.34029935046597, "grad_norm": 3.3678099704468707, "learning_rate": 2.430899520520741e-06, "loss": 0.1726, "step": 16574 }, { "epoch": 2.340440553515956, "grad_norm": 3.4524527610445, "learning_rate": 2.4299033493920543e-06, "loss": 0.1554, "step": 16575 }, { "epoch": 2.3405817565659417, "grad_norm": 3.602610267448821, "learning_rate": 2.428907354189144e-06, "loss": 0.1729, "step": 16576 }, { "epoch": 2.3407229596159276, "grad_norm": 3.604474011203195, "learning_rate": 2.4279115349351546e-06, "loss": 0.1954, "step": 16577 }, { "epoch": 2.3408641626659135, "grad_norm": 3.7291353000982945, "learning_rate": 2.4269158916532332e-06, "loss": 0.1755, "step": 16578 }, { "epoch": 2.3410053657158993, "grad_norm": 3.649894958722056, "learning_rate": 2.4259204243665157e-06, "loss": 0.2109, "step": 16579 }, { "epoch": 2.3411465687658852, "grad_norm": 2.7950855341051213, "learning_rate": 2.424925133098137e-06, "loss": 0.1157, "step": 16580 }, { "epoch": 2.341287771815871, "grad_norm": 3.0345121900505707, "learning_rate": 2.4239300178712265e-06, "loss": 0.1792, "step": 16581 }, { "epoch": 2.341428974865857, "grad_norm": 3.03045270370139, "learning_rate": 2.422935078708911e-06, "loss": 0.1588, "step": 16582 }, { "epoch": 2.341570177915843, "grad_norm": 2.832531245571322, "learning_rate": 2.4219403156343123e-06, "loss": 0.1662, "step": 16583 }, { "epoch": 2.3417113809658288, "grad_norm": 2.9034395610127453, "learning_rate": 2.4209457286705475e-06, "loss": 0.1434, "step": 16584 }, { "epoch": 2.3418525840158146, "grad_norm": 3.448512474951474, "learning_rate": 2.4199513178407306e-06, "loss": 0.1993, "step": 16585 }, { "epoch": 2.3419937870658005, "grad_norm": 3.803943184354228, "learning_rate": 2.418957083167972e-06, "loss": 0.152, "step": 16586 }, { "epoch": 2.3421349901157864, "grad_norm": 2.565057292524917, "learning_rate": 2.417963024675376e-06, "loss": 0.114, "step": 16587 }, { "epoch": 2.3422761931657723, "grad_norm": 3.9042882144797035, "learning_rate": 2.4169691423860454e-06, "loss": 0.1759, "step": 16588 }, { "epoch": 2.342417396215758, "grad_norm": 2.8044060496096983, "learning_rate": 2.4159754363230745e-06, "loss": 0.1352, "step": 16589 }, { "epoch": 2.342558599265744, "grad_norm": 3.6356439952734116, "learning_rate": 2.414981906509565e-06, "loss": 0.2297, "step": 16590 }, { "epoch": 2.34269980231573, "grad_norm": 3.473421090058503, "learning_rate": 2.413988552968597e-06, "loss": 0.1814, "step": 16591 }, { "epoch": 2.342841005365716, "grad_norm": 3.74932791425919, "learning_rate": 2.4129953757232584e-06, "loss": 0.1659, "step": 16592 }, { "epoch": 2.3429822084157017, "grad_norm": 3.3188668468929894, "learning_rate": 2.4120023747966314e-06, "loss": 0.1607, "step": 16593 }, { "epoch": 2.3431234114656876, "grad_norm": 2.5480852223318604, "learning_rate": 2.411009550211789e-06, "loss": 0.1373, "step": 16594 }, { "epoch": 2.3432646145156735, "grad_norm": 3.5204785045908005, "learning_rate": 2.4100169019918095e-06, "loss": 0.1769, "step": 16595 }, { "epoch": 2.3434058175656594, "grad_norm": 3.175146724564063, "learning_rate": 2.409024430159761e-06, "loss": 0.1264, "step": 16596 }, { "epoch": 2.3435470206156452, "grad_norm": 3.294022770184792, "learning_rate": 2.4080321347387046e-06, "loss": 0.1724, "step": 16597 }, { "epoch": 2.343688223665631, "grad_norm": 3.222489863851626, "learning_rate": 2.4070400157517036e-06, "loss": 0.1696, "step": 16598 }, { "epoch": 2.343829426715617, "grad_norm": 2.9726430405008135, "learning_rate": 2.406048073221814e-06, "loss": 0.1166, "step": 16599 }, { "epoch": 2.343970629765603, "grad_norm": 3.3208264696955636, "learning_rate": 2.4050563071720867e-06, "loss": 0.1321, "step": 16600 }, { "epoch": 2.3441118328155888, "grad_norm": 3.4581316712527945, "learning_rate": 2.4040647176255717e-06, "loss": 0.1581, "step": 16601 }, { "epoch": 2.3442530358655747, "grad_norm": 4.008794991202921, "learning_rate": 2.403073304605311e-06, "loss": 0.2211, "step": 16602 }, { "epoch": 2.3443942389155605, "grad_norm": 3.0608931162305844, "learning_rate": 2.402082068134347e-06, "loss": 0.1214, "step": 16603 }, { "epoch": 2.3445354419655464, "grad_norm": 2.3348591645265397, "learning_rate": 2.401091008235714e-06, "loss": 0.1306, "step": 16604 }, { "epoch": 2.3446766450155323, "grad_norm": 3.347525126758426, "learning_rate": 2.400100124932444e-06, "loss": 0.1892, "step": 16605 }, { "epoch": 2.344817848065518, "grad_norm": 2.7269688445431366, "learning_rate": 2.399109418247563e-06, "loss": 0.1224, "step": 16606 }, { "epoch": 2.344959051115504, "grad_norm": 3.1756162212093226, "learning_rate": 2.3981188882041005e-06, "loss": 0.1838, "step": 16607 }, { "epoch": 2.34510025416549, "grad_norm": 3.5462293554996283, "learning_rate": 2.3971285348250705e-06, "loss": 0.1719, "step": 16608 }, { "epoch": 2.345241457215476, "grad_norm": 2.848473369226521, "learning_rate": 2.3961383581334897e-06, "loss": 0.1681, "step": 16609 }, { "epoch": 2.3453826602654617, "grad_norm": 3.1394107245903853, "learning_rate": 2.3951483581523694e-06, "loss": 0.1309, "step": 16610 }, { "epoch": 2.3455238633154476, "grad_norm": 3.26245157482, "learning_rate": 2.3941585349047146e-06, "loss": 0.1645, "step": 16611 }, { "epoch": 2.3456650663654335, "grad_norm": 3.1654875368667583, "learning_rate": 2.393168888413533e-06, "loss": 0.1532, "step": 16612 }, { "epoch": 2.3458062694154194, "grad_norm": 3.941693657191256, "learning_rate": 2.392179418701822e-06, "loss": 0.2157, "step": 16613 }, { "epoch": 2.3459474724654052, "grad_norm": 3.86797988631536, "learning_rate": 2.3911901257925752e-06, "loss": 0.2009, "step": 16614 }, { "epoch": 2.346088675515391, "grad_norm": 3.1862774778825966, "learning_rate": 2.3902010097087834e-06, "loss": 0.1885, "step": 16615 }, { "epoch": 2.346229878565377, "grad_norm": 3.2327824101503584, "learning_rate": 2.389212070473438e-06, "loss": 0.1674, "step": 16616 }, { "epoch": 2.346371081615363, "grad_norm": 3.7925712040934125, "learning_rate": 2.388223308109511e-06, "loss": 0.2214, "step": 16617 }, { "epoch": 2.346512284665349, "grad_norm": 3.4998147652875464, "learning_rate": 2.3872347226399895e-06, "loss": 0.1864, "step": 16618 }, { "epoch": 2.3466534877153347, "grad_norm": 2.7499813477225166, "learning_rate": 2.3862463140878443e-06, "loss": 0.162, "step": 16619 }, { "epoch": 2.3467946907653205, "grad_norm": 2.5572899571543988, "learning_rate": 2.3852580824760487e-06, "loss": 0.1223, "step": 16620 }, { "epoch": 2.3469358938153064, "grad_norm": 3.4136451492428357, "learning_rate": 2.3842700278275656e-06, "loss": 0.1202, "step": 16621 }, { "epoch": 2.3470770968652923, "grad_norm": 2.503281566434876, "learning_rate": 2.383282150165358e-06, "loss": 0.1417, "step": 16622 }, { "epoch": 2.347218299915278, "grad_norm": 3.15305953250733, "learning_rate": 2.3822944495123814e-06, "loss": 0.154, "step": 16623 }, { "epoch": 2.347359502965264, "grad_norm": 3.066299530875791, "learning_rate": 2.381306925891598e-06, "loss": 0.1457, "step": 16624 }, { "epoch": 2.34750070601525, "grad_norm": 2.6453775273740763, "learning_rate": 2.380319579325947e-06, "loss": 0.1576, "step": 16625 }, { "epoch": 2.347641909065236, "grad_norm": 3.4606002356100527, "learning_rate": 2.3793324098383796e-06, "loss": 0.1887, "step": 16626 }, { "epoch": 2.3477831121152217, "grad_norm": 3.5277398877317303, "learning_rate": 2.3783454174518362e-06, "loss": 0.1885, "step": 16627 }, { "epoch": 2.3479243151652076, "grad_norm": 2.888798441463067, "learning_rate": 2.3773586021892524e-06, "loss": 0.1173, "step": 16628 }, { "epoch": 2.3480655182151935, "grad_norm": 2.8162296064778802, "learning_rate": 2.3763719640735603e-06, "loss": 0.1182, "step": 16629 }, { "epoch": 2.3482067212651794, "grad_norm": 2.8841728080694145, "learning_rate": 2.375385503127695e-06, "loss": 0.1309, "step": 16630 }, { "epoch": 2.3483479243151653, "grad_norm": 3.3802075816301445, "learning_rate": 2.3743992193745778e-06, "loss": 0.1724, "step": 16631 }, { "epoch": 2.348489127365151, "grad_norm": 2.687398088693144, "learning_rate": 2.3734131128371283e-06, "loss": 0.1497, "step": 16632 }, { "epoch": 2.348630330415137, "grad_norm": 4.636465219397901, "learning_rate": 2.3724271835382682e-06, "loss": 0.2241, "step": 16633 }, { "epoch": 2.348771533465123, "grad_norm": 3.8390331457788838, "learning_rate": 2.3714414315009005e-06, "loss": 0.1323, "step": 16634 }, { "epoch": 2.348912736515109, "grad_norm": 2.5822990595413744, "learning_rate": 2.370455856747942e-06, "loss": 0.1411, "step": 16635 }, { "epoch": 2.3490539395650947, "grad_norm": 3.463292827111964, "learning_rate": 2.3694704593022942e-06, "loss": 0.1933, "step": 16636 }, { "epoch": 2.3491951426150806, "grad_norm": 3.6340970982519853, "learning_rate": 2.3684852391868586e-06, "loss": 0.1706, "step": 16637 }, { "epoch": 2.3493363456650664, "grad_norm": 3.560038650204616, "learning_rate": 2.367500196424529e-06, "loss": 0.2171, "step": 16638 }, { "epoch": 2.3494775487150523, "grad_norm": 2.905858217558989, "learning_rate": 2.3665153310381994e-06, "loss": 0.1538, "step": 16639 }, { "epoch": 2.349618751765038, "grad_norm": 2.512841239871109, "learning_rate": 2.3655306430507563e-06, "loss": 0.1452, "step": 16640 }, { "epoch": 2.349759954815024, "grad_norm": 2.7191356279700205, "learning_rate": 2.364546132485085e-06, "loss": 0.1075, "step": 16641 }, { "epoch": 2.34990115786501, "grad_norm": 2.518868744419248, "learning_rate": 2.3635617993640625e-06, "loss": 0.1092, "step": 16642 }, { "epoch": 2.350042360914996, "grad_norm": 3.4741598068897317, "learning_rate": 2.3625776437105674e-06, "loss": 0.1809, "step": 16643 }, { "epoch": 2.3501835639649817, "grad_norm": 3.3994506491930263, "learning_rate": 2.3615936655474693e-06, "loss": 0.1559, "step": 16644 }, { "epoch": 2.3503247670149676, "grad_norm": 3.1450527318004053, "learning_rate": 2.360609864897635e-06, "loss": 0.1611, "step": 16645 }, { "epoch": 2.3504659700649535, "grad_norm": 3.057692533434694, "learning_rate": 2.3596262417839256e-06, "loss": 0.1265, "step": 16646 }, { "epoch": 2.3506071731149394, "grad_norm": 2.570076215118854, "learning_rate": 2.3586427962292046e-06, "loss": 0.1232, "step": 16647 }, { "epoch": 2.3507483761649253, "grad_norm": 3.093022008073182, "learning_rate": 2.357659528256326e-06, "loss": 0.1378, "step": 16648 }, { "epoch": 2.350889579214911, "grad_norm": 3.0584504503702106, "learning_rate": 2.3566764378881392e-06, "loss": 0.1537, "step": 16649 }, { "epoch": 2.351030782264897, "grad_norm": 2.7159109409179063, "learning_rate": 2.3556935251474946e-06, "loss": 0.1337, "step": 16650 }, { "epoch": 2.351171985314883, "grad_norm": 3.12146019371224, "learning_rate": 2.354710790057225e-06, "loss": 0.1319, "step": 16651 }, { "epoch": 2.351313188364869, "grad_norm": 2.975046790948436, "learning_rate": 2.3537282326401777e-06, "loss": 0.1638, "step": 16652 }, { "epoch": 2.3514543914148547, "grad_norm": 2.4660571084825857, "learning_rate": 2.352745852919184e-06, "loss": 0.135, "step": 16653 }, { "epoch": 2.3515955944648406, "grad_norm": 3.3047830967455356, "learning_rate": 2.351763650917074e-06, "loss": 0.1786, "step": 16654 }, { "epoch": 2.3517367975148264, "grad_norm": 3.138636760757721, "learning_rate": 2.350781626656674e-06, "loss": 0.1252, "step": 16655 }, { "epoch": 2.3518780005648123, "grad_norm": 2.685413982261762, "learning_rate": 2.3497997801608054e-06, "loss": 0.1133, "step": 16656 }, { "epoch": 2.352019203614798, "grad_norm": 3.402290601550504, "learning_rate": 2.348818111452287e-06, "loss": 0.153, "step": 16657 }, { "epoch": 2.352160406664784, "grad_norm": 2.4690867644021073, "learning_rate": 2.3478366205539295e-06, "loss": 0.1169, "step": 16658 }, { "epoch": 2.35230160971477, "grad_norm": 4.6218114745324925, "learning_rate": 2.346855307488546e-06, "loss": 0.2045, "step": 16659 }, { "epoch": 2.352442812764756, "grad_norm": 3.61000026408447, "learning_rate": 2.345874172278939e-06, "loss": 0.1093, "step": 16660 }, { "epoch": 2.3525840158147417, "grad_norm": 4.127194737828726, "learning_rate": 2.3448932149479107e-06, "loss": 0.2156, "step": 16661 }, { "epoch": 2.3527252188647276, "grad_norm": 3.082974617770854, "learning_rate": 2.3439124355182584e-06, "loss": 0.1364, "step": 16662 }, { "epoch": 2.3528664219147135, "grad_norm": 3.7364178413406064, "learning_rate": 2.3429318340127717e-06, "loss": 0.1545, "step": 16663 }, { "epoch": 2.3530076249646994, "grad_norm": 3.1461949329827887, "learning_rate": 2.341951410454245e-06, "loss": 0.1696, "step": 16664 }, { "epoch": 2.3531488280146853, "grad_norm": 2.8644655661195304, "learning_rate": 2.340971164865461e-06, "loss": 0.126, "step": 16665 }, { "epoch": 2.353290031064671, "grad_norm": 3.0838767382371137, "learning_rate": 2.3399910972692017e-06, "loss": 0.1733, "step": 16666 }, { "epoch": 2.353431234114657, "grad_norm": 2.9588600779669156, "learning_rate": 2.339011207688239e-06, "loss": 0.1192, "step": 16667 }, { "epoch": 2.353572437164643, "grad_norm": 2.811973582361199, "learning_rate": 2.338031496145343e-06, "loss": 0.1279, "step": 16668 }, { "epoch": 2.353713640214629, "grad_norm": 2.670454618637573, "learning_rate": 2.3370519626632904e-06, "loss": 0.1035, "step": 16669 }, { "epoch": 2.3538548432646147, "grad_norm": 2.617087461016208, "learning_rate": 2.33607260726484e-06, "loss": 0.0981, "step": 16670 }, { "epoch": 2.3539960463146006, "grad_norm": 2.920268041675882, "learning_rate": 2.335093429972752e-06, "loss": 0.1584, "step": 16671 }, { "epoch": 2.3541372493645865, "grad_norm": 3.2378570906990354, "learning_rate": 2.334114430809784e-06, "loss": 0.1071, "step": 16672 }, { "epoch": 2.3542784524145723, "grad_norm": 2.564055895149629, "learning_rate": 2.3331356097986848e-06, "loss": 0.1503, "step": 16673 }, { "epoch": 2.3544196554645582, "grad_norm": 3.4253511107625405, "learning_rate": 2.3321569669622034e-06, "loss": 0.148, "step": 16674 }, { "epoch": 2.354560858514544, "grad_norm": 2.8541535218961873, "learning_rate": 2.3311785023230825e-06, "loss": 0.1312, "step": 16675 }, { "epoch": 2.35470206156453, "grad_norm": 3.51530082540038, "learning_rate": 2.3302002159040613e-06, "loss": 0.1446, "step": 16676 }, { "epoch": 2.354843264614516, "grad_norm": 3.2601604902415198, "learning_rate": 2.3292221077278744e-06, "loss": 0.153, "step": 16677 }, { "epoch": 2.3549844676645018, "grad_norm": 2.869569567007493, "learning_rate": 2.328244177817254e-06, "loss": 0.1369, "step": 16678 }, { "epoch": 2.3551256707144876, "grad_norm": 3.263088299865168, "learning_rate": 2.3272664261949253e-06, "loss": 0.1475, "step": 16679 }, { "epoch": 2.3552668737644735, "grad_norm": 2.5873052417090174, "learning_rate": 2.326288852883607e-06, "loss": 0.1205, "step": 16680 }, { "epoch": 2.3554080768144594, "grad_norm": 3.1899124304130146, "learning_rate": 2.3253114579060266e-06, "loss": 0.1477, "step": 16681 }, { "epoch": 2.3555492798644453, "grad_norm": 3.274804817949863, "learning_rate": 2.3243342412848923e-06, "loss": 0.1682, "step": 16682 }, { "epoch": 2.355690482914431, "grad_norm": 3.0463938556398413, "learning_rate": 2.3233572030429187e-06, "loss": 0.151, "step": 16683 }, { "epoch": 2.355831685964417, "grad_norm": 3.062068376514406, "learning_rate": 2.3223803432028046e-06, "loss": 0.1366, "step": 16684 }, { "epoch": 2.355972889014403, "grad_norm": 3.765162928821967, "learning_rate": 2.3214036617872536e-06, "loss": 0.1624, "step": 16685 }, { "epoch": 2.356114092064389, "grad_norm": 4.36960310945819, "learning_rate": 2.3204271588189685e-06, "loss": 0.2004, "step": 16686 }, { "epoch": 2.3562552951143747, "grad_norm": 2.8512451549550377, "learning_rate": 2.319450834320639e-06, "loss": 0.1189, "step": 16687 }, { "epoch": 2.3563964981643606, "grad_norm": 3.4423166055682928, "learning_rate": 2.3184746883149556e-06, "loss": 0.1642, "step": 16688 }, { "epoch": 2.3565377012143465, "grad_norm": 3.217575098854573, "learning_rate": 2.3174987208246038e-06, "loss": 0.141, "step": 16689 }, { "epoch": 2.3566789042643324, "grad_norm": 3.1845278937044843, "learning_rate": 2.3165229318722627e-06, "loss": 0.1622, "step": 16690 }, { "epoch": 2.3568201073143182, "grad_norm": 3.8978800224577665, "learning_rate": 2.31554732148061e-06, "loss": 0.1833, "step": 16691 }, { "epoch": 2.356961310364304, "grad_norm": 3.2537622388035787, "learning_rate": 2.3145718896723204e-06, "loss": 0.1585, "step": 16692 }, { "epoch": 2.3571025134142896, "grad_norm": 3.3961136143053783, "learning_rate": 2.3135966364700603e-06, "loss": 0.1479, "step": 16693 }, { "epoch": 2.3572437164642754, "grad_norm": 2.964405352577542, "learning_rate": 2.312621561896494e-06, "loss": 0.1324, "step": 16694 }, { "epoch": 2.3573849195142613, "grad_norm": 2.9759674844534567, "learning_rate": 2.311646665974284e-06, "loss": 0.1515, "step": 16695 }, { "epoch": 2.357526122564247, "grad_norm": 3.6417743798910207, "learning_rate": 2.3106719487260843e-06, "loss": 0.1434, "step": 16696 }, { "epoch": 2.357667325614233, "grad_norm": 3.167411292920211, "learning_rate": 2.309697410174545e-06, "loss": 0.1211, "step": 16697 }, { "epoch": 2.357808528664219, "grad_norm": 3.6106210681205795, "learning_rate": 2.3087230503423207e-06, "loss": 0.1581, "step": 16698 }, { "epoch": 2.357949731714205, "grad_norm": 3.690799445797164, "learning_rate": 2.30774886925205e-06, "loss": 0.1885, "step": 16699 }, { "epoch": 2.3580909347641907, "grad_norm": 3.4603217483896413, "learning_rate": 2.306774866926377e-06, "loss": 0.1823, "step": 16700 }, { "epoch": 2.3582321378141766, "grad_norm": 3.4846246306200603, "learning_rate": 2.3058010433879297e-06, "loss": 0.2054, "step": 16701 }, { "epoch": 2.3583733408641625, "grad_norm": 3.859154502171222, "learning_rate": 2.304827398659342e-06, "loss": 0.1923, "step": 16702 }, { "epoch": 2.3585145439141484, "grad_norm": 3.149821175537311, "learning_rate": 2.303853932763244e-06, "loss": 0.1583, "step": 16703 }, { "epoch": 2.3586557469641343, "grad_norm": 2.9617161232596247, "learning_rate": 2.3028806457222565e-06, "loss": 0.1461, "step": 16704 }, { "epoch": 2.35879695001412, "grad_norm": 3.141985952882657, "learning_rate": 2.3019075375589995e-06, "loss": 0.1529, "step": 16705 }, { "epoch": 2.358938153064106, "grad_norm": 3.566826133937719, "learning_rate": 2.300934608296086e-06, "loss": 0.1577, "step": 16706 }, { "epoch": 2.359079356114092, "grad_norm": 2.9670539565602283, "learning_rate": 2.299961857956128e-06, "loss": 0.1237, "step": 16707 }, { "epoch": 2.359220559164078, "grad_norm": 3.6668071331968215, "learning_rate": 2.2989892865617313e-06, "loss": 0.2091, "step": 16708 }, { "epoch": 2.3593617622140637, "grad_norm": 2.990665666709609, "learning_rate": 2.2980168941354973e-06, "loss": 0.1597, "step": 16709 }, { "epoch": 2.3595029652640496, "grad_norm": 3.34084985189003, "learning_rate": 2.2970446807000237e-06, "loss": 0.1302, "step": 16710 }, { "epoch": 2.3596441683140355, "grad_norm": 2.5365759978189826, "learning_rate": 2.296072646277906e-06, "loss": 0.1275, "step": 16711 }, { "epoch": 2.3597853713640213, "grad_norm": 3.3346695832173427, "learning_rate": 2.2951007908917334e-06, "loss": 0.1555, "step": 16712 }, { "epoch": 2.359926574414007, "grad_norm": 2.9571388246806043, "learning_rate": 2.2941291145640898e-06, "loss": 0.1807, "step": 16713 }, { "epoch": 2.360067777463993, "grad_norm": 3.5764015834354788, "learning_rate": 2.293157617317555e-06, "loss": 0.1868, "step": 16714 }, { "epoch": 2.360208980513979, "grad_norm": 2.764716020827102, "learning_rate": 2.292186299174712e-06, "loss": 0.1561, "step": 16715 }, { "epoch": 2.360350183563965, "grad_norm": 3.1090639997268554, "learning_rate": 2.291215160158131e-06, "loss": 0.1608, "step": 16716 }, { "epoch": 2.3604913866139507, "grad_norm": 3.550344346168288, "learning_rate": 2.2902442002903823e-06, "loss": 0.1564, "step": 16717 }, { "epoch": 2.3606325896639366, "grad_norm": 3.585780853375674, "learning_rate": 2.289273419594027e-06, "loss": 0.1527, "step": 16718 }, { "epoch": 2.3607737927139225, "grad_norm": 3.2535969945152323, "learning_rate": 2.2883028180916265e-06, "loss": 0.1665, "step": 16719 }, { "epoch": 2.3609149957639084, "grad_norm": 2.8620029125672697, "learning_rate": 2.287332395805737e-06, "loss": 0.1128, "step": 16720 }, { "epoch": 2.3610561988138943, "grad_norm": 3.174831221469931, "learning_rate": 2.286362152758913e-06, "loss": 0.1335, "step": 16721 }, { "epoch": 2.36119740186388, "grad_norm": 2.904956965956408, "learning_rate": 2.2853920889737015e-06, "loss": 0.1511, "step": 16722 }, { "epoch": 2.361338604913866, "grad_norm": 2.7158183361761647, "learning_rate": 2.2844222044726463e-06, "loss": 0.1418, "step": 16723 }, { "epoch": 2.361479807963852, "grad_norm": 2.5152687948818757, "learning_rate": 2.2834524992782867e-06, "loss": 0.1568, "step": 16724 }, { "epoch": 2.361621011013838, "grad_norm": 3.5973077838860066, "learning_rate": 2.282482973413159e-06, "loss": 0.1482, "step": 16725 }, { "epoch": 2.3617622140638237, "grad_norm": 2.946663072075912, "learning_rate": 2.281513626899794e-06, "loss": 0.1319, "step": 16726 }, { "epoch": 2.3619034171138096, "grad_norm": 2.914028124592764, "learning_rate": 2.2805444597607195e-06, "loss": 0.111, "step": 16727 }, { "epoch": 2.3620446201637955, "grad_norm": 3.5525910267244405, "learning_rate": 2.279575472018457e-06, "loss": 0.1807, "step": 16728 }, { "epoch": 2.3621858232137813, "grad_norm": 3.045541499727895, "learning_rate": 2.278606663695526e-06, "loss": 0.1495, "step": 16729 }, { "epoch": 2.3623270262637672, "grad_norm": 2.8383180107553527, "learning_rate": 2.2776380348144435e-06, "loss": 0.1415, "step": 16730 }, { "epoch": 2.362468229313753, "grad_norm": 3.9236594155479154, "learning_rate": 2.276669585397716e-06, "loss": 0.1815, "step": 16731 }, { "epoch": 2.362609432363739, "grad_norm": 3.2538822995069636, "learning_rate": 2.2757013154678496e-06, "loss": 0.1688, "step": 16732 }, { "epoch": 2.362750635413725, "grad_norm": 3.1496179962404147, "learning_rate": 2.274733225047352e-06, "loss": 0.1582, "step": 16733 }, { "epoch": 2.3628918384637108, "grad_norm": 4.024662395414331, "learning_rate": 2.2737653141587203e-06, "loss": 0.2048, "step": 16734 }, { "epoch": 2.3630330415136966, "grad_norm": 4.063873570807576, "learning_rate": 2.2727975828244443e-06, "loss": 0.1381, "step": 16735 }, { "epoch": 2.3631742445636825, "grad_norm": 3.1398909984290304, "learning_rate": 2.2718300310670136e-06, "loss": 0.1402, "step": 16736 }, { "epoch": 2.3633154476136684, "grad_norm": 2.598271885989341, "learning_rate": 2.2708626589089146e-06, "loss": 0.1256, "step": 16737 }, { "epoch": 2.3634566506636543, "grad_norm": 2.987888638309345, "learning_rate": 2.26989546637263e-06, "loss": 0.1192, "step": 16738 }, { "epoch": 2.36359785371364, "grad_norm": 3.0807834556379765, "learning_rate": 2.2689284534806376e-06, "loss": 0.132, "step": 16739 }, { "epoch": 2.363739056763626, "grad_norm": 3.4885130524883396, "learning_rate": 2.26796162025541e-06, "loss": 0.158, "step": 16740 }, { "epoch": 2.363880259813612, "grad_norm": 2.862157648787348, "learning_rate": 2.2669949667194137e-06, "loss": 0.1473, "step": 16741 }, { "epoch": 2.364021462863598, "grad_norm": 3.026163312504716, "learning_rate": 2.2660284928951148e-06, "loss": 0.1474, "step": 16742 }, { "epoch": 2.3641626659135837, "grad_norm": 3.265749374698899, "learning_rate": 2.265062198804975e-06, "loss": 0.1433, "step": 16743 }, { "epoch": 2.3643038689635696, "grad_norm": 3.646680306600485, "learning_rate": 2.2640960844714478e-06, "loss": 0.1724, "step": 16744 }, { "epoch": 2.3644450720135555, "grad_norm": 2.3220017617928583, "learning_rate": 2.2631301499169877e-06, "loss": 0.104, "step": 16745 }, { "epoch": 2.3645862750635414, "grad_norm": 3.2126335279731677, "learning_rate": 2.2621643951640413e-06, "loss": 0.1787, "step": 16746 }, { "epoch": 2.3647274781135272, "grad_norm": 3.841370649046859, "learning_rate": 2.261198820235052e-06, "loss": 0.1983, "step": 16747 }, { "epoch": 2.364868681163513, "grad_norm": 3.7768973931581393, "learning_rate": 2.2602334251524617e-06, "loss": 0.1828, "step": 16748 }, { "epoch": 2.365009884213499, "grad_norm": 3.035120788490452, "learning_rate": 2.2592682099386996e-06, "loss": 0.1442, "step": 16749 }, { "epoch": 2.365151087263485, "grad_norm": 3.503043857085711, "learning_rate": 2.258303174616204e-06, "loss": 0.1757, "step": 16750 }, { "epoch": 2.3652922903134708, "grad_norm": 3.1046231248207197, "learning_rate": 2.257338319207403e-06, "loss": 0.1479, "step": 16751 }, { "epoch": 2.3654334933634567, "grad_norm": 3.1146212908005686, "learning_rate": 2.256373643734713e-06, "loss": 0.1555, "step": 16752 }, { "epoch": 2.3655746964134425, "grad_norm": 4.104367679538476, "learning_rate": 2.2554091482205544e-06, "loss": 0.1536, "step": 16753 }, { "epoch": 2.3657158994634284, "grad_norm": 2.7720890744626865, "learning_rate": 2.25444483268734e-06, "loss": 0.121, "step": 16754 }, { "epoch": 2.3658571025134143, "grad_norm": 3.63672139750319, "learning_rate": 2.253480697157486e-06, "loss": 0.1767, "step": 16755 }, { "epoch": 2.3659983055634, "grad_norm": 2.919902228443104, "learning_rate": 2.2525167416533945e-06, "loss": 0.1336, "step": 16756 }, { "epoch": 2.366139508613386, "grad_norm": 3.2348142901196733, "learning_rate": 2.251552966197469e-06, "loss": 0.1369, "step": 16757 }, { "epoch": 2.366280711663372, "grad_norm": 2.6102307933158504, "learning_rate": 2.250589370812105e-06, "loss": 0.1424, "step": 16758 }, { "epoch": 2.366421914713358, "grad_norm": 3.3693386623751906, "learning_rate": 2.2496259555196986e-06, "loss": 0.1581, "step": 16759 }, { "epoch": 2.3665631177633437, "grad_norm": 3.036184750015656, "learning_rate": 2.248662720342637e-06, "loss": 0.1515, "step": 16760 }, { "epoch": 2.3667043208133296, "grad_norm": 3.1529133487863, "learning_rate": 2.247699665303307e-06, "loss": 0.1231, "step": 16761 }, { "epoch": 2.3668455238633155, "grad_norm": 2.900054581045101, "learning_rate": 2.2467367904240877e-06, "loss": 0.1644, "step": 16762 }, { "epoch": 2.3669867269133014, "grad_norm": 3.97893936330357, "learning_rate": 2.2457740957273577e-06, "loss": 0.1844, "step": 16763 }, { "epoch": 2.3671279299632872, "grad_norm": 3.7346162757334413, "learning_rate": 2.2448115812354888e-06, "loss": 0.1956, "step": 16764 }, { "epoch": 2.367269133013273, "grad_norm": 2.540897266463932, "learning_rate": 2.2438492469708494e-06, "loss": 0.1255, "step": 16765 }, { "epoch": 2.367410336063259, "grad_norm": 3.2408841194428137, "learning_rate": 2.2428870929558012e-06, "loss": 0.1804, "step": 16766 }, { "epoch": 2.367551539113245, "grad_norm": 3.0376324982207246, "learning_rate": 2.241925119212709e-06, "loss": 0.1603, "step": 16767 }, { "epoch": 2.367692742163231, "grad_norm": 2.8684779971555803, "learning_rate": 2.2409633257639306e-06, "loss": 0.1168, "step": 16768 }, { "epoch": 2.3678339452132167, "grad_norm": 3.6440323380936688, "learning_rate": 2.2400017126318095e-06, "loss": 0.1508, "step": 16769 }, { "epoch": 2.3679751482632025, "grad_norm": 3.047506574521593, "learning_rate": 2.239040279838699e-06, "loss": 0.1687, "step": 16770 }, { "epoch": 2.3681163513131884, "grad_norm": 2.977736404618285, "learning_rate": 2.2380790274069363e-06, "loss": 0.1498, "step": 16771 }, { "epoch": 2.3682575543631743, "grad_norm": 3.0891657577434475, "learning_rate": 2.2371179553588685e-06, "loss": 0.129, "step": 16772 }, { "epoch": 2.36839875741316, "grad_norm": 3.5975547444760116, "learning_rate": 2.236157063716827e-06, "loss": 0.1489, "step": 16773 }, { "epoch": 2.368539960463146, "grad_norm": 4.175897512158729, "learning_rate": 2.2351963525031428e-06, "loss": 0.1902, "step": 16774 }, { "epoch": 2.368681163513132, "grad_norm": 2.9073938878241234, "learning_rate": 2.2342358217401407e-06, "loss": 0.1523, "step": 16775 }, { "epoch": 2.368822366563118, "grad_norm": 3.3971754903535913, "learning_rate": 2.233275471450146e-06, "loss": 0.1665, "step": 16776 }, { "epoch": 2.3689635696131037, "grad_norm": 3.4085875242012853, "learning_rate": 2.232315301655473e-06, "loss": 0.1489, "step": 16777 }, { "epoch": 2.3691047726630896, "grad_norm": 3.6377655989530173, "learning_rate": 2.231355312378439e-06, "loss": 0.1419, "step": 16778 }, { "epoch": 2.3692459757130755, "grad_norm": 4.126370118529404, "learning_rate": 2.2303955036413528e-06, "loss": 0.2482, "step": 16779 }, { "epoch": 2.3693871787630614, "grad_norm": 3.468834686382979, "learning_rate": 2.229435875466519e-06, "loss": 0.1828, "step": 16780 }, { "epoch": 2.3695283818130473, "grad_norm": 2.3891788840695525, "learning_rate": 2.2284764278762404e-06, "loss": 0.0878, "step": 16781 }, { "epoch": 2.369669584863033, "grad_norm": 3.249383321163135, "learning_rate": 2.2275171608928124e-06, "loss": 0.1283, "step": 16782 }, { "epoch": 2.369810787913019, "grad_norm": 4.421932860761235, "learning_rate": 2.226558074538527e-06, "loss": 0.2027, "step": 16783 }, { "epoch": 2.369951990963005, "grad_norm": 4.163506919692353, "learning_rate": 2.225599168835677e-06, "loss": 0.1811, "step": 16784 }, { "epoch": 2.370093194012991, "grad_norm": 3.279789363133372, "learning_rate": 2.2246404438065483e-06, "loss": 0.1239, "step": 16785 }, { "epoch": 2.3702343970629767, "grad_norm": 3.6170108142916058, "learning_rate": 2.2236818994734144e-06, "loss": 0.1394, "step": 16786 }, { "epoch": 2.3703756001129626, "grad_norm": 3.340104319756753, "learning_rate": 2.2227235358585554e-06, "loss": 0.1714, "step": 16787 }, { "epoch": 2.3705168031629484, "grad_norm": 2.997069832781216, "learning_rate": 2.221765352984239e-06, "loss": 0.1139, "step": 16788 }, { "epoch": 2.3706580062129343, "grad_norm": 4.960897284905732, "learning_rate": 2.22080735087274e-06, "loss": 0.1997, "step": 16789 }, { "epoch": 2.37079920926292, "grad_norm": 3.053743705739603, "learning_rate": 2.2198495295463184e-06, "loss": 0.1167, "step": 16790 }, { "epoch": 2.370940412312906, "grad_norm": 2.566892677107905, "learning_rate": 2.2188918890272335e-06, "loss": 0.1447, "step": 16791 }, { "epoch": 2.371081615362892, "grad_norm": 3.63923234768654, "learning_rate": 2.2179344293377415e-06, "loss": 0.1802, "step": 16792 }, { "epoch": 2.371222818412878, "grad_norm": 3.556732355067024, "learning_rate": 2.2169771505000912e-06, "loss": 0.181, "step": 16793 }, { "epoch": 2.3713640214628637, "grad_norm": 3.319846057301965, "learning_rate": 2.2160200525365326e-06, "loss": 0.1565, "step": 16794 }, { "epoch": 2.3715052245128496, "grad_norm": 3.5740189101594386, "learning_rate": 2.2150631354693054e-06, "loss": 0.2204, "step": 16795 }, { "epoch": 2.3716464275628355, "grad_norm": 3.1018345132765486, "learning_rate": 2.214106399320648e-06, "loss": 0.1334, "step": 16796 }, { "epoch": 2.3717876306128214, "grad_norm": 3.4836925638860876, "learning_rate": 2.2131498441127964e-06, "loss": 0.1388, "step": 16797 }, { "epoch": 2.3719288336628073, "grad_norm": 3.2267371304477495, "learning_rate": 2.2121934698679793e-06, "loss": 0.1912, "step": 16798 }, { "epoch": 2.372070036712793, "grad_norm": 2.9429593615016425, "learning_rate": 2.2112372766084223e-06, "loss": 0.1202, "step": 16799 }, { "epoch": 2.372211239762779, "grad_norm": 3.359542551781249, "learning_rate": 2.2102812643563455e-06, "loss": 0.1412, "step": 16800 }, { "epoch": 2.372352442812765, "grad_norm": 3.1605134811386835, "learning_rate": 2.2093254331339697e-06, "loss": 0.169, "step": 16801 }, { "epoch": 2.372493645862751, "grad_norm": 3.133843874930088, "learning_rate": 2.20836978296351e-06, "loss": 0.1597, "step": 16802 }, { "epoch": 2.3726348489127367, "grad_norm": 3.4449206845367133, "learning_rate": 2.2074143138671677e-06, "loss": 0.1722, "step": 16803 }, { "epoch": 2.3727760519627226, "grad_norm": 3.334969547659957, "learning_rate": 2.2064590258671524e-06, "loss": 0.1463, "step": 16804 }, { "epoch": 2.3729172550127084, "grad_norm": 3.4295660305092928, "learning_rate": 2.205503918985661e-06, "loss": 0.1821, "step": 16805 }, { "epoch": 2.3730584580626943, "grad_norm": 4.44281081225325, "learning_rate": 2.2045489932448937e-06, "loss": 0.1817, "step": 16806 }, { "epoch": 2.37319966111268, "grad_norm": 3.669500104790085, "learning_rate": 2.2035942486670425e-06, "loss": 0.1279, "step": 16807 }, { "epoch": 2.373340864162666, "grad_norm": 2.676594464041409, "learning_rate": 2.2026396852742936e-06, "loss": 0.1233, "step": 16808 }, { "epoch": 2.373482067212652, "grad_norm": 2.7779642848725805, "learning_rate": 2.2016853030888306e-06, "loss": 0.13, "step": 16809 }, { "epoch": 2.373623270262638, "grad_norm": 3.2569654300715705, "learning_rate": 2.2007311021328337e-06, "loss": 0.1637, "step": 16810 }, { "epoch": 2.3737644733126237, "grad_norm": 2.8232553673817233, "learning_rate": 2.199777082428478e-06, "loss": 0.1387, "step": 16811 }, { "epoch": 2.373905676362609, "grad_norm": 3.2533348989648405, "learning_rate": 2.198823243997933e-06, "loss": 0.1589, "step": 16812 }, { "epoch": 2.374046879412595, "grad_norm": 2.971038238114221, "learning_rate": 2.197869586863368e-06, "loss": 0.133, "step": 16813 }, { "epoch": 2.374188082462581, "grad_norm": 2.581745547649538, "learning_rate": 2.196916111046944e-06, "loss": 0.1296, "step": 16814 }, { "epoch": 2.374329285512567, "grad_norm": 3.308605595511561, "learning_rate": 2.195962816570819e-06, "loss": 0.1751, "step": 16815 }, { "epoch": 2.3744704885625527, "grad_norm": 2.495779136728042, "learning_rate": 2.1950097034571495e-06, "loss": 0.1344, "step": 16816 }, { "epoch": 2.3746116916125386, "grad_norm": 3.315600295572005, "learning_rate": 2.1940567717280793e-06, "loss": 0.1443, "step": 16817 }, { "epoch": 2.3747528946625245, "grad_norm": 3.7217681564831127, "learning_rate": 2.193104021405762e-06, "loss": 0.1716, "step": 16818 }, { "epoch": 2.3748940977125104, "grad_norm": 3.1384611504683306, "learning_rate": 2.192151452512339e-06, "loss": 0.1448, "step": 16819 }, { "epoch": 2.3750353007624962, "grad_norm": 2.8030048416374695, "learning_rate": 2.191199065069941e-06, "loss": 0.1627, "step": 16820 }, { "epoch": 2.375176503812482, "grad_norm": 3.4786606488999428, "learning_rate": 2.1902468591007042e-06, "loss": 0.1812, "step": 16821 }, { "epoch": 2.375317706862468, "grad_norm": 3.8978158660890467, "learning_rate": 2.1892948346267583e-06, "loss": 0.2333, "step": 16822 }, { "epoch": 2.375458909912454, "grad_norm": 2.6898938527455134, "learning_rate": 2.1883429916702238e-06, "loss": 0.1304, "step": 16823 }, { "epoch": 2.37560011296244, "grad_norm": 2.7725877506387833, "learning_rate": 2.187391330253228e-06, "loss": 0.1482, "step": 16824 }, { "epoch": 2.3757413160124257, "grad_norm": 3.8028033625614697, "learning_rate": 2.186439850397882e-06, "loss": 0.1503, "step": 16825 }, { "epoch": 2.3758825190624115, "grad_norm": 2.890799461779304, "learning_rate": 2.1854885521263016e-06, "loss": 0.1476, "step": 16826 }, { "epoch": 2.3760237221123974, "grad_norm": 2.624418045570247, "learning_rate": 2.184537435460594e-06, "loss": 0.1226, "step": 16827 }, { "epoch": 2.3761649251623833, "grad_norm": 3.796868830613685, "learning_rate": 2.183586500422856e-06, "loss": 0.137, "step": 16828 }, { "epoch": 2.376306128212369, "grad_norm": 2.7027500865259673, "learning_rate": 2.1826357470351945e-06, "loss": 0.1313, "step": 16829 }, { "epoch": 2.376447331262355, "grad_norm": 3.9337572020463285, "learning_rate": 2.1816851753197023e-06, "loss": 0.1607, "step": 16830 }, { "epoch": 2.376588534312341, "grad_norm": 2.6190177369118417, "learning_rate": 2.1807347852984707e-06, "loss": 0.1056, "step": 16831 }, { "epoch": 2.376729737362327, "grad_norm": 3.4987924433025244, "learning_rate": 2.1797845769935856e-06, "loss": 0.1694, "step": 16832 }, { "epoch": 2.3768709404123127, "grad_norm": 3.017635996546025, "learning_rate": 2.178834550427129e-06, "loss": 0.1235, "step": 16833 }, { "epoch": 2.3770121434622986, "grad_norm": 3.4253151426848296, "learning_rate": 2.1778847056211803e-06, "loss": 0.1418, "step": 16834 }, { "epoch": 2.3771533465122845, "grad_norm": 3.0283263908078557, "learning_rate": 2.1769350425978097e-06, "loss": 0.1353, "step": 16835 }, { "epoch": 2.3772945495622704, "grad_norm": 2.911163552527298, "learning_rate": 2.175985561379097e-06, "loss": 0.1253, "step": 16836 }, { "epoch": 2.3774357526122563, "grad_norm": 2.9560822252812304, "learning_rate": 2.1750362619870967e-06, "loss": 0.1349, "step": 16837 }, { "epoch": 2.377576955662242, "grad_norm": 2.951963398970572, "learning_rate": 2.174087144443875e-06, "loss": 0.1897, "step": 16838 }, { "epoch": 2.377718158712228, "grad_norm": 2.813355255166902, "learning_rate": 2.173138208771488e-06, "loss": 0.1171, "step": 16839 }, { "epoch": 2.377859361762214, "grad_norm": 3.3249924572557554, "learning_rate": 2.1721894549919863e-06, "loss": 0.1525, "step": 16840 }, { "epoch": 2.3780005648122, "grad_norm": 4.21156338227217, "learning_rate": 2.1712408831274232e-06, "loss": 0.2071, "step": 16841 }, { "epoch": 2.3781417678621857, "grad_norm": 2.6621535929826505, "learning_rate": 2.1702924931998426e-06, "loss": 0.1122, "step": 16842 }, { "epoch": 2.3782829709121716, "grad_norm": 3.5684731222605888, "learning_rate": 2.1693442852312817e-06, "loss": 0.1477, "step": 16843 }, { "epoch": 2.3784241739621574, "grad_norm": 3.834633803624263, "learning_rate": 2.168396259243781e-06, "loss": 0.14, "step": 16844 }, { "epoch": 2.3785653770121433, "grad_norm": 3.099933924499636, "learning_rate": 2.1674484152593634e-06, "loss": 0.1608, "step": 16845 }, { "epoch": 2.378706580062129, "grad_norm": 3.0221030984721358, "learning_rate": 2.166500753300065e-06, "loss": 0.161, "step": 16846 }, { "epoch": 2.378847783112115, "grad_norm": 2.596044824389492, "learning_rate": 2.165553273387906e-06, "loss": 0.123, "step": 16847 }, { "epoch": 2.378988986162101, "grad_norm": 3.153993282837189, "learning_rate": 2.1646059755449058e-06, "loss": 0.1568, "step": 16848 }, { "epoch": 2.379130189212087, "grad_norm": 3.7686804122485573, "learning_rate": 2.1636588597930775e-06, "loss": 0.1762, "step": 16849 }, { "epoch": 2.3792713922620727, "grad_norm": 3.3200375582842514, "learning_rate": 2.1627119261544348e-06, "loss": 0.1065, "step": 16850 }, { "epoch": 2.3794125953120586, "grad_norm": 3.241732455816064, "learning_rate": 2.1617651746509815e-06, "loss": 0.1689, "step": 16851 }, { "epoch": 2.3795537983620445, "grad_norm": 2.783290901304856, "learning_rate": 2.1608186053047177e-06, "loss": 0.1349, "step": 16852 }, { "epoch": 2.3796950014120304, "grad_norm": 3.193364012437937, "learning_rate": 2.1598722181376497e-06, "loss": 0.1385, "step": 16853 }, { "epoch": 2.3798362044620163, "grad_norm": 3.9331525621472814, "learning_rate": 2.1589260131717614e-06, "loss": 0.1706, "step": 16854 }, { "epoch": 2.379977407512002, "grad_norm": 3.2871113105016256, "learning_rate": 2.1579799904290476e-06, "loss": 0.1225, "step": 16855 }, { "epoch": 2.380118610561988, "grad_norm": 3.751676753259436, "learning_rate": 2.157034149931492e-06, "loss": 0.189, "step": 16856 }, { "epoch": 2.380259813611974, "grad_norm": 3.636325786659401, "learning_rate": 2.1560884917010725e-06, "loss": 0.1714, "step": 16857 }, { "epoch": 2.38040101666196, "grad_norm": 2.750835651375333, "learning_rate": 2.155143015759773e-06, "loss": 0.1429, "step": 16858 }, { "epoch": 2.3805422197119457, "grad_norm": 3.6596374430454177, "learning_rate": 2.1541977221295605e-06, "loss": 0.1843, "step": 16859 }, { "epoch": 2.3806834227619316, "grad_norm": 3.63387828127033, "learning_rate": 2.1532526108324047e-06, "loss": 0.1791, "step": 16860 }, { "epoch": 2.3808246258119174, "grad_norm": 2.1818596143524274, "learning_rate": 2.1523076818902722e-06, "loss": 0.08, "step": 16861 }, { "epoch": 2.3809658288619033, "grad_norm": 2.8908724743711356, "learning_rate": 2.151362935325115e-06, "loss": 0.1434, "step": 16862 }, { "epoch": 2.381107031911889, "grad_norm": 2.4791904498862607, "learning_rate": 2.1504183711588966e-06, "loss": 0.1275, "step": 16863 }, { "epoch": 2.381248234961875, "grad_norm": 2.593423071621891, "learning_rate": 2.149473989413564e-06, "loss": 0.0952, "step": 16864 }, { "epoch": 2.381389438011861, "grad_norm": 3.4013339525721715, "learning_rate": 2.148529790111067e-06, "loss": 0.1757, "step": 16865 }, { "epoch": 2.381530641061847, "grad_norm": 2.8369335383036236, "learning_rate": 2.1475857732733464e-06, "loss": 0.1315, "step": 16866 }, { "epoch": 2.3816718441118327, "grad_norm": 3.259529982458239, "learning_rate": 2.1466419389223403e-06, "loss": 0.1276, "step": 16867 }, { "epoch": 2.3818130471618186, "grad_norm": 2.9851834195123392, "learning_rate": 2.1456982870799848e-06, "loss": 0.1222, "step": 16868 }, { "epoch": 2.3819542502118045, "grad_norm": 3.228418306674102, "learning_rate": 2.1447548177682065e-06, "loss": 0.1567, "step": 16869 }, { "epoch": 2.3820954532617904, "grad_norm": 3.1675876251642334, "learning_rate": 2.1438115310089393e-06, "loss": 0.1302, "step": 16870 }, { "epoch": 2.3822366563117763, "grad_norm": 2.0743157181808023, "learning_rate": 2.1428684268240964e-06, "loss": 0.1321, "step": 16871 }, { "epoch": 2.382377859361762, "grad_norm": 4.121627887555411, "learning_rate": 2.1419255052355983e-06, "loss": 0.1908, "step": 16872 }, { "epoch": 2.382519062411748, "grad_norm": 2.9301165144113197, "learning_rate": 2.140982766265357e-06, "loss": 0.1304, "step": 16873 }, { "epoch": 2.382660265461734, "grad_norm": 3.394183471132699, "learning_rate": 2.14004020993528e-06, "loss": 0.162, "step": 16874 }, { "epoch": 2.38280146851172, "grad_norm": 3.528012232321496, "learning_rate": 2.1390978362672763e-06, "loss": 0.153, "step": 16875 }, { "epoch": 2.3829426715617057, "grad_norm": 3.000637826485278, "learning_rate": 2.138155645283244e-06, "loss": 0.1312, "step": 16876 }, { "epoch": 2.3830838746116916, "grad_norm": 3.2413708452045085, "learning_rate": 2.13721363700508e-06, "loss": 0.1475, "step": 16877 }, { "epoch": 2.3832250776616775, "grad_norm": 3.1098766203682544, "learning_rate": 2.1362718114546777e-06, "loss": 0.1249, "step": 16878 }, { "epoch": 2.3833662807116633, "grad_norm": 3.0271378774702544, "learning_rate": 2.1353301686539173e-06, "loss": 0.1121, "step": 16879 }, { "epoch": 2.3835074837616492, "grad_norm": 3.0764241744404495, "learning_rate": 2.1343887086246893e-06, "loss": 0.1418, "step": 16880 }, { "epoch": 2.383648686811635, "grad_norm": 2.8739305416767023, "learning_rate": 2.133447431388872e-06, "loss": 0.1226, "step": 16881 }, { "epoch": 2.383789889861621, "grad_norm": 3.6562634606108873, "learning_rate": 2.1325063369683374e-06, "loss": 0.139, "step": 16882 }, { "epoch": 2.383931092911607, "grad_norm": 2.706378317998329, "learning_rate": 2.1315654253849594e-06, "loss": 0.1217, "step": 16883 }, { "epoch": 2.3840722959615928, "grad_norm": 3.2890492456411637, "learning_rate": 2.1306246966606025e-06, "loss": 0.128, "step": 16884 }, { "epoch": 2.3842134990115786, "grad_norm": 2.5057486636725206, "learning_rate": 2.1296841508171285e-06, "loss": 0.1349, "step": 16885 }, { "epoch": 2.3843547020615645, "grad_norm": 3.22059075939742, "learning_rate": 2.128743787876393e-06, "loss": 0.1444, "step": 16886 }, { "epoch": 2.3844959051115504, "grad_norm": 3.2905058048843467, "learning_rate": 2.1278036078602584e-06, "loss": 0.1776, "step": 16887 }, { "epoch": 2.3846371081615363, "grad_norm": 2.9343742730428994, "learning_rate": 2.126863610790566e-06, "loss": 0.1314, "step": 16888 }, { "epoch": 2.384778311211522, "grad_norm": 3.1352321630894964, "learning_rate": 2.1259237966891623e-06, "loss": 0.1406, "step": 16889 }, { "epoch": 2.384919514261508, "grad_norm": 3.1461646947860333, "learning_rate": 2.124984165577889e-06, "loss": 0.1051, "step": 16890 }, { "epoch": 2.385060717311494, "grad_norm": 2.8928606369337597, "learning_rate": 2.1240447174785806e-06, "loss": 0.1625, "step": 16891 }, { "epoch": 2.38520192036148, "grad_norm": 3.19215859831351, "learning_rate": 2.1231054524130746e-06, "loss": 0.1667, "step": 16892 }, { "epoch": 2.3853431234114657, "grad_norm": 3.6164602667332986, "learning_rate": 2.1221663704031957e-06, "loss": 0.1516, "step": 16893 }, { "epoch": 2.3854843264614516, "grad_norm": 3.497314894308194, "learning_rate": 2.121227471470768e-06, "loss": 0.1805, "step": 16894 }, { "epoch": 2.3856255295114375, "grad_norm": 4.235992160202638, "learning_rate": 2.120288755637614e-06, "loss": 0.2222, "step": 16895 }, { "epoch": 2.3857667325614234, "grad_norm": 3.369591627455159, "learning_rate": 2.1193502229255436e-06, "loss": 0.1486, "step": 16896 }, { "epoch": 2.3859079356114092, "grad_norm": 4.513794629689269, "learning_rate": 2.1184118733563685e-06, "loss": 0.1976, "step": 16897 }, { "epoch": 2.386049138661395, "grad_norm": 2.974483642533953, "learning_rate": 2.1174737069519e-06, "loss": 0.1054, "step": 16898 }, { "epoch": 2.386190341711381, "grad_norm": 3.303286288961413, "learning_rate": 2.116535723733938e-06, "loss": 0.1383, "step": 16899 }, { "epoch": 2.386331544761367, "grad_norm": 2.7780379208416677, "learning_rate": 2.1155979237242817e-06, "loss": 0.1257, "step": 16900 }, { "epoch": 2.3864727478113528, "grad_norm": 3.29292251244628, "learning_rate": 2.1146603069447234e-06, "loss": 0.2088, "step": 16901 }, { "epoch": 2.3866139508613387, "grad_norm": 3.9175322988482097, "learning_rate": 2.1137228734170558e-06, "loss": 0.1727, "step": 16902 }, { "epoch": 2.3867551539113245, "grad_norm": 2.8990293912138303, "learning_rate": 2.1127856231630593e-06, "loss": 0.1502, "step": 16903 }, { "epoch": 2.3868963569613104, "grad_norm": 3.0941608781904213, "learning_rate": 2.1118485562045244e-06, "loss": 0.122, "step": 16904 }, { "epoch": 2.3870375600112963, "grad_norm": 2.93435337313772, "learning_rate": 2.1109116725632193e-06, "loss": 0.1586, "step": 16905 }, { "epoch": 2.387178763061282, "grad_norm": 3.130180943115138, "learning_rate": 2.109974972260921e-06, "loss": 0.1399, "step": 16906 }, { "epoch": 2.387319966111268, "grad_norm": 3.81169229690288, "learning_rate": 2.1090384553193953e-06, "loss": 0.1506, "step": 16907 }, { "epoch": 2.387461169161254, "grad_norm": 2.999748380957778, "learning_rate": 2.108102121760409e-06, "loss": 0.1652, "step": 16908 }, { "epoch": 2.38760237221124, "grad_norm": 3.289506261611598, "learning_rate": 2.107165971605718e-06, "loss": 0.172, "step": 16909 }, { "epoch": 2.3877435752612257, "grad_norm": 3.565859863998495, "learning_rate": 2.1062300048770847e-06, "loss": 0.1595, "step": 16910 }, { "epoch": 2.3878847783112116, "grad_norm": 3.0678509385941433, "learning_rate": 2.105294221596256e-06, "loss": 0.136, "step": 16911 }, { "epoch": 2.3880259813611975, "grad_norm": 3.144068866342032, "learning_rate": 2.104358621784983e-06, "loss": 0.1749, "step": 16912 }, { "epoch": 2.3881671844111834, "grad_norm": 2.508002444693295, "learning_rate": 2.103423205465004e-06, "loss": 0.1446, "step": 16913 }, { "epoch": 2.3883083874611692, "grad_norm": 2.8644246761573102, "learning_rate": 2.102487972658056e-06, "loss": 0.1568, "step": 16914 }, { "epoch": 2.388449590511155, "grad_norm": 3.3656818647591384, "learning_rate": 2.101552923385879e-06, "loss": 0.1484, "step": 16915 }, { "epoch": 2.388590793561141, "grad_norm": 3.732281131695765, "learning_rate": 2.1006180576702017e-06, "loss": 0.1787, "step": 16916 }, { "epoch": 2.388731996611127, "grad_norm": 3.222004584684271, "learning_rate": 2.0996833755327493e-06, "loss": 0.1429, "step": 16917 }, { "epoch": 2.388873199661113, "grad_norm": 2.780391201297536, "learning_rate": 2.0987488769952436e-06, "loss": 0.1249, "step": 16918 }, { "epoch": 2.3890144027110987, "grad_norm": 2.372011405250769, "learning_rate": 2.097814562079401e-06, "loss": 0.096, "step": 16919 }, { "epoch": 2.3891556057610845, "grad_norm": 3.30239240061786, "learning_rate": 2.0968804308069324e-06, "loss": 0.1529, "step": 16920 }, { "epoch": 2.3892968088110704, "grad_norm": 2.6595140622041646, "learning_rate": 2.0959464831995557e-06, "loss": 0.1307, "step": 16921 }, { "epoch": 2.3894380118610563, "grad_norm": 3.185856299500183, "learning_rate": 2.095012719278966e-06, "loss": 0.1644, "step": 16922 }, { "epoch": 2.389579214911042, "grad_norm": 3.1040273784057666, "learning_rate": 2.094079139066868e-06, "loss": 0.1753, "step": 16923 }, { "epoch": 2.389720417961028, "grad_norm": 3.2835151214127594, "learning_rate": 2.0931457425849555e-06, "loss": 0.1522, "step": 16924 }, { "epoch": 2.389861621011014, "grad_norm": 2.7178529999987737, "learning_rate": 2.092212529854921e-06, "loss": 0.1255, "step": 16925 }, { "epoch": 2.390002824061, "grad_norm": 3.3100662964775682, "learning_rate": 2.09127950089845e-06, "loss": 0.1732, "step": 16926 }, { "epoch": 2.3901440271109857, "grad_norm": 2.909307484857051, "learning_rate": 2.09034665573723e-06, "loss": 0.1439, "step": 16927 }, { "epoch": 2.3902852301609716, "grad_norm": 2.7410693226195955, "learning_rate": 2.089413994392938e-06, "loss": 0.1272, "step": 16928 }, { "epoch": 2.3904264332109575, "grad_norm": 3.3869034461445438, "learning_rate": 2.088481516887252e-06, "loss": 0.1416, "step": 16929 }, { "epoch": 2.3905676362609434, "grad_norm": 3.3509758780329, "learning_rate": 2.087549223241835e-06, "loss": 0.1236, "step": 16930 }, { "epoch": 2.3907088393109293, "grad_norm": 3.8165363144350435, "learning_rate": 2.086617113478354e-06, "loss": 0.1726, "step": 16931 }, { "epoch": 2.390850042360915, "grad_norm": 3.4336482525392125, "learning_rate": 2.085685187618478e-06, "loss": 0.1665, "step": 16932 }, { "epoch": 2.390991245410901, "grad_norm": 3.0106414155881844, "learning_rate": 2.084753445683859e-06, "loss": 0.1334, "step": 16933 }, { "epoch": 2.391132448460887, "grad_norm": 3.0846619681921066, "learning_rate": 2.0838218876961524e-06, "loss": 0.1107, "step": 16934 }, { "epoch": 2.391273651510873, "grad_norm": 3.693857139616947, "learning_rate": 2.082890513677006e-06, "loss": 0.1958, "step": 16935 }, { "epoch": 2.3914148545608587, "grad_norm": 3.305159366411521, "learning_rate": 2.081959323648065e-06, "loss": 0.183, "step": 16936 }, { "epoch": 2.3915560576108446, "grad_norm": 3.6562492030960034, "learning_rate": 2.08102831763097e-06, "loss": 0.1747, "step": 16937 }, { "epoch": 2.3916972606608304, "grad_norm": 3.834081912606494, "learning_rate": 2.0800974956473565e-06, "loss": 0.1919, "step": 16938 }, { "epoch": 2.3918384637108163, "grad_norm": 3.011216813524388, "learning_rate": 2.079166857718856e-06, "loss": 0.1351, "step": 16939 }, { "epoch": 2.391979666760802, "grad_norm": 2.7792333723633837, "learning_rate": 2.0782364038670986e-06, "loss": 0.1373, "step": 16940 }, { "epoch": 2.392120869810788, "grad_norm": 2.9614190025743516, "learning_rate": 2.0773061341137057e-06, "loss": 0.1406, "step": 16941 }, { "epoch": 2.392262072860774, "grad_norm": 2.812234777266102, "learning_rate": 2.0763760484802966e-06, "loss": 0.1241, "step": 16942 }, { "epoch": 2.39240327591076, "grad_norm": 3.527025634108565, "learning_rate": 2.0754461469884836e-06, "loss": 0.17, "step": 16943 }, { "epoch": 2.3925444789607457, "grad_norm": 3.423091661390692, "learning_rate": 2.074516429659882e-06, "loss": 0.1396, "step": 16944 }, { "epoch": 2.3926856820107316, "grad_norm": 3.6972629888284136, "learning_rate": 2.0735868965160953e-06, "loss": 0.2181, "step": 16945 }, { "epoch": 2.3928268850607175, "grad_norm": 3.290052481382784, "learning_rate": 2.0726575475787293e-06, "loss": 0.1595, "step": 16946 }, { "epoch": 2.3929680881107034, "grad_norm": 2.700608833352401, "learning_rate": 2.071728382869376e-06, "loss": 0.1028, "step": 16947 }, { "epoch": 2.3931092911606893, "grad_norm": 3.4445725325027596, "learning_rate": 2.070799402409628e-06, "loss": 0.1865, "step": 16948 }, { "epoch": 2.393250494210675, "grad_norm": 2.70174360647674, "learning_rate": 2.0698706062210804e-06, "loss": 0.1497, "step": 16949 }, { "epoch": 2.393391697260661, "grad_norm": 3.543520884337734, "learning_rate": 2.068941994325315e-06, "loss": 0.1728, "step": 16950 }, { "epoch": 2.393532900310647, "grad_norm": 3.029676466372009, "learning_rate": 2.068013566743913e-06, "loss": 0.1553, "step": 16951 }, { "epoch": 2.393674103360633, "grad_norm": 3.5536986284155496, "learning_rate": 2.06708532349845e-06, "loss": 0.1825, "step": 16952 }, { "epoch": 2.3938153064106187, "grad_norm": 3.118160308799758, "learning_rate": 2.0661572646104975e-06, "loss": 0.1361, "step": 16953 }, { "epoch": 2.3939565094606046, "grad_norm": 2.9861453209696, "learning_rate": 2.065229390101624e-06, "loss": 0.1375, "step": 16954 }, { "epoch": 2.3940977125105904, "grad_norm": 3.4730025401811693, "learning_rate": 2.064301699993393e-06, "loss": 0.1651, "step": 16955 }, { "epoch": 2.3942389155605763, "grad_norm": 2.798785760141852, "learning_rate": 2.063374194307364e-06, "loss": 0.1434, "step": 16956 }, { "epoch": 2.394380118610562, "grad_norm": 3.801573620700732, "learning_rate": 2.0624468730650903e-06, "loss": 0.166, "step": 16957 }, { "epoch": 2.394521321660548, "grad_norm": 3.695102569134403, "learning_rate": 2.0615197362881234e-06, "loss": 0.1574, "step": 16958 }, { "epoch": 2.394662524710534, "grad_norm": 3.361808546060126, "learning_rate": 2.060592783998009e-06, "loss": 0.1492, "step": 16959 }, { "epoch": 2.39480372776052, "grad_norm": 2.978792146729772, "learning_rate": 2.0596660162162872e-06, "loss": 0.1586, "step": 16960 }, { "epoch": 2.3949449308105057, "grad_norm": 2.5695790176852067, "learning_rate": 2.0587394329645018e-06, "loss": 0.1118, "step": 16961 }, { "epoch": 2.3950861338604916, "grad_norm": 2.6964622627040464, "learning_rate": 2.057813034264181e-06, "loss": 0.1267, "step": 16962 }, { "epoch": 2.3952273369104775, "grad_norm": 3.3592956819004978, "learning_rate": 2.0568868201368587e-06, "loss": 0.138, "step": 16963 }, { "epoch": 2.3953685399604634, "grad_norm": 3.2528426364813066, "learning_rate": 2.0559607906040524e-06, "loss": 0.1762, "step": 16964 }, { "epoch": 2.395509743010449, "grad_norm": 4.017866235119019, "learning_rate": 2.0550349456872853e-06, "loss": 0.1552, "step": 16965 }, { "epoch": 2.3956509460604347, "grad_norm": 3.7990873679225334, "learning_rate": 2.054109285408077e-06, "loss": 0.1894, "step": 16966 }, { "epoch": 2.3957921491104206, "grad_norm": 3.5081018729001143, "learning_rate": 2.0531838097879377e-06, "loss": 0.1983, "step": 16967 }, { "epoch": 2.3959333521604065, "grad_norm": 2.892450141963926, "learning_rate": 2.0522585188483745e-06, "loss": 0.1262, "step": 16968 }, { "epoch": 2.3960745552103924, "grad_norm": 2.9858956663469445, "learning_rate": 2.051333412610891e-06, "loss": 0.1298, "step": 16969 }, { "epoch": 2.3962157582603782, "grad_norm": 2.647187125274911, "learning_rate": 2.0504084910969856e-06, "loss": 0.1226, "step": 16970 }, { "epoch": 2.396356961310364, "grad_norm": 3.85197333523568, "learning_rate": 2.0494837543281543e-06, "loss": 0.1604, "step": 16971 }, { "epoch": 2.39649816436035, "grad_norm": 2.775118155429708, "learning_rate": 2.0485592023258872e-06, "loss": 0.1467, "step": 16972 }, { "epoch": 2.396639367410336, "grad_norm": 2.464203968113668, "learning_rate": 2.0476348351116702e-06, "loss": 0.1461, "step": 16973 }, { "epoch": 2.396780570460322, "grad_norm": 3.5108784580881562, "learning_rate": 2.046710652706985e-06, "loss": 0.1765, "step": 16974 }, { "epoch": 2.3969217735103077, "grad_norm": 2.4179921258261423, "learning_rate": 2.0457866551333094e-06, "loss": 0.1222, "step": 16975 }, { "epoch": 2.3970629765602935, "grad_norm": 2.817553641664922, "learning_rate": 2.0448628424121173e-06, "loss": 0.0993, "step": 16976 }, { "epoch": 2.3972041796102794, "grad_norm": 3.0225950216418265, "learning_rate": 2.043939214564874e-06, "loss": 0.1564, "step": 16977 }, { "epoch": 2.3973453826602653, "grad_norm": 3.187890033929883, "learning_rate": 2.0430157716130506e-06, "loss": 0.15, "step": 16978 }, { "epoch": 2.397486585710251, "grad_norm": 2.8332602651289323, "learning_rate": 2.0420925135781035e-06, "loss": 0.1422, "step": 16979 }, { "epoch": 2.397627788760237, "grad_norm": 4.214074705262303, "learning_rate": 2.041169440481493e-06, "loss": 0.2012, "step": 16980 }, { "epoch": 2.397768991810223, "grad_norm": 2.992840649556532, "learning_rate": 2.0402465523446648e-06, "loss": 0.1266, "step": 16981 }, { "epoch": 2.397910194860209, "grad_norm": 3.001202755229056, "learning_rate": 2.0393238491890655e-06, "loss": 0.1357, "step": 16982 }, { "epoch": 2.3980513979101947, "grad_norm": 3.4520750436045082, "learning_rate": 2.038401331036146e-06, "loss": 0.1343, "step": 16983 }, { "epoch": 2.3981926009601806, "grad_norm": 3.8870909296759195, "learning_rate": 2.0374789979073408e-06, "loss": 0.1687, "step": 16984 }, { "epoch": 2.3983338040101665, "grad_norm": 3.6522517758393978, "learning_rate": 2.036556849824084e-06, "loss": 0.1682, "step": 16985 }, { "epoch": 2.3984750070601524, "grad_norm": 3.2689158862969974, "learning_rate": 2.035634886807808e-06, "loss": 0.1621, "step": 16986 }, { "epoch": 2.3986162101101383, "grad_norm": 2.4619751773024707, "learning_rate": 2.0347131088799375e-06, "loss": 0.1318, "step": 16987 }, { "epoch": 2.398757413160124, "grad_norm": 3.8404803111642534, "learning_rate": 2.0337915160618936e-06, "loss": 0.1569, "step": 16988 }, { "epoch": 2.39889861621011, "grad_norm": 2.9510833763785045, "learning_rate": 2.032870108375096e-06, "loss": 0.1566, "step": 16989 }, { "epoch": 2.399039819260096, "grad_norm": 3.5273539107793797, "learning_rate": 2.0319488858409552e-06, "loss": 0.1858, "step": 16990 }, { "epoch": 2.399181022310082, "grad_norm": 3.5795345710610333, "learning_rate": 2.031027848480881e-06, "loss": 0.1685, "step": 16991 }, { "epoch": 2.3993222253600677, "grad_norm": 3.267834376929403, "learning_rate": 2.030106996316279e-06, "loss": 0.1775, "step": 16992 }, { "epoch": 2.3994634284100536, "grad_norm": 3.7969931283701084, "learning_rate": 2.0291863293685477e-06, "loss": 0.1724, "step": 16993 }, { "epoch": 2.3996046314600394, "grad_norm": 3.0674786494111483, "learning_rate": 2.0282658476590823e-06, "loss": 0.1488, "step": 16994 }, { "epoch": 2.3997458345100253, "grad_norm": 3.206102938320587, "learning_rate": 2.0273455512092767e-06, "loss": 0.1731, "step": 16995 }, { "epoch": 2.399887037560011, "grad_norm": 3.786699221505517, "learning_rate": 2.0264254400405192e-06, "loss": 0.1509, "step": 16996 }, { "epoch": 2.400028240609997, "grad_norm": 3.826865788699074, "learning_rate": 2.025505514174192e-06, "loss": 0.1467, "step": 16997 }, { "epoch": 2.400169443659983, "grad_norm": 5.007631849796837, "learning_rate": 2.024585773631671e-06, "loss": 0.2467, "step": 16998 }, { "epoch": 2.400310646709969, "grad_norm": 3.237093440913676, "learning_rate": 2.0236662184343325e-06, "loss": 0.1387, "step": 16999 }, { "epoch": 2.4004518497599547, "grad_norm": 3.616872453733728, "learning_rate": 2.022746848603543e-06, "loss": 0.1638, "step": 17000 }, { "epoch": 2.4005930528099406, "grad_norm": 2.741695318250654, "learning_rate": 2.0218276641606748e-06, "loss": 0.1087, "step": 17001 }, { "epoch": 2.4007342558599265, "grad_norm": 2.5031180529284525, "learning_rate": 2.020908665127086e-06, "loss": 0.1261, "step": 17002 }, { "epoch": 2.4008754589099124, "grad_norm": 2.9181118878659813, "learning_rate": 2.0199898515241333e-06, "loss": 0.1642, "step": 17003 }, { "epoch": 2.4010166619598983, "grad_norm": 2.7580166723087443, "learning_rate": 2.019071223373171e-06, "loss": 0.1097, "step": 17004 }, { "epoch": 2.401157865009884, "grad_norm": 2.8602409862986593, "learning_rate": 2.018152780695547e-06, "loss": 0.166, "step": 17005 }, { "epoch": 2.40129906805987, "grad_norm": 3.28514809516474, "learning_rate": 2.0172345235126043e-06, "loss": 0.1503, "step": 17006 }, { "epoch": 2.401440271109856, "grad_norm": 2.795001426567266, "learning_rate": 2.0163164518456846e-06, "loss": 0.1382, "step": 17007 }, { "epoch": 2.401581474159842, "grad_norm": 3.777642332044963, "learning_rate": 2.0153985657161223e-06, "loss": 0.1897, "step": 17008 }, { "epoch": 2.4017226772098277, "grad_norm": 3.8718338654761086, "learning_rate": 2.0144808651452495e-06, "loss": 0.1721, "step": 17009 }, { "epoch": 2.4018638802598136, "grad_norm": 3.9719470631013816, "learning_rate": 2.0135633501543916e-06, "loss": 0.2004, "step": 17010 }, { "epoch": 2.4020050833097994, "grad_norm": 3.6532600411301073, "learning_rate": 2.0126460207648734e-06, "loss": 0.1788, "step": 17011 }, { "epoch": 2.4021462863597853, "grad_norm": 2.9936414595983867, "learning_rate": 2.0117288769980092e-06, "loss": 0.1438, "step": 17012 }, { "epoch": 2.402287489409771, "grad_norm": 2.917090846361606, "learning_rate": 2.0108119188751186e-06, "loss": 0.1305, "step": 17013 }, { "epoch": 2.402428692459757, "grad_norm": 2.8186529471572355, "learning_rate": 2.009895146417512e-06, "loss": 0.1349, "step": 17014 }, { "epoch": 2.402569895509743, "grad_norm": 3.236650076883082, "learning_rate": 2.008978559646488e-06, "loss": 0.1761, "step": 17015 }, { "epoch": 2.402711098559729, "grad_norm": 3.3399888632244057, "learning_rate": 2.0080621585833516e-06, "loss": 0.1468, "step": 17016 }, { "epoch": 2.4028523016097147, "grad_norm": 3.4896443238376476, "learning_rate": 2.0071459432493967e-06, "loss": 0.1721, "step": 17017 }, { "epoch": 2.4029935046597006, "grad_norm": 3.0798643987558916, "learning_rate": 2.0062299136659203e-06, "loss": 0.1408, "step": 17018 }, { "epoch": 2.4031347077096865, "grad_norm": 2.7460754438328845, "learning_rate": 2.0053140698542096e-06, "loss": 0.1349, "step": 17019 }, { "epoch": 2.4032759107596724, "grad_norm": 3.7423615598091837, "learning_rate": 2.0043984118355464e-06, "loss": 0.1934, "step": 17020 }, { "epoch": 2.4034171138096583, "grad_norm": 3.3433141709234233, "learning_rate": 2.0034829396312115e-06, "loss": 0.1386, "step": 17021 }, { "epoch": 2.403558316859644, "grad_norm": 2.9314097682598477, "learning_rate": 2.0025676532624794e-06, "loss": 0.1593, "step": 17022 }, { "epoch": 2.40369951990963, "grad_norm": 2.793720863801027, "learning_rate": 2.0016525527506203e-06, "loss": 0.1171, "step": 17023 }, { "epoch": 2.403840722959616, "grad_norm": 3.208090175875538, "learning_rate": 2.000737638116903e-06, "loss": 0.1433, "step": 17024 }, { "epoch": 2.403981926009602, "grad_norm": 3.763394110760023, "learning_rate": 1.999822909382588e-06, "loss": 0.1648, "step": 17025 }, { "epoch": 2.4041231290595877, "grad_norm": 2.784817852668454, "learning_rate": 1.9989083665689335e-06, "loss": 0.127, "step": 17026 }, { "epoch": 2.4042643321095736, "grad_norm": 2.8823208571680197, "learning_rate": 1.9979940096971928e-06, "loss": 0.145, "step": 17027 }, { "epoch": 2.4044055351595595, "grad_norm": 2.4921598187721403, "learning_rate": 1.997079838788616e-06, "loss": 0.1021, "step": 17028 }, { "epoch": 2.4045467382095453, "grad_norm": 3.5271795523193608, "learning_rate": 1.9961658538644447e-06, "loss": 0.178, "step": 17029 }, { "epoch": 2.4046879412595312, "grad_norm": 2.2549416833608382, "learning_rate": 1.9952520549459254e-06, "loss": 0.1268, "step": 17030 }, { "epoch": 2.404829144309517, "grad_norm": 3.9227016093854243, "learning_rate": 1.994338442054293e-06, "loss": 0.1614, "step": 17031 }, { "epoch": 2.404970347359503, "grad_norm": 2.7379225478209577, "learning_rate": 1.993425015210777e-06, "loss": 0.1407, "step": 17032 }, { "epoch": 2.405111550409489, "grad_norm": 3.264747096804538, "learning_rate": 1.992511774436604e-06, "loss": 0.1442, "step": 17033 }, { "epoch": 2.4052527534594748, "grad_norm": 2.9510364316049214, "learning_rate": 1.9915987197529973e-06, "loss": 0.1299, "step": 17034 }, { "epoch": 2.4053939565094606, "grad_norm": 3.4362503362212684, "learning_rate": 1.990685851181181e-06, "loss": 0.1405, "step": 17035 }, { "epoch": 2.4055351595594465, "grad_norm": 3.502391578997964, "learning_rate": 1.989773168742366e-06, "loss": 0.1555, "step": 17036 }, { "epoch": 2.4056763626094324, "grad_norm": 4.0363089227585345, "learning_rate": 1.988860672457763e-06, "loss": 0.1886, "step": 17037 }, { "epoch": 2.4058175656594183, "grad_norm": 3.304975817021495, "learning_rate": 1.9879483623485786e-06, "loss": 0.1908, "step": 17038 }, { "epoch": 2.405958768709404, "grad_norm": 3.067266070400253, "learning_rate": 1.987036238436014e-06, "loss": 0.131, "step": 17039 }, { "epoch": 2.40609997175939, "grad_norm": 3.07072508757356, "learning_rate": 1.986124300741267e-06, "loss": 0.1467, "step": 17040 }, { "epoch": 2.406241174809376, "grad_norm": 3.356140292256625, "learning_rate": 1.985212549285529e-06, "loss": 0.1583, "step": 17041 }, { "epoch": 2.406382377859362, "grad_norm": 3.52210803903658, "learning_rate": 1.9843009840899917e-06, "loss": 0.1646, "step": 17042 }, { "epoch": 2.4065235809093477, "grad_norm": 3.200044327501405, "learning_rate": 1.983389605175837e-06, "loss": 0.1307, "step": 17043 }, { "epoch": 2.4066647839593336, "grad_norm": 3.0611807629184526, "learning_rate": 1.982478412564246e-06, "loss": 0.1701, "step": 17044 }, { "epoch": 2.4068059870093195, "grad_norm": 2.896255461557228, "learning_rate": 1.9815674062763936e-06, "loss": 0.1243, "step": 17045 }, { "epoch": 2.4069471900593054, "grad_norm": 3.2336848466640427, "learning_rate": 1.980656586333449e-06, "loss": 0.1579, "step": 17046 }, { "epoch": 2.4070883931092912, "grad_norm": 2.311343637127122, "learning_rate": 1.9797459527565856e-06, "loss": 0.1096, "step": 17047 }, { "epoch": 2.407229596159277, "grad_norm": 3.470348602553056, "learning_rate": 1.978835505566965e-06, "loss": 0.1413, "step": 17048 }, { "epoch": 2.407370799209263, "grad_norm": 2.9967358261730745, "learning_rate": 1.977925244785741e-06, "loss": 0.1353, "step": 17049 }, { "epoch": 2.407512002259249, "grad_norm": 2.6830244640655754, "learning_rate": 1.977015170434069e-06, "loss": 0.1472, "step": 17050 }, { "epoch": 2.4076532053092348, "grad_norm": 2.4999751968678483, "learning_rate": 1.9761052825330975e-06, "loss": 0.1023, "step": 17051 }, { "epoch": 2.4077944083592207, "grad_norm": 3.304182349891002, "learning_rate": 1.975195581103976e-06, "loss": 0.1202, "step": 17052 }, { "epoch": 2.4079356114092065, "grad_norm": 3.2638175886739336, "learning_rate": 1.974286066167844e-06, "loss": 0.1858, "step": 17053 }, { "epoch": 2.4080768144591924, "grad_norm": 4.127048475569063, "learning_rate": 1.9733767377458377e-06, "loss": 0.1971, "step": 17054 }, { "epoch": 2.4082180175091783, "grad_norm": 2.852323887957247, "learning_rate": 1.97246759585909e-06, "loss": 0.1435, "step": 17055 }, { "epoch": 2.408359220559164, "grad_norm": 3.720634621930768, "learning_rate": 1.971558640528728e-06, "loss": 0.17, "step": 17056 }, { "epoch": 2.40850042360915, "grad_norm": 2.7516834611931618, "learning_rate": 1.970649871775876e-06, "loss": 0.1355, "step": 17057 }, { "epoch": 2.408641626659136, "grad_norm": 2.7406142273192153, "learning_rate": 1.969741289621653e-06, "loss": 0.1206, "step": 17058 }, { "epoch": 2.408782829709122, "grad_norm": 3.4896496582040832, "learning_rate": 1.9688328940871747e-06, "loss": 0.1581, "step": 17059 }, { "epoch": 2.4089240327591077, "grad_norm": 3.0013940258078278, "learning_rate": 1.967924685193552e-06, "loss": 0.093, "step": 17060 }, { "epoch": 2.4090652358090936, "grad_norm": 3.1796208319890478, "learning_rate": 1.9670166629618903e-06, "loss": 0.1449, "step": 17061 }, { "epoch": 2.4092064388590795, "grad_norm": 4.047483458292405, "learning_rate": 1.9661088274132924e-06, "loss": 0.1858, "step": 17062 }, { "epoch": 2.4093476419090654, "grad_norm": 2.9585503454644164, "learning_rate": 1.965201178568853e-06, "loss": 0.1314, "step": 17063 }, { "epoch": 2.4094888449590512, "grad_norm": 3.6699651401732156, "learning_rate": 1.9642937164496712e-06, "loss": 0.1844, "step": 17064 }, { "epoch": 2.409630048009037, "grad_norm": 4.615353068313634, "learning_rate": 1.9633864410768356e-06, "loss": 0.2265, "step": 17065 }, { "epoch": 2.409771251059023, "grad_norm": 2.832294657802391, "learning_rate": 1.962479352471426e-06, "loss": 0.1405, "step": 17066 }, { "epoch": 2.409912454109009, "grad_norm": 2.9118594759840226, "learning_rate": 1.961572450654524e-06, "loss": 0.1179, "step": 17067 }, { "epoch": 2.4100536571589948, "grad_norm": 3.647322233573388, "learning_rate": 1.960665735647206e-06, "loss": 0.1681, "step": 17068 }, { "epoch": 2.4101948602089807, "grad_norm": 2.858339836307105, "learning_rate": 1.9597592074705452e-06, "loss": 0.167, "step": 17069 }, { "epoch": 2.4103360632589665, "grad_norm": 3.9749513714507216, "learning_rate": 1.9588528661456087e-06, "loss": 0.1952, "step": 17070 }, { "epoch": 2.4104772663089524, "grad_norm": 3.731251282202874, "learning_rate": 1.957946711693459e-06, "loss": 0.1728, "step": 17071 }, { "epoch": 2.4106184693589383, "grad_norm": 3.466234954161315, "learning_rate": 1.9570407441351548e-06, "loss": 0.1402, "step": 17072 }, { "epoch": 2.410759672408924, "grad_norm": 3.706305679325977, "learning_rate": 1.95613496349175e-06, "loss": 0.1882, "step": 17073 }, { "epoch": 2.41090087545891, "grad_norm": 3.24128042688095, "learning_rate": 1.955229369784295e-06, "loss": 0.1398, "step": 17074 }, { "epoch": 2.411042078508896, "grad_norm": 2.986847233665659, "learning_rate": 1.954323963033835e-06, "loss": 0.1323, "step": 17075 }, { "epoch": 2.411183281558882, "grad_norm": 4.383957604812067, "learning_rate": 1.9534187432614114e-06, "loss": 0.1711, "step": 17076 }, { "epoch": 2.4113244846088677, "grad_norm": 2.975597065479522, "learning_rate": 1.952513710488061e-06, "loss": 0.158, "step": 17077 }, { "epoch": 2.4114656876588536, "grad_norm": 3.522376985909804, "learning_rate": 1.9516088647348164e-06, "loss": 0.1768, "step": 17078 }, { "epoch": 2.4116068907088395, "grad_norm": 3.5310393878240585, "learning_rate": 1.9507042060227064e-06, "loss": 0.1754, "step": 17079 }, { "epoch": 2.4117480937588254, "grad_norm": 3.2415036505826853, "learning_rate": 1.9497997343727513e-06, "loss": 0.1621, "step": 17080 }, { "epoch": 2.4118892968088113, "grad_norm": 2.998305578281165, "learning_rate": 1.9488954498059777e-06, "loss": 0.1474, "step": 17081 }, { "epoch": 2.412030499858797, "grad_norm": 3.328258166917561, "learning_rate": 1.947991352343398e-06, "loss": 0.1557, "step": 17082 }, { "epoch": 2.412171702908783, "grad_norm": 2.6362100751543527, "learning_rate": 1.9470874420060202e-06, "loss": 0.1273, "step": 17083 }, { "epoch": 2.4123129059587685, "grad_norm": 3.2299710630863503, "learning_rate": 1.9461837188148513e-06, "loss": 0.1675, "step": 17084 }, { "epoch": 2.4124541090087543, "grad_norm": 3.6270290097742377, "learning_rate": 1.945280182790893e-06, "loss": 0.1546, "step": 17085 }, { "epoch": 2.4125953120587402, "grad_norm": 3.082577709029947, "learning_rate": 1.944376833955147e-06, "loss": 0.1497, "step": 17086 }, { "epoch": 2.412736515108726, "grad_norm": 2.1911857360473843, "learning_rate": 1.9434736723286042e-06, "loss": 0.12, "step": 17087 }, { "epoch": 2.412877718158712, "grad_norm": 3.4292407935881846, "learning_rate": 1.9425706979322544e-06, "loss": 0.1977, "step": 17088 }, { "epoch": 2.413018921208698, "grad_norm": 3.0249649123711677, "learning_rate": 1.941667910787082e-06, "loss": 0.1662, "step": 17089 }, { "epoch": 2.4131601242586838, "grad_norm": 2.462392796433591, "learning_rate": 1.940765310914069e-06, "loss": 0.0872, "step": 17090 }, { "epoch": 2.4133013273086696, "grad_norm": 3.4887255523778125, "learning_rate": 1.939862898334185e-06, "loss": 0.1395, "step": 17091 }, { "epoch": 2.4134425303586555, "grad_norm": 3.2173610014384812, "learning_rate": 1.9389606730684084e-06, "loss": 0.1654, "step": 17092 }, { "epoch": 2.4135837334086414, "grad_norm": 3.3147681723106435, "learning_rate": 1.9380586351377052e-06, "loss": 0.16, "step": 17093 }, { "epoch": 2.4137249364586273, "grad_norm": 6.515556774036979, "learning_rate": 1.937156784563037e-06, "loss": 0.1625, "step": 17094 }, { "epoch": 2.413866139508613, "grad_norm": 3.9580745051065547, "learning_rate": 1.9362551213653636e-06, "loss": 0.1728, "step": 17095 }, { "epoch": 2.414007342558599, "grad_norm": 3.1705448154777645, "learning_rate": 1.9353536455656385e-06, "loss": 0.1566, "step": 17096 }, { "epoch": 2.414148545608585, "grad_norm": 2.960726521770711, "learning_rate": 1.9344523571848096e-06, "loss": 0.1291, "step": 17097 }, { "epoch": 2.414289748658571, "grad_norm": 3.1646003692953912, "learning_rate": 1.9335512562438263e-06, "loss": 0.1625, "step": 17098 }, { "epoch": 2.4144309517085567, "grad_norm": 2.926123500759486, "learning_rate": 1.9326503427636313e-06, "loss": 0.1407, "step": 17099 }, { "epoch": 2.4145721547585426, "grad_norm": 3.161368139616355, "learning_rate": 1.9317496167651563e-06, "loss": 0.1386, "step": 17100 }, { "epoch": 2.4147133578085285, "grad_norm": 3.108719796029688, "learning_rate": 1.9308490782693346e-06, "loss": 0.1252, "step": 17101 }, { "epoch": 2.4148545608585144, "grad_norm": 3.3649233797629376, "learning_rate": 1.929948727297096e-06, "loss": 0.1401, "step": 17102 }, { "epoch": 2.4149957639085002, "grad_norm": 3.434812600037867, "learning_rate": 1.9290485638693613e-06, "loss": 0.1919, "step": 17103 }, { "epoch": 2.415136966958486, "grad_norm": 3.2043317664527056, "learning_rate": 1.928148588007055e-06, "loss": 0.1487, "step": 17104 }, { "epoch": 2.415278170008472, "grad_norm": 3.081611353945789, "learning_rate": 1.9272487997310894e-06, "loss": 0.1317, "step": 17105 }, { "epoch": 2.415419373058458, "grad_norm": 3.039104295489057, "learning_rate": 1.9263491990623763e-06, "loss": 0.1538, "step": 17106 }, { "epoch": 2.4155605761084438, "grad_norm": 3.5732256096084574, "learning_rate": 1.9254497860218223e-06, "loss": 0.1772, "step": 17107 }, { "epoch": 2.4157017791584297, "grad_norm": 2.949726696394914, "learning_rate": 1.9245505606303238e-06, "loss": 0.1342, "step": 17108 }, { "epoch": 2.4158429822084155, "grad_norm": 2.841419564913024, "learning_rate": 1.923651522908785e-06, "loss": 0.1519, "step": 17109 }, { "epoch": 2.4159841852584014, "grad_norm": 3.743750313422187, "learning_rate": 1.9227526728780978e-06, "loss": 0.1874, "step": 17110 }, { "epoch": 2.4161253883083873, "grad_norm": 3.311484642364792, "learning_rate": 1.921854010559149e-06, "loss": 0.1652, "step": 17111 }, { "epoch": 2.416266591358373, "grad_norm": 2.8730812416670988, "learning_rate": 1.920955535972825e-06, "loss": 0.1548, "step": 17112 }, { "epoch": 2.416407794408359, "grad_norm": 3.2598050132515777, "learning_rate": 1.920057249140005e-06, "loss": 0.1558, "step": 17113 }, { "epoch": 2.416548997458345, "grad_norm": 3.026824621128307, "learning_rate": 1.9191591500815642e-06, "loss": 0.1196, "step": 17114 }, { "epoch": 2.416690200508331, "grad_norm": 3.6923533187952513, "learning_rate": 1.918261238818374e-06, "loss": 0.2454, "step": 17115 }, { "epoch": 2.4168314035583167, "grad_norm": 4.198397236463667, "learning_rate": 1.9173635153713066e-06, "loss": 0.1814, "step": 17116 }, { "epoch": 2.4169726066083026, "grad_norm": 3.2772135437708214, "learning_rate": 1.9164659797612172e-06, "loss": 0.1458, "step": 17117 }, { "epoch": 2.4171138096582885, "grad_norm": 2.9609600675098666, "learning_rate": 1.9155686320089684e-06, "loss": 0.1404, "step": 17118 }, { "epoch": 2.4172550127082744, "grad_norm": 3.1270771096290098, "learning_rate": 1.914671472135413e-06, "loss": 0.1743, "step": 17119 }, { "epoch": 2.4173962157582602, "grad_norm": 3.3562446705929916, "learning_rate": 1.9137745001613984e-06, "loss": 0.1455, "step": 17120 }, { "epoch": 2.417537418808246, "grad_norm": 3.4678363886607784, "learning_rate": 1.9128777161077748e-06, "loss": 0.1367, "step": 17121 }, { "epoch": 2.417678621858232, "grad_norm": 3.4798248466700668, "learning_rate": 1.911981119995381e-06, "loss": 0.1856, "step": 17122 }, { "epoch": 2.417819824908218, "grad_norm": 3.702778791225187, "learning_rate": 1.9110847118450517e-06, "loss": 0.1699, "step": 17123 }, { "epoch": 2.417961027958204, "grad_norm": 3.7119301335623858, "learning_rate": 1.9101884916776236e-06, "loss": 0.1786, "step": 17124 }, { "epoch": 2.4181022310081897, "grad_norm": 3.4426962714187574, "learning_rate": 1.909292459513916e-06, "loss": 0.1577, "step": 17125 }, { "epoch": 2.4182434340581755, "grad_norm": 2.871508633446945, "learning_rate": 1.9083966153747603e-06, "loss": 0.1141, "step": 17126 }, { "epoch": 2.4183846371081614, "grad_norm": 3.161242825909235, "learning_rate": 1.9075009592809733e-06, "loss": 0.191, "step": 17127 }, { "epoch": 2.4185258401581473, "grad_norm": 3.739364537922266, "learning_rate": 1.9066054912533683e-06, "loss": 0.1453, "step": 17128 }, { "epoch": 2.418667043208133, "grad_norm": 2.5073187509531776, "learning_rate": 1.9057102113127569e-06, "loss": 0.1349, "step": 17129 }, { "epoch": 2.418808246258119, "grad_norm": 2.8797024752121247, "learning_rate": 1.9048151194799435e-06, "loss": 0.1577, "step": 17130 }, { "epoch": 2.418949449308105, "grad_norm": 3.1822994979636228, "learning_rate": 1.9039202157757319e-06, "loss": 0.1199, "step": 17131 }, { "epoch": 2.419090652358091, "grad_norm": 3.3128047887305034, "learning_rate": 1.9030255002209153e-06, "loss": 0.1648, "step": 17132 }, { "epoch": 2.4192318554080767, "grad_norm": 3.2638085017855207, "learning_rate": 1.9021309728362935e-06, "loss": 0.1718, "step": 17133 }, { "epoch": 2.4193730584580626, "grad_norm": 3.7715551129188896, "learning_rate": 1.901236633642649e-06, "loss": 0.2045, "step": 17134 }, { "epoch": 2.4195142615080485, "grad_norm": 4.914970269029538, "learning_rate": 1.9003424826607674e-06, "loss": 0.2414, "step": 17135 }, { "epoch": 2.4196554645580344, "grad_norm": 2.985415403410023, "learning_rate": 1.8994485199114287e-06, "loss": 0.1275, "step": 17136 }, { "epoch": 2.4197966676080203, "grad_norm": 3.0417998742362067, "learning_rate": 1.8985547454154053e-06, "loss": 0.149, "step": 17137 }, { "epoch": 2.419937870658006, "grad_norm": 2.8185543891681935, "learning_rate": 1.8976611591934734e-06, "loss": 0.1447, "step": 17138 }, { "epoch": 2.420079073707992, "grad_norm": 2.9844557477744655, "learning_rate": 1.896767761266397e-06, "loss": 0.1214, "step": 17139 }, { "epoch": 2.420220276757978, "grad_norm": 2.911296703501799, "learning_rate": 1.8958745516549382e-06, "loss": 0.1188, "step": 17140 }, { "epoch": 2.420361479807964, "grad_norm": 3.715911646859128, "learning_rate": 1.8949815303798568e-06, "loss": 0.2046, "step": 17141 }, { "epoch": 2.4205026828579497, "grad_norm": 3.7613390802198903, "learning_rate": 1.8940886974619e-06, "loss": 0.1761, "step": 17142 }, { "epoch": 2.4206438859079356, "grad_norm": 3.4428215924109073, "learning_rate": 1.893196052921824e-06, "loss": 0.1546, "step": 17143 }, { "epoch": 2.4207850889579214, "grad_norm": 2.8501013530884345, "learning_rate": 1.8923035967803704e-06, "loss": 0.1305, "step": 17144 }, { "epoch": 2.4209262920079073, "grad_norm": 3.4759455127356294, "learning_rate": 1.8914113290582802e-06, "loss": 0.1929, "step": 17145 }, { "epoch": 2.421067495057893, "grad_norm": 3.5197958051190565, "learning_rate": 1.8905192497762881e-06, "loss": 0.1517, "step": 17146 }, { "epoch": 2.421208698107879, "grad_norm": 3.607400995226715, "learning_rate": 1.8896273589551273e-06, "loss": 0.1757, "step": 17147 }, { "epoch": 2.421349901157865, "grad_norm": 3.821816259638706, "learning_rate": 1.8887356566155236e-06, "loss": 0.2168, "step": 17148 }, { "epoch": 2.421491104207851, "grad_norm": 2.4728836991684733, "learning_rate": 1.8878441427782e-06, "loss": 0.1093, "step": 17149 }, { "epoch": 2.4216323072578367, "grad_norm": 3.1333700417486465, "learning_rate": 1.8869528174638752e-06, "loss": 0.1928, "step": 17150 }, { "epoch": 2.4217735103078226, "grad_norm": 3.6382218282718224, "learning_rate": 1.886061680693263e-06, "loss": 0.1858, "step": 17151 }, { "epoch": 2.4219147133578085, "grad_norm": 2.8740903665219872, "learning_rate": 1.885170732487074e-06, "loss": 0.1188, "step": 17152 }, { "epoch": 2.4220559164077944, "grad_norm": 2.8416557016412054, "learning_rate": 1.884279972866012e-06, "loss": 0.1294, "step": 17153 }, { "epoch": 2.4221971194577803, "grad_norm": 3.140858070227539, "learning_rate": 1.8833894018507758e-06, "loss": 0.149, "step": 17154 }, { "epoch": 2.422338322507766, "grad_norm": 2.968625187251119, "learning_rate": 1.8824990194620674e-06, "loss": 0.1491, "step": 17155 }, { "epoch": 2.422479525557752, "grad_norm": 3.7300800817213235, "learning_rate": 1.8816088257205767e-06, "loss": 0.1601, "step": 17156 }, { "epoch": 2.422620728607738, "grad_norm": 2.8000697752699897, "learning_rate": 1.8807188206469906e-06, "loss": 0.1014, "step": 17157 }, { "epoch": 2.422761931657724, "grad_norm": 3.3486173012522142, "learning_rate": 1.8798290042619949e-06, "loss": 0.1849, "step": 17158 }, { "epoch": 2.4229031347077097, "grad_norm": 2.46996074011264, "learning_rate": 1.8789393765862608e-06, "loss": 0.1152, "step": 17159 }, { "epoch": 2.4230443377576956, "grad_norm": 2.38141861270557, "learning_rate": 1.8780499376404715e-06, "loss": 0.1094, "step": 17160 }, { "epoch": 2.4231855408076814, "grad_norm": 3.7383386722199616, "learning_rate": 1.8771606874452941e-06, "loss": 0.1864, "step": 17161 }, { "epoch": 2.4233267438576673, "grad_norm": 3.073584992793781, "learning_rate": 1.8762716260213943e-06, "loss": 0.1672, "step": 17162 }, { "epoch": 2.423467946907653, "grad_norm": 3.5871842393548325, "learning_rate": 1.875382753389433e-06, "loss": 0.1584, "step": 17163 }, { "epoch": 2.423609149957639, "grad_norm": 3.7824919317541825, "learning_rate": 1.8744940695700686e-06, "loss": 0.1532, "step": 17164 }, { "epoch": 2.423750353007625, "grad_norm": 3.540866669000487, "learning_rate": 1.8736055745839522e-06, "loss": 0.1705, "step": 17165 }, { "epoch": 2.423891556057611, "grad_norm": 3.396740317461732, "learning_rate": 1.8727172684517325e-06, "loss": 0.1752, "step": 17166 }, { "epoch": 2.4240327591075967, "grad_norm": 3.528360243051571, "learning_rate": 1.8718291511940546e-06, "loss": 0.1633, "step": 17167 }, { "epoch": 2.4241739621575826, "grad_norm": 2.9751861975393497, "learning_rate": 1.870941222831556e-06, "loss": 0.1357, "step": 17168 }, { "epoch": 2.4243151652075685, "grad_norm": 3.494655786736849, "learning_rate": 1.8700534833848738e-06, "loss": 0.1779, "step": 17169 }, { "epoch": 2.4244563682575544, "grad_norm": 3.7257536354798173, "learning_rate": 1.869165932874636e-06, "loss": 0.1834, "step": 17170 }, { "epoch": 2.4245975713075403, "grad_norm": 3.4743758114180783, "learning_rate": 1.868278571321469e-06, "loss": 0.1641, "step": 17171 }, { "epoch": 2.424738774357526, "grad_norm": 3.074647482287495, "learning_rate": 1.867391398745999e-06, "loss": 0.1631, "step": 17172 }, { "epoch": 2.424879977407512, "grad_norm": 3.2984405157056957, "learning_rate": 1.8665044151688404e-06, "loss": 0.1569, "step": 17173 }, { "epoch": 2.425021180457498, "grad_norm": 4.5352064234196705, "learning_rate": 1.8656176206106059e-06, "loss": 0.1797, "step": 17174 }, { "epoch": 2.425162383507484, "grad_norm": 2.689618237622237, "learning_rate": 1.8647310150919084e-06, "loss": 0.1009, "step": 17175 }, { "epoch": 2.4253035865574697, "grad_norm": 3.107030592597233, "learning_rate": 1.863844598633343e-06, "loss": 0.1751, "step": 17176 }, { "epoch": 2.4254447896074556, "grad_norm": 4.6619338439193845, "learning_rate": 1.8629583712555188e-06, "loss": 0.2095, "step": 17177 }, { "epoch": 2.4255859926574415, "grad_norm": 3.969409285522405, "learning_rate": 1.8620723329790269e-06, "loss": 0.1987, "step": 17178 }, { "epoch": 2.4257271957074273, "grad_norm": 2.8290457213653912, "learning_rate": 1.8611864838244598e-06, "loss": 0.1511, "step": 17179 }, { "epoch": 2.4258683987574132, "grad_norm": 2.999653985981247, "learning_rate": 1.8603008238124043e-06, "loss": 0.1298, "step": 17180 }, { "epoch": 2.426009601807399, "grad_norm": 2.745271204131022, "learning_rate": 1.8594153529634417e-06, "loss": 0.1406, "step": 17181 }, { "epoch": 2.426150804857385, "grad_norm": 3.130626232421815, "learning_rate": 1.8585300712981514e-06, "loss": 0.1735, "step": 17182 }, { "epoch": 2.426292007907371, "grad_norm": 2.747524388058323, "learning_rate": 1.8576449788371065e-06, "loss": 0.1371, "step": 17183 }, { "epoch": 2.4264332109573568, "grad_norm": 4.413331653601289, "learning_rate": 1.8567600756008753e-06, "loss": 0.204, "step": 17184 }, { "epoch": 2.4265744140073426, "grad_norm": 3.3118489707059426, "learning_rate": 1.8558753616100223e-06, "loss": 0.1538, "step": 17185 }, { "epoch": 2.4267156170573285, "grad_norm": 3.1554393015322715, "learning_rate": 1.8549908368851099e-06, "loss": 0.1634, "step": 17186 }, { "epoch": 2.4268568201073144, "grad_norm": 2.913859310193168, "learning_rate": 1.854106501446693e-06, "loss": 0.1549, "step": 17187 }, { "epoch": 2.4269980231573003, "grad_norm": 3.0406136463002915, "learning_rate": 1.8532223553153194e-06, "loss": 0.1603, "step": 17188 }, { "epoch": 2.427139226207286, "grad_norm": 2.9229674863759323, "learning_rate": 1.852338398511544e-06, "loss": 0.1276, "step": 17189 }, { "epoch": 2.427280429257272, "grad_norm": 3.8550741383075775, "learning_rate": 1.8514546310559044e-06, "loss": 0.1372, "step": 17190 }, { "epoch": 2.427421632307258, "grad_norm": 3.2322720814740085, "learning_rate": 1.8505710529689402e-06, "loss": 0.1533, "step": 17191 }, { "epoch": 2.427562835357244, "grad_norm": 3.7059883903013, "learning_rate": 1.8496876642711882e-06, "loss": 0.1753, "step": 17192 }, { "epoch": 2.4277040384072297, "grad_norm": 3.492832784961619, "learning_rate": 1.8488044649831716e-06, "loss": 0.1969, "step": 17193 }, { "epoch": 2.4278452414572156, "grad_norm": 2.783881213349675, "learning_rate": 1.8479214551254176e-06, "loss": 0.1398, "step": 17194 }, { "epoch": 2.4279864445072015, "grad_norm": 4.034497505615193, "learning_rate": 1.84703863471845e-06, "loss": 0.2461, "step": 17195 }, { "epoch": 2.4281276475571874, "grad_norm": 3.468404042266877, "learning_rate": 1.8461560037827842e-06, "loss": 0.1881, "step": 17196 }, { "epoch": 2.4282688506071732, "grad_norm": 3.430727768869994, "learning_rate": 1.8452735623389317e-06, "loss": 0.1905, "step": 17197 }, { "epoch": 2.428410053657159, "grad_norm": 3.369458468775003, "learning_rate": 1.8443913104073984e-06, "loss": 0.1805, "step": 17198 }, { "epoch": 2.428551256707145, "grad_norm": 5.1839203163831895, "learning_rate": 1.8435092480086902e-06, "loss": 0.1594, "step": 17199 }, { "epoch": 2.428692459757131, "grad_norm": 2.8985353166412, "learning_rate": 1.842627375163305e-06, "loss": 0.128, "step": 17200 }, { "epoch": 2.4288336628071168, "grad_norm": 3.2380151516382125, "learning_rate": 1.8417456918917355e-06, "loss": 0.1574, "step": 17201 }, { "epoch": 2.4289748658571026, "grad_norm": 2.912280008433937, "learning_rate": 1.8408641982144738e-06, "loss": 0.1502, "step": 17202 }, { "epoch": 2.4291160689070885, "grad_norm": 3.0728797262641825, "learning_rate": 1.8399828941520036e-06, "loss": 0.138, "step": 17203 }, { "epoch": 2.4292572719570744, "grad_norm": 3.123077298687668, "learning_rate": 1.8391017797248079e-06, "loss": 0.1147, "step": 17204 }, { "epoch": 2.4293984750070603, "grad_norm": 2.6960118934154265, "learning_rate": 1.8382208549533609e-06, "loss": 0.1415, "step": 17205 }, { "epoch": 2.429539678057046, "grad_norm": 3.4733377429385985, "learning_rate": 1.8373401198581353e-06, "loss": 0.1561, "step": 17206 }, { "epoch": 2.429680881107032, "grad_norm": 2.664096796477448, "learning_rate": 1.8364595744596026e-06, "loss": 0.1304, "step": 17207 }, { "epoch": 2.429822084157018, "grad_norm": 2.821024016576151, "learning_rate": 1.8355792187782228e-06, "loss": 0.1181, "step": 17208 }, { "epoch": 2.429963287207004, "grad_norm": 3.456582404735825, "learning_rate": 1.8346990528344589e-06, "loss": 0.1669, "step": 17209 }, { "epoch": 2.4301044902569897, "grad_norm": 3.721668412541519, "learning_rate": 1.8338190766487606e-06, "loss": 0.151, "step": 17210 }, { "epoch": 2.4302456933069756, "grad_norm": 2.904562493566288, "learning_rate": 1.8329392902415777e-06, "loss": 0.1385, "step": 17211 }, { "epoch": 2.4303868963569615, "grad_norm": 2.718114041907725, "learning_rate": 1.8320596936333613e-06, "loss": 0.1397, "step": 17212 }, { "epoch": 2.4305280994069474, "grad_norm": 3.471337090174425, "learning_rate": 1.8311802868445494e-06, "loss": 0.159, "step": 17213 }, { "epoch": 2.4306693024569332, "grad_norm": 3.087029411894934, "learning_rate": 1.8303010698955803e-06, "loss": 0.1577, "step": 17214 }, { "epoch": 2.430810505506919, "grad_norm": 2.631387592854345, "learning_rate": 1.8294220428068865e-06, "loss": 0.119, "step": 17215 }, { "epoch": 2.430951708556905, "grad_norm": 3.0708274456448463, "learning_rate": 1.828543205598895e-06, "loss": 0.1194, "step": 17216 }, { "epoch": 2.431092911606891, "grad_norm": 3.8797698660103297, "learning_rate": 1.8276645582920306e-06, "loss": 0.2223, "step": 17217 }, { "epoch": 2.4312341146568768, "grad_norm": 4.0951883884088245, "learning_rate": 1.8267861009067124e-06, "loss": 0.1936, "step": 17218 }, { "epoch": 2.4313753177068627, "grad_norm": 3.8315254373504364, "learning_rate": 1.8259078334633561e-06, "loss": 0.1669, "step": 17219 }, { "epoch": 2.4315165207568485, "grad_norm": 2.7585148019647674, "learning_rate": 1.8250297559823716e-06, "loss": 0.1294, "step": 17220 }, { "epoch": 2.4316577238068344, "grad_norm": 3.00863625472103, "learning_rate": 1.8241518684841642e-06, "loss": 0.1505, "step": 17221 }, { "epoch": 2.4317989268568203, "grad_norm": 3.5954465939841986, "learning_rate": 1.8232741709891376e-06, "loss": 0.1718, "step": 17222 }, { "epoch": 2.431940129906806, "grad_norm": 3.218723743168936, "learning_rate": 1.822396663517685e-06, "loss": 0.1546, "step": 17223 }, { "epoch": 2.432081332956792, "grad_norm": 3.4682573013106293, "learning_rate": 1.8215193460902047e-06, "loss": 0.1348, "step": 17224 }, { "epoch": 2.432222536006778, "grad_norm": 3.5527284163116355, "learning_rate": 1.8206422187270823e-06, "loss": 0.1472, "step": 17225 }, { "epoch": 2.432363739056764, "grad_norm": 2.728546326426804, "learning_rate": 1.8197652814487054e-06, "loss": 0.1324, "step": 17226 }, { "epoch": 2.4325049421067497, "grad_norm": 2.5518412629950653, "learning_rate": 1.8188885342754481e-06, "loss": 0.1211, "step": 17227 }, { "epoch": 2.4326461451567356, "grad_norm": 3.32274652476081, "learning_rate": 1.818011977227686e-06, "loss": 0.1583, "step": 17228 }, { "epoch": 2.4327873482067215, "grad_norm": 3.1013064180302736, "learning_rate": 1.8171356103257942e-06, "loss": 0.1183, "step": 17229 }, { "epoch": 2.4329285512567074, "grad_norm": 3.43214513386801, "learning_rate": 1.8162594335901363e-06, "loss": 0.1508, "step": 17230 }, { "epoch": 2.4330697543066933, "grad_norm": 3.351763724303904, "learning_rate": 1.8153834470410758e-06, "loss": 0.1589, "step": 17231 }, { "epoch": 2.433210957356679, "grad_norm": 3.337588406520045, "learning_rate": 1.814507650698969e-06, "loss": 0.1429, "step": 17232 }, { "epoch": 2.433352160406665, "grad_norm": 3.6443654478298524, "learning_rate": 1.8136320445841693e-06, "loss": 0.1691, "step": 17233 }, { "epoch": 2.433493363456651, "grad_norm": 3.1130296443172876, "learning_rate": 1.812756628717025e-06, "loss": 0.1461, "step": 17234 }, { "epoch": 2.433634566506637, "grad_norm": 3.331029963570307, "learning_rate": 1.8118814031178822e-06, "loss": 0.09, "step": 17235 }, { "epoch": 2.4337757695566227, "grad_norm": 3.222937538724706, "learning_rate": 1.8110063678070778e-06, "loss": 0.1539, "step": 17236 }, { "epoch": 2.433916972606608, "grad_norm": 3.3823771127924647, "learning_rate": 1.8101315228049498e-06, "loss": 0.174, "step": 17237 }, { "epoch": 2.434058175656594, "grad_norm": 2.917301796009687, "learning_rate": 1.809256868131828e-06, "loss": 0.1389, "step": 17238 }, { "epoch": 2.43419937870658, "grad_norm": 3.0951001755624943, "learning_rate": 1.80838240380804e-06, "loss": 0.137, "step": 17239 }, { "epoch": 2.4343405817565658, "grad_norm": 2.547149511365408, "learning_rate": 1.8075081298539032e-06, "loss": 0.138, "step": 17240 }, { "epoch": 2.4344817848065516, "grad_norm": 2.6507846007217126, "learning_rate": 1.8066340462897435e-06, "loss": 0.1288, "step": 17241 }, { "epoch": 2.4346229878565375, "grad_norm": 3.479158426646564, "learning_rate": 1.8057601531358693e-06, "loss": 0.1835, "step": 17242 }, { "epoch": 2.4347641909065234, "grad_norm": 2.520138431855701, "learning_rate": 1.804886450412593e-06, "loss": 0.102, "step": 17243 }, { "epoch": 2.4349053939565093, "grad_norm": 3.116229566631311, "learning_rate": 1.8040129381402137e-06, "loss": 0.161, "step": 17244 }, { "epoch": 2.435046597006495, "grad_norm": 2.6524300149918334, "learning_rate": 1.8031396163390314e-06, "loss": 0.1146, "step": 17245 }, { "epoch": 2.435187800056481, "grad_norm": 3.0850634330095303, "learning_rate": 1.802266485029347e-06, "loss": 0.1238, "step": 17246 }, { "epoch": 2.435329003106467, "grad_norm": 2.738934147477456, "learning_rate": 1.8013935442314502e-06, "loss": 0.1182, "step": 17247 }, { "epoch": 2.435470206156453, "grad_norm": 2.9747631240238106, "learning_rate": 1.8005207939656255e-06, "loss": 0.1516, "step": 17248 }, { "epoch": 2.4356114092064387, "grad_norm": 3.236784471974631, "learning_rate": 1.799648234252157e-06, "loss": 0.15, "step": 17249 }, { "epoch": 2.4357526122564246, "grad_norm": 3.5388537161929357, "learning_rate": 1.7987758651113218e-06, "loss": 0.208, "step": 17250 }, { "epoch": 2.4358938153064105, "grad_norm": 3.1778392936440896, "learning_rate": 1.7979036865633949e-06, "loss": 0.1536, "step": 17251 }, { "epoch": 2.4360350183563964, "grad_norm": 3.280262454234508, "learning_rate": 1.797031698628643e-06, "loss": 0.1665, "step": 17252 }, { "epoch": 2.4361762214063822, "grad_norm": 2.9633046066969797, "learning_rate": 1.7961599013273312e-06, "loss": 0.1669, "step": 17253 }, { "epoch": 2.436317424456368, "grad_norm": 3.376633298419689, "learning_rate": 1.7952882946797212e-06, "loss": 0.1579, "step": 17254 }, { "epoch": 2.436458627506354, "grad_norm": 3.540608445739577, "learning_rate": 1.7944168787060678e-06, "loss": 0.1841, "step": 17255 }, { "epoch": 2.43659983055634, "grad_norm": 3.6602989971620845, "learning_rate": 1.7935456534266227e-06, "loss": 0.1215, "step": 17256 }, { "epoch": 2.4367410336063258, "grad_norm": 2.9951540927230482, "learning_rate": 1.7926746188616295e-06, "loss": 0.1636, "step": 17257 }, { "epoch": 2.4368822366563117, "grad_norm": 3.20566298717064, "learning_rate": 1.7918037750313366e-06, "loss": 0.1127, "step": 17258 }, { "epoch": 2.4370234397062975, "grad_norm": 4.312635640100855, "learning_rate": 1.7909331219559788e-06, "loss": 0.1909, "step": 17259 }, { "epoch": 2.4371646427562834, "grad_norm": 3.4937181498339553, "learning_rate": 1.7900626596557924e-06, "loss": 0.1574, "step": 17260 }, { "epoch": 2.4373058458062693, "grad_norm": 3.594099875363766, "learning_rate": 1.7891923881510021e-06, "loss": 0.141, "step": 17261 }, { "epoch": 2.437447048856255, "grad_norm": 3.476415690439647, "learning_rate": 1.7883223074618316e-06, "loss": 0.1913, "step": 17262 }, { "epoch": 2.437588251906241, "grad_norm": 2.8753806750818494, "learning_rate": 1.7874524176085073e-06, "loss": 0.1406, "step": 17263 }, { "epoch": 2.437729454956227, "grad_norm": 3.669772299737988, "learning_rate": 1.7865827186112429e-06, "loss": 0.1502, "step": 17264 }, { "epoch": 2.437870658006213, "grad_norm": 3.283253555907916, "learning_rate": 1.7857132104902474e-06, "loss": 0.1391, "step": 17265 }, { "epoch": 2.4380118610561987, "grad_norm": 3.789568305936943, "learning_rate": 1.7848438932657309e-06, "loss": 0.1876, "step": 17266 }, { "epoch": 2.4381530641061846, "grad_norm": 3.2811100720823476, "learning_rate": 1.7839747669578932e-06, "loss": 0.1555, "step": 17267 }, { "epoch": 2.4382942671561705, "grad_norm": 2.9257847495670366, "learning_rate": 1.7831058315869343e-06, "loss": 0.1026, "step": 17268 }, { "epoch": 2.4384354702061564, "grad_norm": 3.1288573534989403, "learning_rate": 1.7822370871730465e-06, "loss": 0.1329, "step": 17269 }, { "epoch": 2.4385766732561422, "grad_norm": 2.6235434089535663, "learning_rate": 1.7813685337364205e-06, "loss": 0.1124, "step": 17270 }, { "epoch": 2.438717876306128, "grad_norm": 2.8432602347551716, "learning_rate": 1.7805001712972415e-06, "loss": 0.1645, "step": 17271 }, { "epoch": 2.438859079356114, "grad_norm": 3.3519059676668106, "learning_rate": 1.7796319998756872e-06, "loss": 0.1204, "step": 17272 }, { "epoch": 2.4390002824061, "grad_norm": 2.780088035758387, "learning_rate": 1.7787640194919354e-06, "loss": 0.1148, "step": 17273 }, { "epoch": 2.439141485456086, "grad_norm": 3.0285458365226745, "learning_rate": 1.777896230166155e-06, "loss": 0.1228, "step": 17274 }, { "epoch": 2.4392826885060717, "grad_norm": 2.8521131343331687, "learning_rate": 1.7770286319185182e-06, "loss": 0.1074, "step": 17275 }, { "epoch": 2.4394238915560575, "grad_norm": 3.1365560098363408, "learning_rate": 1.776161224769185e-06, "loss": 0.1524, "step": 17276 }, { "epoch": 2.4395650946060434, "grad_norm": 3.0548683092830284, "learning_rate": 1.7752940087383153e-06, "loss": 0.141, "step": 17277 }, { "epoch": 2.4397062976560293, "grad_norm": 3.0023053706765404, "learning_rate": 1.774426983846058e-06, "loss": 0.1253, "step": 17278 }, { "epoch": 2.439847500706015, "grad_norm": 2.870947198062366, "learning_rate": 1.7735601501125632e-06, "loss": 0.1358, "step": 17279 }, { "epoch": 2.439988703756001, "grad_norm": 3.3658243695686645, "learning_rate": 1.7726935075579798e-06, "loss": 0.1525, "step": 17280 }, { "epoch": 2.440129906805987, "grad_norm": 3.2900338703543555, "learning_rate": 1.7718270562024464e-06, "loss": 0.1654, "step": 17281 }, { "epoch": 2.440271109855973, "grad_norm": 2.98088213777779, "learning_rate": 1.770960796066099e-06, "loss": 0.1282, "step": 17282 }, { "epoch": 2.4404123129059587, "grad_norm": 3.2657632121822813, "learning_rate": 1.7700947271690693e-06, "loss": 0.1813, "step": 17283 }, { "epoch": 2.4405535159559446, "grad_norm": 3.6475264514872676, "learning_rate": 1.7692288495314836e-06, "loss": 0.1353, "step": 17284 }, { "epoch": 2.4406947190059305, "grad_norm": 3.0980042599562228, "learning_rate": 1.7683631631734643e-06, "loss": 0.1562, "step": 17285 }, { "epoch": 2.4408359220559164, "grad_norm": 2.5788459697367854, "learning_rate": 1.7674976681151302e-06, "loss": 0.1092, "step": 17286 }, { "epoch": 2.4409771251059023, "grad_norm": 3.2534962048122256, "learning_rate": 1.7666323643765947e-06, "loss": 0.1337, "step": 17287 }, { "epoch": 2.441118328155888, "grad_norm": 2.975086146586491, "learning_rate": 1.765767251977969e-06, "loss": 0.1562, "step": 17288 }, { "epoch": 2.441259531205874, "grad_norm": 3.4665949999230183, "learning_rate": 1.7649023309393543e-06, "loss": 0.1439, "step": 17289 }, { "epoch": 2.44140073425586, "grad_norm": 3.0987555210675866, "learning_rate": 1.7640376012808536e-06, "loss": 0.1311, "step": 17290 }, { "epoch": 2.441541937305846, "grad_norm": 3.3529874140203204, "learning_rate": 1.7631730630225609e-06, "loss": 0.1617, "step": 17291 }, { "epoch": 2.4416831403558317, "grad_norm": 2.231268159980494, "learning_rate": 1.7623087161845699e-06, "loss": 0.0914, "step": 17292 }, { "epoch": 2.4418243434058176, "grad_norm": 3.708674345259756, "learning_rate": 1.7614445607869667e-06, "loss": 0.1833, "step": 17293 }, { "epoch": 2.4419655464558034, "grad_norm": 3.7619961130611914, "learning_rate": 1.760580596849838e-06, "loss": 0.2044, "step": 17294 }, { "epoch": 2.4421067495057893, "grad_norm": 2.81402149234809, "learning_rate": 1.7597168243932538e-06, "loss": 0.1158, "step": 17295 }, { "epoch": 2.442247952555775, "grad_norm": 2.6094826557838826, "learning_rate": 1.758853243437293e-06, "loss": 0.1044, "step": 17296 }, { "epoch": 2.442389155605761, "grad_norm": 3.5392231981004905, "learning_rate": 1.7579898540020202e-06, "loss": 0.174, "step": 17297 }, { "epoch": 2.442530358655747, "grad_norm": 3.281645220392604, "learning_rate": 1.7571266561075073e-06, "loss": 0.1522, "step": 17298 }, { "epoch": 2.442671561705733, "grad_norm": 3.5545665440754473, "learning_rate": 1.7562636497738105e-06, "loss": 0.1578, "step": 17299 }, { "epoch": 2.4428127647557187, "grad_norm": 3.6124390028532765, "learning_rate": 1.7554008350209862e-06, "loss": 0.1782, "step": 17300 }, { "epoch": 2.4429539678057046, "grad_norm": 3.376967388581666, "learning_rate": 1.7545382118690868e-06, "loss": 0.1737, "step": 17301 }, { "epoch": 2.4430951708556905, "grad_norm": 2.396380928461803, "learning_rate": 1.7536757803381576e-06, "loss": 0.1269, "step": 17302 }, { "epoch": 2.4432363739056764, "grad_norm": 4.015094003367481, "learning_rate": 1.7528135404482415e-06, "loss": 0.1909, "step": 17303 }, { "epoch": 2.4433775769556623, "grad_norm": 3.7380352650518804, "learning_rate": 1.751951492219378e-06, "loss": 0.1812, "step": 17304 }, { "epoch": 2.443518780005648, "grad_norm": 3.3925321512326114, "learning_rate": 1.751089635671599e-06, "loss": 0.1324, "step": 17305 }, { "epoch": 2.443659983055634, "grad_norm": 3.499209158994362, "learning_rate": 1.750227970824936e-06, "loss": 0.1801, "step": 17306 }, { "epoch": 2.44380118610562, "grad_norm": 2.6418144982835408, "learning_rate": 1.7493664976994106e-06, "loss": 0.1011, "step": 17307 }, { "epoch": 2.443942389155606, "grad_norm": 3.986539252847985, "learning_rate": 1.7485052163150452e-06, "loss": 0.1941, "step": 17308 }, { "epoch": 2.4440835922055917, "grad_norm": 3.414155276357219, "learning_rate": 1.747644126691852e-06, "loss": 0.1503, "step": 17309 }, { "epoch": 2.4442247952555776, "grad_norm": 3.3628508066069673, "learning_rate": 1.746783228849851e-06, "loss": 0.1258, "step": 17310 }, { "epoch": 2.4443659983055634, "grad_norm": 2.8227059519542173, "learning_rate": 1.7459225228090404e-06, "loss": 0.1205, "step": 17311 }, { "epoch": 2.4445072013555493, "grad_norm": 2.559181681041187, "learning_rate": 1.7450620085894255e-06, "loss": 0.1121, "step": 17312 }, { "epoch": 2.444648404405535, "grad_norm": 3.1930315449931634, "learning_rate": 1.7442016862110056e-06, "loss": 0.1445, "step": 17313 }, { "epoch": 2.444789607455521, "grad_norm": 2.9633648016624665, "learning_rate": 1.7433415556937693e-06, "loss": 0.1164, "step": 17314 }, { "epoch": 2.444930810505507, "grad_norm": 3.062864372879908, "learning_rate": 1.742481617057713e-06, "loss": 0.1084, "step": 17315 }, { "epoch": 2.445072013555493, "grad_norm": 3.0900978306649876, "learning_rate": 1.741621870322817e-06, "loss": 0.1306, "step": 17316 }, { "epoch": 2.4452132166054787, "grad_norm": 3.743581038086917, "learning_rate": 1.7407623155090635e-06, "loss": 0.187, "step": 17317 }, { "epoch": 2.4453544196554646, "grad_norm": 3.3986200457026, "learning_rate": 1.7399029526364254e-06, "loss": 0.1624, "step": 17318 }, { "epoch": 2.4454956227054505, "grad_norm": 3.4400475745889585, "learning_rate": 1.7390437817248763e-06, "loss": 0.1446, "step": 17319 }, { "epoch": 2.4456368257554364, "grad_norm": 3.7858254244640084, "learning_rate": 1.7381848027943815e-06, "loss": 0.1852, "step": 17320 }, { "epoch": 2.4457780288054223, "grad_norm": 2.746608018793526, "learning_rate": 1.7373260158649042e-06, "loss": 0.1463, "step": 17321 }, { "epoch": 2.445919231855408, "grad_norm": 4.19525552710215, "learning_rate": 1.7364674209564025e-06, "loss": 0.1893, "step": 17322 }, { "epoch": 2.446060434905394, "grad_norm": 3.0730638725818897, "learning_rate": 1.7356090180888286e-06, "loss": 0.1006, "step": 17323 }, { "epoch": 2.44620163795538, "grad_norm": 3.5254080261844942, "learning_rate": 1.7347508072821317e-06, "loss": 0.133, "step": 17324 }, { "epoch": 2.446342841005366, "grad_norm": 3.7564256367011954, "learning_rate": 1.7338927885562573e-06, "loss": 0.1798, "step": 17325 }, { "epoch": 2.4464840440553517, "grad_norm": 3.541625572551512, "learning_rate": 1.7330349619311415e-06, "loss": 0.136, "step": 17326 }, { "epoch": 2.4466252471053376, "grad_norm": 4.373165131446355, "learning_rate": 1.7321773274267284e-06, "loss": 0.178, "step": 17327 }, { "epoch": 2.4467664501553235, "grad_norm": 3.4552968595193008, "learning_rate": 1.731319885062941e-06, "loss": 0.1643, "step": 17328 }, { "epoch": 2.4469076532053093, "grad_norm": 3.365486453386146, "learning_rate": 1.7304626348597075e-06, "loss": 0.169, "step": 17329 }, { "epoch": 2.4470488562552952, "grad_norm": 3.837009608908741, "learning_rate": 1.7296055768369524e-06, "loss": 0.1628, "step": 17330 }, { "epoch": 2.447190059305281, "grad_norm": 2.460743028036989, "learning_rate": 1.7287487110145896e-06, "loss": 0.0942, "step": 17331 }, { "epoch": 2.447331262355267, "grad_norm": 2.9450621809494755, "learning_rate": 1.7278920374125362e-06, "loss": 0.1618, "step": 17332 }, { "epoch": 2.447472465405253, "grad_norm": 2.6230917997613643, "learning_rate": 1.7270355560506991e-06, "loss": 0.149, "step": 17333 }, { "epoch": 2.4476136684552388, "grad_norm": 3.1794081212748866, "learning_rate": 1.726179266948984e-06, "loss": 0.1726, "step": 17334 }, { "epoch": 2.4477548715052246, "grad_norm": 3.8040983387830605, "learning_rate": 1.7253231701272887e-06, "loss": 0.1858, "step": 17335 }, { "epoch": 2.4478960745552105, "grad_norm": 2.630983220948977, "learning_rate": 1.7244672656055105e-06, "loss": 0.0998, "step": 17336 }, { "epoch": 2.4480372776051964, "grad_norm": 3.817498410196405, "learning_rate": 1.7236115534035381e-06, "loss": 0.1901, "step": 17337 }, { "epoch": 2.4481784806551823, "grad_norm": 2.752501333448148, "learning_rate": 1.7227560335412597e-06, "loss": 0.1228, "step": 17338 }, { "epoch": 2.448319683705168, "grad_norm": 3.687596708631765, "learning_rate": 1.721900706038555e-06, "loss": 0.1759, "step": 17339 }, { "epoch": 2.448460886755154, "grad_norm": 2.8532413124648994, "learning_rate": 1.721045570915304e-06, "loss": 0.1182, "step": 17340 }, { "epoch": 2.44860208980514, "grad_norm": 3.061332123374655, "learning_rate": 1.7201906281913784e-06, "loss": 0.1355, "step": 17341 }, { "epoch": 2.448743292855126, "grad_norm": 3.0671755286243885, "learning_rate": 1.7193358778866464e-06, "loss": 0.1573, "step": 17342 }, { "epoch": 2.4488844959051117, "grad_norm": 3.1513989247936163, "learning_rate": 1.7184813200209704e-06, "loss": 0.1589, "step": 17343 }, { "epoch": 2.4490256989550976, "grad_norm": 3.686118025013961, "learning_rate": 1.7176269546142166e-06, "loss": 0.1826, "step": 17344 }, { "epoch": 2.4491669020050835, "grad_norm": 3.048702265504464, "learning_rate": 1.7167727816862333e-06, "loss": 0.118, "step": 17345 }, { "epoch": 2.4493081050550694, "grad_norm": 3.1665239453554648, "learning_rate": 1.715918801256874e-06, "loss": 0.1592, "step": 17346 }, { "epoch": 2.4494493081050552, "grad_norm": 3.3856624559500195, "learning_rate": 1.7150650133459835e-06, "loss": 0.1502, "step": 17347 }, { "epoch": 2.449590511155041, "grad_norm": 2.6074767780809345, "learning_rate": 1.7142114179734004e-06, "loss": 0.1553, "step": 17348 }, { "epoch": 2.449731714205027, "grad_norm": 3.98630562842716, "learning_rate": 1.7133580151589701e-06, "loss": 0.2099, "step": 17349 }, { "epoch": 2.449872917255013, "grad_norm": 3.8054557540188942, "learning_rate": 1.712504804922519e-06, "loss": 0.1779, "step": 17350 }, { "epoch": 2.4500141203049988, "grad_norm": 3.3041151327987324, "learning_rate": 1.7116517872838788e-06, "loss": 0.1367, "step": 17351 }, { "epoch": 2.4501553233549846, "grad_norm": 3.1424163518918946, "learning_rate": 1.7107989622628706e-06, "loss": 0.1524, "step": 17352 }, { "epoch": 2.4502965264049705, "grad_norm": 3.343362136161125, "learning_rate": 1.709946329879315e-06, "loss": 0.1242, "step": 17353 }, { "epoch": 2.4504377294549564, "grad_norm": 2.5968900350226694, "learning_rate": 1.7090938901530264e-06, "loss": 0.1208, "step": 17354 }, { "epoch": 2.4505789325049423, "grad_norm": 3.0575961656830377, "learning_rate": 1.708241643103815e-06, "loss": 0.128, "step": 17355 }, { "epoch": 2.450720135554928, "grad_norm": 3.2469591216091374, "learning_rate": 1.7073895887514869e-06, "loss": 0.1167, "step": 17356 }, { "epoch": 2.4508613386049136, "grad_norm": 3.7061651484360527, "learning_rate": 1.7065377271158434e-06, "loss": 0.1929, "step": 17357 }, { "epoch": 2.4510025416548995, "grad_norm": 3.9234229253414044, "learning_rate": 1.7056860582166823e-06, "loss": 0.1812, "step": 17358 }, { "epoch": 2.4511437447048854, "grad_norm": 3.036486746602459, "learning_rate": 1.7048345820737944e-06, "loss": 0.147, "step": 17359 }, { "epoch": 2.4512849477548713, "grad_norm": 3.2581255430568192, "learning_rate": 1.703983298706966e-06, "loss": 0.1531, "step": 17360 }, { "epoch": 2.451426150804857, "grad_norm": 4.022350787830313, "learning_rate": 1.703132208135988e-06, "loss": 0.1855, "step": 17361 }, { "epoch": 2.451567353854843, "grad_norm": 2.743292867619651, "learning_rate": 1.7022813103806324e-06, "loss": 0.1248, "step": 17362 }, { "epoch": 2.451708556904829, "grad_norm": 3.5101056139063993, "learning_rate": 1.7014306054606744e-06, "loss": 0.1722, "step": 17363 }, { "epoch": 2.451849759954815, "grad_norm": 3.2745120995800137, "learning_rate": 1.700580093395886e-06, "loss": 0.1512, "step": 17364 }, { "epoch": 2.4519909630048007, "grad_norm": 2.818151591381503, "learning_rate": 1.6997297742060293e-06, "loss": 0.1441, "step": 17365 }, { "epoch": 2.4521321660547866, "grad_norm": 3.148566655665294, "learning_rate": 1.6988796479108716e-06, "loss": 0.1411, "step": 17366 }, { "epoch": 2.4522733691047724, "grad_norm": 2.845156259539625, "learning_rate": 1.698029714530165e-06, "loss": 0.1481, "step": 17367 }, { "epoch": 2.4524145721547583, "grad_norm": 3.4530014071361252, "learning_rate": 1.6971799740836625e-06, "loss": 0.2038, "step": 17368 }, { "epoch": 2.452555775204744, "grad_norm": 2.4867796448882693, "learning_rate": 1.6963304265911106e-06, "loss": 0.1152, "step": 17369 }, { "epoch": 2.45269697825473, "grad_norm": 2.844120383077472, "learning_rate": 1.6954810720722581e-06, "loss": 0.125, "step": 17370 }, { "epoch": 2.452838181304716, "grad_norm": 4.15629160416572, "learning_rate": 1.6946319105468324e-06, "loss": 0.1947, "step": 17371 }, { "epoch": 2.452979384354702, "grad_norm": 3.0913270672018447, "learning_rate": 1.6937829420345775e-06, "loss": 0.1546, "step": 17372 }, { "epoch": 2.4531205874046877, "grad_norm": 3.504386294409317, "learning_rate": 1.6929341665552201e-06, "loss": 0.1456, "step": 17373 }, { "epoch": 2.4532617904546736, "grad_norm": 4.456315651321824, "learning_rate": 1.6920855841284844e-06, "loss": 0.214, "step": 17374 }, { "epoch": 2.4534029935046595, "grad_norm": 3.1676527952522533, "learning_rate": 1.6912371947740924e-06, "loss": 0.138, "step": 17375 }, { "epoch": 2.4535441965546454, "grad_norm": 3.1685429172347663, "learning_rate": 1.6903889985117594e-06, "loss": 0.1414, "step": 17376 }, { "epoch": 2.4536853996046313, "grad_norm": 3.142995656681546, "learning_rate": 1.6895409953611952e-06, "loss": 0.1471, "step": 17377 }, { "epoch": 2.453826602654617, "grad_norm": 3.2184639539462343, "learning_rate": 1.688693185342114e-06, "loss": 0.1742, "step": 17378 }, { "epoch": 2.453967805704603, "grad_norm": 2.8140775923100465, "learning_rate": 1.6878455684742113e-06, "loss": 0.1138, "step": 17379 }, { "epoch": 2.454109008754589, "grad_norm": 2.665841601174925, "learning_rate": 1.6869981447771876e-06, "loss": 0.1276, "step": 17380 }, { "epoch": 2.454250211804575, "grad_norm": 3.064872193522012, "learning_rate": 1.6861509142707376e-06, "loss": 0.1534, "step": 17381 }, { "epoch": 2.4543914148545607, "grad_norm": 3.199340274873859, "learning_rate": 1.6853038769745466e-06, "loss": 0.1438, "step": 17382 }, { "epoch": 2.4545326179045466, "grad_norm": 2.8324053774688305, "learning_rate": 1.6844570329083066e-06, "loss": 0.1394, "step": 17383 }, { "epoch": 2.4546738209545325, "grad_norm": 2.8735502244207094, "learning_rate": 1.6836103820916926e-06, "loss": 0.1288, "step": 17384 }, { "epoch": 2.4548150240045183, "grad_norm": 5.835297541494395, "learning_rate": 1.6827639245443817e-06, "loss": 0.1403, "step": 17385 }, { "epoch": 2.4549562270545042, "grad_norm": 3.1340875889961657, "learning_rate": 1.6819176602860454e-06, "loss": 0.1627, "step": 17386 }, { "epoch": 2.45509743010449, "grad_norm": 2.6592661536903, "learning_rate": 1.6810715893363539e-06, "loss": 0.1094, "step": 17387 }, { "epoch": 2.455238633154476, "grad_norm": 3.453254714824574, "learning_rate": 1.68022571171496e-06, "loss": 0.1702, "step": 17388 }, { "epoch": 2.455379836204462, "grad_norm": 3.482589259047617, "learning_rate": 1.6793800274415295e-06, "loss": 0.199, "step": 17389 }, { "epoch": 2.4555210392544478, "grad_norm": 3.234346344020309, "learning_rate": 1.6785345365357153e-06, "loss": 0.1439, "step": 17390 }, { "epoch": 2.4556622423044336, "grad_norm": 2.721238465395554, "learning_rate": 1.677689239017164e-06, "loss": 0.1068, "step": 17391 }, { "epoch": 2.4558034453544195, "grad_norm": 3.3437502318949375, "learning_rate": 1.6768441349055197e-06, "loss": 0.1422, "step": 17392 }, { "epoch": 2.4559446484044054, "grad_norm": 3.273245391922852, "learning_rate": 1.6759992242204247e-06, "loss": 0.135, "step": 17393 }, { "epoch": 2.4560858514543913, "grad_norm": 3.3621167729868, "learning_rate": 1.675154506981509e-06, "loss": 0.1809, "step": 17394 }, { "epoch": 2.456227054504377, "grad_norm": 2.740774260966187, "learning_rate": 1.674309983208413e-06, "loss": 0.1205, "step": 17395 }, { "epoch": 2.456368257554363, "grad_norm": 2.798120709908191, "learning_rate": 1.673465652920755e-06, "loss": 0.135, "step": 17396 }, { "epoch": 2.456509460604349, "grad_norm": 2.7445460921659293, "learning_rate": 1.6726215161381598e-06, "loss": 0.1076, "step": 17397 }, { "epoch": 2.456650663654335, "grad_norm": 3.2006709477414477, "learning_rate": 1.6717775728802432e-06, "loss": 0.148, "step": 17398 }, { "epoch": 2.4567918667043207, "grad_norm": 4.764041781983336, "learning_rate": 1.6709338231666194e-06, "loss": 0.1354, "step": 17399 }, { "epoch": 2.4569330697543066, "grad_norm": 3.523281515072695, "learning_rate": 1.670090267016895e-06, "loss": 0.1293, "step": 17400 }, { "epoch": 2.4570742728042925, "grad_norm": 3.4260206525199344, "learning_rate": 1.6692469044506765e-06, "loss": 0.2088, "step": 17401 }, { "epoch": 2.4572154758542784, "grad_norm": 2.927362873414556, "learning_rate": 1.6684037354875626e-06, "loss": 0.1731, "step": 17402 }, { "epoch": 2.4573566789042642, "grad_norm": 3.4462600511058668, "learning_rate": 1.6675607601471477e-06, "loss": 0.1698, "step": 17403 }, { "epoch": 2.45749788195425, "grad_norm": 3.5040878839990452, "learning_rate": 1.6667179784490251e-06, "loss": 0.1615, "step": 17404 }, { "epoch": 2.457639085004236, "grad_norm": 2.8401030768069924, "learning_rate": 1.6658753904127734e-06, "loss": 0.1334, "step": 17405 }, { "epoch": 2.457780288054222, "grad_norm": 2.412833641588675, "learning_rate": 1.6650329960579792e-06, "loss": 0.1113, "step": 17406 }, { "epoch": 2.4579214911042078, "grad_norm": 2.5075717106421602, "learning_rate": 1.6641907954042203e-06, "loss": 0.1132, "step": 17407 }, { "epoch": 2.4580626941541937, "grad_norm": 2.8115048756542302, "learning_rate": 1.6633487884710663e-06, "loss": 0.1251, "step": 17408 }, { "epoch": 2.4582038972041795, "grad_norm": 3.6483814761494178, "learning_rate": 1.6625069752780864e-06, "loss": 0.1161, "step": 17409 }, { "epoch": 2.4583451002541654, "grad_norm": 2.50306666117014, "learning_rate": 1.6616653558448437e-06, "loss": 0.1275, "step": 17410 }, { "epoch": 2.4584863033041513, "grad_norm": 2.753911871622546, "learning_rate": 1.660823930190897e-06, "loss": 0.1371, "step": 17411 }, { "epoch": 2.458627506354137, "grad_norm": 3.627864550231711, "learning_rate": 1.6599826983358002e-06, "loss": 0.1569, "step": 17412 }, { "epoch": 2.458768709404123, "grad_norm": 2.8687432752209454, "learning_rate": 1.659141660299105e-06, "loss": 0.1281, "step": 17413 }, { "epoch": 2.458909912454109, "grad_norm": 3.1283613024557155, "learning_rate": 1.6583008161003544e-06, "loss": 0.1517, "step": 17414 }, { "epoch": 2.459051115504095, "grad_norm": 3.597307663349181, "learning_rate": 1.6574601657590904e-06, "loss": 0.1568, "step": 17415 }, { "epoch": 2.4591923185540807, "grad_norm": 3.6533673444485966, "learning_rate": 1.65661970929485e-06, "loss": 0.1682, "step": 17416 }, { "epoch": 2.4593335216040666, "grad_norm": 2.758510221765672, "learning_rate": 1.6557794467271616e-06, "loss": 0.1207, "step": 17417 }, { "epoch": 2.4594747246540525, "grad_norm": 3.6437207311166606, "learning_rate": 1.6549393780755574e-06, "loss": 0.1728, "step": 17418 }, { "epoch": 2.4596159277040384, "grad_norm": 4.259301713743304, "learning_rate": 1.6540995033595587e-06, "loss": 0.218, "step": 17419 }, { "epoch": 2.4597571307540242, "grad_norm": 2.83964089028987, "learning_rate": 1.653259822598683e-06, "loss": 0.1267, "step": 17420 }, { "epoch": 2.45989833380401, "grad_norm": 3.024499319090894, "learning_rate": 1.652420335812447e-06, "loss": 0.1561, "step": 17421 }, { "epoch": 2.460039536853996, "grad_norm": 4.690077713236752, "learning_rate": 1.6515810430203516e-06, "loss": 0.2362, "step": 17422 }, { "epoch": 2.460180739903982, "grad_norm": 2.699324303351017, "learning_rate": 1.6507419442419103e-06, "loss": 0.1045, "step": 17423 }, { "epoch": 2.4603219429539678, "grad_norm": 3.2266939451434853, "learning_rate": 1.649903039496621e-06, "loss": 0.132, "step": 17424 }, { "epoch": 2.4604631460039537, "grad_norm": 2.48723441117315, "learning_rate": 1.6490643288039776e-06, "loss": 0.1094, "step": 17425 }, { "epoch": 2.4606043490539395, "grad_norm": 3.436449014421852, "learning_rate": 1.6482258121834737e-06, "loss": 0.1971, "step": 17426 }, { "epoch": 2.4607455521039254, "grad_norm": 2.2810347251519265, "learning_rate": 1.647387489654595e-06, "loss": 0.1235, "step": 17427 }, { "epoch": 2.4608867551539113, "grad_norm": 2.706304753585215, "learning_rate": 1.6465493612368233e-06, "loss": 0.1629, "step": 17428 }, { "epoch": 2.461027958203897, "grad_norm": 4.066951427566559, "learning_rate": 1.6457114269496378e-06, "loss": 0.1985, "step": 17429 }, { "epoch": 2.461169161253883, "grad_norm": 2.997228562186128, "learning_rate": 1.6448736868125093e-06, "loss": 0.1414, "step": 17430 }, { "epoch": 2.461310364303869, "grad_norm": 2.815938641093809, "learning_rate": 1.644036140844909e-06, "loss": 0.1428, "step": 17431 }, { "epoch": 2.461451567353855, "grad_norm": 2.4876020556754788, "learning_rate": 1.6431987890663004e-06, "loss": 0.1079, "step": 17432 }, { "epoch": 2.4615927704038407, "grad_norm": 3.0003607973990327, "learning_rate": 1.6423616314961421e-06, "loss": 0.157, "step": 17433 }, { "epoch": 2.4617339734538266, "grad_norm": 3.078283265163677, "learning_rate": 1.6415246681538887e-06, "loss": 0.1465, "step": 17434 }, { "epoch": 2.4618751765038125, "grad_norm": 3.60747833432939, "learning_rate": 1.6406878990589936e-06, "loss": 0.1763, "step": 17435 }, { "epoch": 2.4620163795537984, "grad_norm": 2.846443610800276, "learning_rate": 1.6398513242309022e-06, "loss": 0.1464, "step": 17436 }, { "epoch": 2.4621575826037843, "grad_norm": 3.5025215706468185, "learning_rate": 1.6390149436890556e-06, "loss": 0.1474, "step": 17437 }, { "epoch": 2.46229878565377, "grad_norm": 2.847339648816644, "learning_rate": 1.638178757452894e-06, "loss": 0.1251, "step": 17438 }, { "epoch": 2.462439988703756, "grad_norm": 3.620187607031835, "learning_rate": 1.6373427655418406e-06, "loss": 0.2069, "step": 17439 }, { "epoch": 2.462581191753742, "grad_norm": 2.309314331741859, "learning_rate": 1.6365069679753331e-06, "loss": 0.1044, "step": 17440 }, { "epoch": 2.462722394803728, "grad_norm": 3.9836255030289163, "learning_rate": 1.6356713647727917e-06, "loss": 0.133, "step": 17441 }, { "epoch": 2.4628635978537137, "grad_norm": 3.465757345704433, "learning_rate": 1.6348359559536353e-06, "loss": 0.1477, "step": 17442 }, { "epoch": 2.4630048009036996, "grad_norm": 2.8996930796735496, "learning_rate": 1.6340007415372783e-06, "loss": 0.1587, "step": 17443 }, { "epoch": 2.4631460039536854, "grad_norm": 3.0104116705945203, "learning_rate": 1.6331657215431319e-06, "loss": 0.1524, "step": 17444 }, { "epoch": 2.4632872070036713, "grad_norm": 2.8736356532574887, "learning_rate": 1.6323308959906003e-06, "loss": 0.1642, "step": 17445 }, { "epoch": 2.463428410053657, "grad_norm": 3.4576007428280096, "learning_rate": 1.631496264899085e-06, "loss": 0.1835, "step": 17446 }, { "epoch": 2.463569613103643, "grad_norm": 3.4990671082834006, "learning_rate": 1.6306618282879816e-06, "loss": 0.1589, "step": 17447 }, { "epoch": 2.463710816153629, "grad_norm": 2.968471656972759, "learning_rate": 1.6298275861766843e-06, "loss": 0.1614, "step": 17448 }, { "epoch": 2.463852019203615, "grad_norm": 4.266959644294783, "learning_rate": 1.628993538584578e-06, "loss": 0.209, "step": 17449 }, { "epoch": 2.4639932222536007, "grad_norm": 4.064974056591121, "learning_rate": 1.6281596855310478e-06, "loss": 0.1868, "step": 17450 }, { "epoch": 2.4641344253035866, "grad_norm": 3.3614911273216883, "learning_rate": 1.6273260270354673e-06, "loss": 0.1439, "step": 17451 }, { "epoch": 2.4642756283535725, "grad_norm": 3.1364421141816767, "learning_rate": 1.626492563117217e-06, "loss": 0.1389, "step": 17452 }, { "epoch": 2.4644168314035584, "grad_norm": 3.8346860564277856, "learning_rate": 1.625659293795664e-06, "loss": 0.1664, "step": 17453 }, { "epoch": 2.4645580344535443, "grad_norm": 3.020833602504063, "learning_rate": 1.624826219090172e-06, "loss": 0.1274, "step": 17454 }, { "epoch": 2.46469923750353, "grad_norm": 3.9153079557253463, "learning_rate": 1.6239933390201034e-06, "loss": 0.2021, "step": 17455 }, { "epoch": 2.464840440553516, "grad_norm": 4.13314547113519, "learning_rate": 1.6231606536048083e-06, "loss": 0.1687, "step": 17456 }, { "epoch": 2.464981643603502, "grad_norm": 3.4186927060255474, "learning_rate": 1.6223281628636433e-06, "loss": 0.1703, "step": 17457 }, { "epoch": 2.465122846653488, "grad_norm": 3.9303225526105448, "learning_rate": 1.6214958668159552e-06, "loss": 0.1692, "step": 17458 }, { "epoch": 2.4652640497034737, "grad_norm": 3.0690573155983336, "learning_rate": 1.6206637654810842e-06, "loss": 0.1348, "step": 17459 }, { "epoch": 2.4654052527534596, "grad_norm": 3.4103732079610047, "learning_rate": 1.619831858878368e-06, "loss": 0.1613, "step": 17460 }, { "epoch": 2.4655464558034454, "grad_norm": 2.782586447504867, "learning_rate": 1.6190001470271399e-06, "loss": 0.1281, "step": 17461 }, { "epoch": 2.4656876588534313, "grad_norm": 3.9539585873468046, "learning_rate": 1.6181686299467303e-06, "loss": 0.1439, "step": 17462 }, { "epoch": 2.465828861903417, "grad_norm": 3.247244987904383, "learning_rate": 1.6173373076564614e-06, "loss": 0.1172, "step": 17463 }, { "epoch": 2.465970064953403, "grad_norm": 2.9650938131173428, "learning_rate": 1.6165061801756531e-06, "loss": 0.1285, "step": 17464 }, { "epoch": 2.466111268003389, "grad_norm": 2.580495917000253, "learning_rate": 1.6156752475236205e-06, "loss": 0.0995, "step": 17465 }, { "epoch": 2.466252471053375, "grad_norm": 3.6374492339703175, "learning_rate": 1.614844509719674e-06, "loss": 0.21, "step": 17466 }, { "epoch": 2.4663936741033607, "grad_norm": 2.839127941007601, "learning_rate": 1.61401396678312e-06, "loss": 0.1518, "step": 17467 }, { "epoch": 2.4665348771533466, "grad_norm": 2.7396576525735816, "learning_rate": 1.6131836187332584e-06, "loss": 0.1582, "step": 17468 }, { "epoch": 2.4666760802033325, "grad_norm": 3.8177171795370146, "learning_rate": 1.6123534655893891e-06, "loss": 0.2111, "step": 17469 }, { "epoch": 2.4668172832533184, "grad_norm": 3.115372881842822, "learning_rate": 1.6115235073708024e-06, "loss": 0.1188, "step": 17470 }, { "epoch": 2.4669584863033043, "grad_norm": 8.939925924167861, "learning_rate": 1.6106937440967897e-06, "loss": 0.168, "step": 17471 }, { "epoch": 2.46709968935329, "grad_norm": 2.4191097076930053, "learning_rate": 1.6098641757866285e-06, "loss": 0.1234, "step": 17472 }, { "epoch": 2.467240892403276, "grad_norm": 3.459741475404155, "learning_rate": 1.6090348024596014e-06, "loss": 0.1623, "step": 17473 }, { "epoch": 2.467382095453262, "grad_norm": 3.732640680980925, "learning_rate": 1.6082056241349787e-06, "loss": 0.1334, "step": 17474 }, { "epoch": 2.467523298503248, "grad_norm": 3.486616431068132, "learning_rate": 1.6073766408320356e-06, "loss": 0.1755, "step": 17475 }, { "epoch": 2.4676645015532337, "grad_norm": 3.3786364555315616, "learning_rate": 1.606547852570034e-06, "loss": 0.1769, "step": 17476 }, { "epoch": 2.4678057046032196, "grad_norm": 3.222997775248464, "learning_rate": 1.605719259368237e-06, "loss": 0.1162, "step": 17477 }, { "epoch": 2.4679469076532055, "grad_norm": 3.409950238763562, "learning_rate": 1.604890861245898e-06, "loss": 0.1606, "step": 17478 }, { "epoch": 2.4680881107031913, "grad_norm": 3.1805890242857164, "learning_rate": 1.6040626582222706e-06, "loss": 0.1673, "step": 17479 }, { "epoch": 2.4682293137531772, "grad_norm": 2.785899036729417, "learning_rate": 1.6032346503166007e-06, "loss": 0.099, "step": 17480 }, { "epoch": 2.468370516803163, "grad_norm": 2.7751807296388615, "learning_rate": 1.6024068375481316e-06, "loss": 0.1507, "step": 17481 }, { "epoch": 2.468511719853149, "grad_norm": 3.1638597997118376, "learning_rate": 1.6015792199361003e-06, "loss": 0.1095, "step": 17482 }, { "epoch": 2.468652922903135, "grad_norm": 3.4051973491271985, "learning_rate": 1.6007517974997411e-06, "loss": 0.2028, "step": 17483 }, { "epoch": 2.4687941259531208, "grad_norm": 2.9150151152812587, "learning_rate": 1.5999245702582833e-06, "loss": 0.1532, "step": 17484 }, { "epoch": 2.4689353290031066, "grad_norm": 3.341212384410382, "learning_rate": 1.5990975382309503e-06, "loss": 0.1778, "step": 17485 }, { "epoch": 2.4690765320530925, "grad_norm": 3.635493155859683, "learning_rate": 1.5982707014369603e-06, "loss": 0.2279, "step": 17486 }, { "epoch": 2.4692177351030784, "grad_norm": 2.8349561273788506, "learning_rate": 1.5974440598955332e-06, "loss": 0.1595, "step": 17487 }, { "epoch": 2.4693589381530643, "grad_norm": 3.1000972300431178, "learning_rate": 1.5966176136258794e-06, "loss": 0.1504, "step": 17488 }, { "epoch": 2.46950014120305, "grad_norm": 3.340010291144055, "learning_rate": 1.595791362647201e-06, "loss": 0.123, "step": 17489 }, { "epoch": 2.469641344253036, "grad_norm": 4.185348719790143, "learning_rate": 1.5949653069787018e-06, "loss": 0.219, "step": 17490 }, { "epoch": 2.469782547303022, "grad_norm": 2.869577011066909, "learning_rate": 1.5941394466395766e-06, "loss": 0.1534, "step": 17491 }, { "epoch": 2.469923750353008, "grad_norm": 2.452904297489696, "learning_rate": 1.5933137816490229e-06, "loss": 0.1321, "step": 17492 }, { "epoch": 2.4700649534029937, "grad_norm": 2.715586916934936, "learning_rate": 1.5924883120262258e-06, "loss": 0.1322, "step": 17493 }, { "epoch": 2.4702061564529796, "grad_norm": 3.170999805839384, "learning_rate": 1.5916630377903696e-06, "loss": 0.1385, "step": 17494 }, { "epoch": 2.4703473595029655, "grad_norm": 2.8094828549000512, "learning_rate": 1.5908379589606338e-06, "loss": 0.1431, "step": 17495 }, { "epoch": 2.4704885625529514, "grad_norm": 3.062954453590655, "learning_rate": 1.5900130755561916e-06, "loss": 0.1447, "step": 17496 }, { "epoch": 2.4706297656029372, "grad_norm": 3.3837366998767857, "learning_rate": 1.5891883875962132e-06, "loss": 0.1411, "step": 17497 }, { "epoch": 2.470770968652923, "grad_norm": 3.2222464112601332, "learning_rate": 1.588363895099866e-06, "loss": 0.1517, "step": 17498 }, { "epoch": 2.470912171702909, "grad_norm": 2.4772060577729, "learning_rate": 1.5875395980863073e-06, "loss": 0.1217, "step": 17499 }, { "epoch": 2.471053374752895, "grad_norm": 3.4423324885946114, "learning_rate": 1.5867154965746956e-06, "loss": 0.1508, "step": 17500 }, { "epoch": 2.4711945778028808, "grad_norm": 3.0374894961655245, "learning_rate": 1.585891590584183e-06, "loss": 0.1324, "step": 17501 }, { "epoch": 2.4713357808528666, "grad_norm": 3.118968647874877, "learning_rate": 1.585067880133916e-06, "loss": 0.1307, "step": 17502 }, { "epoch": 2.4714769839028525, "grad_norm": 3.4277920608675974, "learning_rate": 1.584244365243035e-06, "loss": 0.1552, "step": 17503 }, { "epoch": 2.4716181869528384, "grad_norm": 2.5911953714514393, "learning_rate": 1.583421045930682e-06, "loss": 0.1153, "step": 17504 }, { "epoch": 2.4717593900028243, "grad_norm": 2.7895566997725467, "learning_rate": 1.5825979222159925e-06, "loss": 0.1226, "step": 17505 }, { "epoch": 2.47190059305281, "grad_norm": 3.9314562349301037, "learning_rate": 1.5817749941180893e-06, "loss": 0.159, "step": 17506 }, { "epoch": 2.472041796102796, "grad_norm": 2.8682109870292685, "learning_rate": 1.5809522616560991e-06, "loss": 0.1511, "step": 17507 }, { "epoch": 2.472182999152782, "grad_norm": 2.4711040950046232, "learning_rate": 1.580129724849141e-06, "loss": 0.1005, "step": 17508 }, { "epoch": 2.472324202202768, "grad_norm": 3.086285901791688, "learning_rate": 1.579307383716333e-06, "loss": 0.1231, "step": 17509 }, { "epoch": 2.4724654052527533, "grad_norm": 3.322237216498263, "learning_rate": 1.5784852382767856e-06, "loss": 0.1322, "step": 17510 }, { "epoch": 2.472606608302739, "grad_norm": 3.068353430211476, "learning_rate": 1.5776632885496045e-06, "loss": 0.1342, "step": 17511 }, { "epoch": 2.472747811352725, "grad_norm": 3.3780245109484093, "learning_rate": 1.5768415345538911e-06, "loss": 0.1665, "step": 17512 }, { "epoch": 2.472889014402711, "grad_norm": 3.5982552310028706, "learning_rate": 1.5760199763087425e-06, "loss": 0.165, "step": 17513 }, { "epoch": 2.473030217452697, "grad_norm": 3.1870990774202945, "learning_rate": 1.575198613833252e-06, "loss": 0.1308, "step": 17514 }, { "epoch": 2.4731714205026827, "grad_norm": 2.789887818065228, "learning_rate": 1.5743774471465068e-06, "loss": 0.128, "step": 17515 }, { "epoch": 2.4733126235526686, "grad_norm": 3.0491998200312262, "learning_rate": 1.5735564762675914e-06, "loss": 0.1275, "step": 17516 }, { "epoch": 2.4734538266026544, "grad_norm": 3.3299892960656403, "learning_rate": 1.572735701215584e-06, "loss": 0.147, "step": 17517 }, { "epoch": 2.4735950296526403, "grad_norm": 3.4027546741820673, "learning_rate": 1.5719151220095596e-06, "loss": 0.1351, "step": 17518 }, { "epoch": 2.473736232702626, "grad_norm": 2.9995565937027644, "learning_rate": 1.5710947386685881e-06, "loss": 0.1043, "step": 17519 }, { "epoch": 2.473877435752612, "grad_norm": 2.9120239310611793, "learning_rate": 1.5702745512117323e-06, "loss": 0.131, "step": 17520 }, { "epoch": 2.474018638802598, "grad_norm": 3.2943396028712555, "learning_rate": 1.569454559658058e-06, "loss": 0.1189, "step": 17521 }, { "epoch": 2.474159841852584, "grad_norm": 3.7027942697781993, "learning_rate": 1.5686347640266208e-06, "loss": 0.1727, "step": 17522 }, { "epoch": 2.4743010449025697, "grad_norm": 3.3277307577766115, "learning_rate": 1.5678151643364692e-06, "loss": 0.149, "step": 17523 }, { "epoch": 2.4744422479525556, "grad_norm": 2.552802389575025, "learning_rate": 1.56699576060665e-06, "loss": 0.1467, "step": 17524 }, { "epoch": 2.4745834510025415, "grad_norm": 3.0529331249628413, "learning_rate": 1.5661765528562057e-06, "loss": 0.156, "step": 17525 }, { "epoch": 2.4747246540525274, "grad_norm": 3.1889106045306086, "learning_rate": 1.5653575411041788e-06, "loss": 0.1381, "step": 17526 }, { "epoch": 2.4748658571025133, "grad_norm": 3.2774968889926073, "learning_rate": 1.5645387253695998e-06, "loss": 0.1592, "step": 17527 }, { "epoch": 2.475007060152499, "grad_norm": 2.76625527204261, "learning_rate": 1.5637201056714967e-06, "loss": 0.1408, "step": 17528 }, { "epoch": 2.475148263202485, "grad_norm": 2.8575636908666704, "learning_rate": 1.5629016820288966e-06, "loss": 0.1597, "step": 17529 }, { "epoch": 2.475289466252471, "grad_norm": 3.3435659455285394, "learning_rate": 1.5620834544608166e-06, "loss": 0.1193, "step": 17530 }, { "epoch": 2.475430669302457, "grad_norm": 3.197004504653835, "learning_rate": 1.5612654229862734e-06, "loss": 0.1353, "step": 17531 }, { "epoch": 2.4755718723524427, "grad_norm": 3.4938399935295466, "learning_rate": 1.5604475876242775e-06, "loss": 0.1235, "step": 17532 }, { "epoch": 2.4757130754024286, "grad_norm": 2.808277216274006, "learning_rate": 1.5596299483938348e-06, "loss": 0.1204, "step": 17533 }, { "epoch": 2.4758542784524145, "grad_norm": 3.7501660756823747, "learning_rate": 1.558812505313947e-06, "loss": 0.1465, "step": 17534 }, { "epoch": 2.4759954815024003, "grad_norm": 3.5110235989917253, "learning_rate": 1.5579952584036117e-06, "loss": 0.1502, "step": 17535 }, { "epoch": 2.4761366845523862, "grad_norm": 2.6060063753929734, "learning_rate": 1.5571782076818197e-06, "loss": 0.1123, "step": 17536 }, { "epoch": 2.476277887602372, "grad_norm": 3.9335809926105876, "learning_rate": 1.556361353167558e-06, "loss": 0.1805, "step": 17537 }, { "epoch": 2.476419090652358, "grad_norm": 3.346129541368894, "learning_rate": 1.5555446948798147e-06, "loss": 0.1285, "step": 17538 }, { "epoch": 2.476560293702344, "grad_norm": 2.812795225340383, "learning_rate": 1.5547282328375678e-06, "loss": 0.1395, "step": 17539 }, { "epoch": 2.4767014967523298, "grad_norm": 3.703814615676757, "learning_rate": 1.553911967059788e-06, "loss": 0.1733, "step": 17540 }, { "epoch": 2.4768426998023156, "grad_norm": 2.586212251506088, "learning_rate": 1.5530958975654454e-06, "loss": 0.1323, "step": 17541 }, { "epoch": 2.4769839028523015, "grad_norm": 3.5477150890357776, "learning_rate": 1.5522800243735037e-06, "loss": 0.1421, "step": 17542 }, { "epoch": 2.4771251059022874, "grad_norm": 3.7601379108465065, "learning_rate": 1.551464347502929e-06, "loss": 0.1491, "step": 17543 }, { "epoch": 2.4772663089522733, "grad_norm": 3.8945412677106686, "learning_rate": 1.5506488669726738e-06, "loss": 0.1672, "step": 17544 }, { "epoch": 2.477407512002259, "grad_norm": 3.0989518214987744, "learning_rate": 1.5498335828016909e-06, "loss": 0.1587, "step": 17545 }, { "epoch": 2.477548715052245, "grad_norm": 5.255567504835095, "learning_rate": 1.549018495008925e-06, "loss": 0.1956, "step": 17546 }, { "epoch": 2.477689918102231, "grad_norm": 5.168274462591074, "learning_rate": 1.5482036036133197e-06, "loss": 0.1355, "step": 17547 }, { "epoch": 2.477831121152217, "grad_norm": 2.927846773749783, "learning_rate": 1.5473889086338134e-06, "loss": 0.115, "step": 17548 }, { "epoch": 2.4779723242022027, "grad_norm": 2.7801101330000138, "learning_rate": 1.5465744100893377e-06, "loss": 0.0779, "step": 17549 }, { "epoch": 2.4781135272521886, "grad_norm": 2.8953702455430776, "learning_rate": 1.5457601079988226e-06, "loss": 0.1168, "step": 17550 }, { "epoch": 2.4782547303021745, "grad_norm": 3.3148804331646837, "learning_rate": 1.5449460023811913e-06, "loss": 0.1664, "step": 17551 }, { "epoch": 2.4783959333521604, "grad_norm": 4.025901712913785, "learning_rate": 1.5441320932553627e-06, "loss": 0.1633, "step": 17552 }, { "epoch": 2.4785371364021462, "grad_norm": 3.0077367367990178, "learning_rate": 1.543318380640253e-06, "loss": 0.1417, "step": 17553 }, { "epoch": 2.478678339452132, "grad_norm": 2.3914597141889855, "learning_rate": 1.5425048645547703e-06, "loss": 0.0908, "step": 17554 }, { "epoch": 2.478819542502118, "grad_norm": 3.578360414352736, "learning_rate": 1.5416915450178238e-06, "loss": 0.169, "step": 17555 }, { "epoch": 2.478960745552104, "grad_norm": 3.2555868666849253, "learning_rate": 1.5408784220483152e-06, "loss": 0.1755, "step": 17556 }, { "epoch": 2.4791019486020898, "grad_norm": 3.095855915544936, "learning_rate": 1.5400654956651362e-06, "loss": 0.1584, "step": 17557 }, { "epoch": 2.4792431516520756, "grad_norm": 3.1106867032734837, "learning_rate": 1.5392527658871813e-06, "loss": 0.108, "step": 17558 }, { "epoch": 2.4793843547020615, "grad_norm": 2.3079200992962345, "learning_rate": 1.538440232733337e-06, "loss": 0.1153, "step": 17559 }, { "epoch": 2.4795255577520474, "grad_norm": 3.98520952609747, "learning_rate": 1.537627896222489e-06, "loss": 0.1893, "step": 17560 }, { "epoch": 2.4796667608020333, "grad_norm": 3.1316306142290364, "learning_rate": 1.5368157563735142e-06, "loss": 0.1599, "step": 17561 }, { "epoch": 2.479807963852019, "grad_norm": 3.5687148132743958, "learning_rate": 1.5360038132052869e-06, "loss": 0.1529, "step": 17562 }, { "epoch": 2.479949166902005, "grad_norm": 3.016658880577094, "learning_rate": 1.5351920667366749e-06, "loss": 0.1272, "step": 17563 }, { "epoch": 2.480090369951991, "grad_norm": 4.533544539336306, "learning_rate": 1.5343805169865434e-06, "loss": 0.1952, "step": 17564 }, { "epoch": 2.480231573001977, "grad_norm": 3.0762225497191684, "learning_rate": 1.5335691639737528e-06, "loss": 0.1605, "step": 17565 }, { "epoch": 2.4803727760519627, "grad_norm": 3.0325499461721654, "learning_rate": 1.5327580077171589e-06, "loss": 0.1316, "step": 17566 }, { "epoch": 2.4805139791019486, "grad_norm": 2.955595019961034, "learning_rate": 1.5319470482356125e-06, "loss": 0.1437, "step": 17567 }, { "epoch": 2.4806551821519345, "grad_norm": 2.939285281850652, "learning_rate": 1.5311362855479584e-06, "loss": 0.1402, "step": 17568 }, { "epoch": 2.4807963852019204, "grad_norm": 3.425360283311531, "learning_rate": 1.5303257196730403e-06, "loss": 0.1489, "step": 17569 }, { "epoch": 2.4809375882519062, "grad_norm": 3.7449174537883, "learning_rate": 1.5295153506296944e-06, "loss": 0.1707, "step": 17570 }, { "epoch": 2.481078791301892, "grad_norm": 2.7661406994854625, "learning_rate": 1.5287051784367524e-06, "loss": 0.1328, "step": 17571 }, { "epoch": 2.481219994351878, "grad_norm": 3.3193693787937595, "learning_rate": 1.5278952031130445e-06, "loss": 0.1169, "step": 17572 }, { "epoch": 2.481361197401864, "grad_norm": 3.5496135187085294, "learning_rate": 1.527085424677397e-06, "loss": 0.1616, "step": 17573 }, { "epoch": 2.4815024004518498, "grad_norm": 2.671518400695353, "learning_rate": 1.5262758431486213e-06, "loss": 0.12, "step": 17574 }, { "epoch": 2.4816436035018357, "grad_norm": 2.958518437444626, "learning_rate": 1.525466458545536e-06, "loss": 0.1131, "step": 17575 }, { "epoch": 2.4817848065518215, "grad_norm": 2.61450492102859, "learning_rate": 1.524657270886951e-06, "loss": 0.1321, "step": 17576 }, { "epoch": 2.4819260096018074, "grad_norm": 3.1517575614491427, "learning_rate": 1.5238482801916676e-06, "loss": 0.1464, "step": 17577 }, { "epoch": 2.4820672126517933, "grad_norm": 3.569853137524002, "learning_rate": 1.5230394864784925e-06, "loss": 0.158, "step": 17578 }, { "epoch": 2.482208415701779, "grad_norm": 2.621176894006861, "learning_rate": 1.5222308897662185e-06, "loss": 0.1244, "step": 17579 }, { "epoch": 2.482349618751765, "grad_norm": 3.7318544841751398, "learning_rate": 1.5214224900736375e-06, "loss": 0.1623, "step": 17580 }, { "epoch": 2.482490821801751, "grad_norm": 3.0417534735483613, "learning_rate": 1.5206142874195362e-06, "loss": 0.131, "step": 17581 }, { "epoch": 2.482632024851737, "grad_norm": 3.2938696808151655, "learning_rate": 1.5198062818226967e-06, "loss": 0.1266, "step": 17582 }, { "epoch": 2.4827732279017227, "grad_norm": 3.1852431550670084, "learning_rate": 1.518998473301897e-06, "loss": 0.1202, "step": 17583 }, { "epoch": 2.4829144309517086, "grad_norm": 4.0581561771278665, "learning_rate": 1.5181908618759101e-06, "loss": 0.1659, "step": 17584 }, { "epoch": 2.4830556340016945, "grad_norm": 3.1108744921911677, "learning_rate": 1.5173834475635042e-06, "loss": 0.1217, "step": 17585 }, { "epoch": 2.4831968370516804, "grad_norm": 3.264317306737899, "learning_rate": 1.5165762303834442e-06, "loss": 0.1156, "step": 17586 }, { "epoch": 2.4833380401016663, "grad_norm": 3.2798832204496637, "learning_rate": 1.5157692103544884e-06, "loss": 0.1505, "step": 17587 }, { "epoch": 2.483479243151652, "grad_norm": 3.091248537062963, "learning_rate": 1.5149623874953922e-06, "loss": 0.141, "step": 17588 }, { "epoch": 2.483620446201638, "grad_norm": 3.549769446011815, "learning_rate": 1.5141557618249036e-06, "loss": 0.182, "step": 17589 }, { "epoch": 2.483761649251624, "grad_norm": 2.6999325024117535, "learning_rate": 1.5133493333617755e-06, "loss": 0.1135, "step": 17590 }, { "epoch": 2.48390285230161, "grad_norm": 3.102197807182706, "learning_rate": 1.5125431021247406e-06, "loss": 0.1644, "step": 17591 }, { "epoch": 2.4840440553515957, "grad_norm": 3.105625791971736, "learning_rate": 1.5117370681325393e-06, "loss": 0.1785, "step": 17592 }, { "epoch": 2.4841852584015816, "grad_norm": 3.3425189850768042, "learning_rate": 1.5109312314039027e-06, "loss": 0.1289, "step": 17593 }, { "epoch": 2.4843264614515674, "grad_norm": 3.664054501402457, "learning_rate": 1.5101255919575552e-06, "loss": 0.163, "step": 17594 }, { "epoch": 2.4844676645015533, "grad_norm": 3.1671716217886168, "learning_rate": 1.5093201498122246e-06, "loss": 0.1712, "step": 17595 }, { "epoch": 2.484608867551539, "grad_norm": 3.1665732967678952, "learning_rate": 1.5085149049866277e-06, "loss": 0.1081, "step": 17596 }, { "epoch": 2.484750070601525, "grad_norm": 4.158963507598375, "learning_rate": 1.5077098574994763e-06, "loss": 0.2032, "step": 17597 }, { "epoch": 2.484891273651511, "grad_norm": 3.375506140054457, "learning_rate": 1.5069050073694813e-06, "loss": 0.1293, "step": 17598 }, { "epoch": 2.485032476701497, "grad_norm": 3.0920788624373072, "learning_rate": 1.5061003546153452e-06, "loss": 0.112, "step": 17599 }, { "epoch": 2.4851736797514827, "grad_norm": 3.3849232947374905, "learning_rate": 1.5052958992557687e-06, "loss": 0.1539, "step": 17600 }, { "epoch": 2.4853148828014686, "grad_norm": 3.486714769602391, "learning_rate": 1.5044916413094478e-06, "loss": 0.132, "step": 17601 }, { "epoch": 2.4854560858514545, "grad_norm": 3.5646193416498027, "learning_rate": 1.5036875807950712e-06, "loss": 0.1777, "step": 17602 }, { "epoch": 2.4855972889014404, "grad_norm": 2.626828645203138, "learning_rate": 1.502883717731326e-06, "loss": 0.0978, "step": 17603 }, { "epoch": 2.4857384919514263, "grad_norm": 2.8047197023968526, "learning_rate": 1.502080052136894e-06, "loss": 0.1239, "step": 17604 }, { "epoch": 2.485879695001412, "grad_norm": 3.2551043324515128, "learning_rate": 1.5012765840304522e-06, "loss": 0.1446, "step": 17605 }, { "epoch": 2.486020898051398, "grad_norm": 2.7173452193609067, "learning_rate": 1.5004733134306692e-06, "loss": 0.1243, "step": 17606 }, { "epoch": 2.486162101101384, "grad_norm": 2.5948833901245334, "learning_rate": 1.4996702403562202e-06, "loss": 0.1089, "step": 17607 }, { "epoch": 2.48630330415137, "grad_norm": 2.9663052104352388, "learning_rate": 1.4988673648257624e-06, "loss": 0.1435, "step": 17608 }, { "epoch": 2.4864445072013557, "grad_norm": 2.8957222292630442, "learning_rate": 1.498064686857954e-06, "loss": 0.1409, "step": 17609 }, { "epoch": 2.4865857102513416, "grad_norm": 3.3490107451707085, "learning_rate": 1.4972622064714515e-06, "loss": 0.1513, "step": 17610 }, { "epoch": 2.4867269133013274, "grad_norm": 3.3136083917892876, "learning_rate": 1.496459923684902e-06, "loss": 0.1467, "step": 17611 }, { "epoch": 2.4868681163513133, "grad_norm": 2.8165355954495537, "learning_rate": 1.495657838516953e-06, "loss": 0.0933, "step": 17612 }, { "epoch": 2.487009319401299, "grad_norm": 3.683759204585895, "learning_rate": 1.4948559509862426e-06, "loss": 0.1759, "step": 17613 }, { "epoch": 2.487150522451285, "grad_norm": 3.4138961903378657, "learning_rate": 1.4940542611114073e-06, "loss": 0.1583, "step": 17614 }, { "epoch": 2.487291725501271, "grad_norm": 3.735169473794392, "learning_rate": 1.4932527689110764e-06, "loss": 0.1832, "step": 17615 }, { "epoch": 2.487432928551257, "grad_norm": 3.2170959087045548, "learning_rate": 1.4924514744038787e-06, "loss": 0.1477, "step": 17616 }, { "epoch": 2.4875741316012427, "grad_norm": 2.4690067280094934, "learning_rate": 1.4916503776084345e-06, "loss": 0.116, "step": 17617 }, { "epoch": 2.4877153346512286, "grad_norm": 3.2862290320364846, "learning_rate": 1.4908494785433603e-06, "loss": 0.1946, "step": 17618 }, { "epoch": 2.4878565377012145, "grad_norm": 3.6546785571413523, "learning_rate": 1.490048777227271e-06, "loss": 0.1559, "step": 17619 }, { "epoch": 2.4879977407512004, "grad_norm": 3.4036193841056077, "learning_rate": 1.4892482736787717e-06, "loss": 0.171, "step": 17620 }, { "epoch": 2.4881389438011863, "grad_norm": 2.711937421368551, "learning_rate": 1.4884479679164664e-06, "loss": 0.1214, "step": 17621 }, { "epoch": 2.488280146851172, "grad_norm": 3.3735292347676684, "learning_rate": 1.487647859958956e-06, "loss": 0.1665, "step": 17622 }, { "epoch": 2.488421349901158, "grad_norm": 2.752707937336635, "learning_rate": 1.48684794982483e-06, "loss": 0.134, "step": 17623 }, { "epoch": 2.488562552951144, "grad_norm": 2.968423486010832, "learning_rate": 1.4860482375326857e-06, "loss": 0.1291, "step": 17624 }, { "epoch": 2.48870375600113, "grad_norm": 3.172802763592464, "learning_rate": 1.4852487231011014e-06, "loss": 0.1542, "step": 17625 }, { "epoch": 2.4888449590511157, "grad_norm": 2.816323071394256, "learning_rate": 1.4844494065486592e-06, "loss": 0.1445, "step": 17626 }, { "epoch": 2.4889861621011016, "grad_norm": 4.087037557995734, "learning_rate": 1.4836502878939351e-06, "loss": 0.1511, "step": 17627 }, { "epoch": 2.4891273651510875, "grad_norm": 2.8674684954784597, "learning_rate": 1.4828513671554978e-06, "loss": 0.1226, "step": 17628 }, { "epoch": 2.489268568201073, "grad_norm": 3.9774349405132448, "learning_rate": 1.48205264435192e-06, "loss": 0.2094, "step": 17629 }, { "epoch": 2.489409771251059, "grad_norm": 3.4092866882016675, "learning_rate": 1.4812541195017593e-06, "loss": 0.1469, "step": 17630 }, { "epoch": 2.4895509743010447, "grad_norm": 3.6014494489813553, "learning_rate": 1.480455792623574e-06, "loss": 0.164, "step": 17631 }, { "epoch": 2.4896921773510305, "grad_norm": 3.310735723643198, "learning_rate": 1.4796576637359194e-06, "loss": 0.148, "step": 17632 }, { "epoch": 2.4898333804010164, "grad_norm": 3.4002429927075943, "learning_rate": 1.4788597328573362e-06, "loss": 0.1396, "step": 17633 }, { "epoch": 2.4899745834510023, "grad_norm": 2.820426823362409, "learning_rate": 1.478062000006375e-06, "loss": 0.1195, "step": 17634 }, { "epoch": 2.490115786500988, "grad_norm": 3.3137533043652794, "learning_rate": 1.4772644652015722e-06, "loss": 0.1833, "step": 17635 }, { "epoch": 2.490256989550974, "grad_norm": 3.8286886264338005, "learning_rate": 1.4764671284614629e-06, "loss": 0.1204, "step": 17636 }, { "epoch": 2.49039819260096, "grad_norm": 3.748248426270184, "learning_rate": 1.4756699898045767e-06, "loss": 0.1634, "step": 17637 }, { "epoch": 2.490539395650946, "grad_norm": 2.8661830166724362, "learning_rate": 1.474873049249439e-06, "loss": 0.1191, "step": 17638 }, { "epoch": 2.4906805987009317, "grad_norm": 3.7964122146022365, "learning_rate": 1.4740763068145692e-06, "loss": 0.1872, "step": 17639 }, { "epoch": 2.4908218017509176, "grad_norm": 3.4287227724208837, "learning_rate": 1.4732797625184814e-06, "loss": 0.1694, "step": 17640 }, { "epoch": 2.4909630048009035, "grad_norm": 3.6434173886332735, "learning_rate": 1.4724834163796942e-06, "loss": 0.1778, "step": 17641 }, { "epoch": 2.4911042078508894, "grad_norm": 3.1408643009788713, "learning_rate": 1.4716872684167082e-06, "loss": 0.1508, "step": 17642 }, { "epoch": 2.4912454109008753, "grad_norm": 2.878316055019368, "learning_rate": 1.4708913186480266e-06, "loss": 0.1562, "step": 17643 }, { "epoch": 2.491386613950861, "grad_norm": 2.6831706547096497, "learning_rate": 1.4700955670921468e-06, "loss": 0.119, "step": 17644 }, { "epoch": 2.491527817000847, "grad_norm": 2.8761823819257857, "learning_rate": 1.4693000137675605e-06, "loss": 0.1197, "step": 17645 }, { "epoch": 2.491669020050833, "grad_norm": 2.8153197980929408, "learning_rate": 1.4685046586927598e-06, "loss": 0.126, "step": 17646 }, { "epoch": 2.491810223100819, "grad_norm": 2.5314129963771057, "learning_rate": 1.4677095018862264e-06, "loss": 0.1246, "step": 17647 }, { "epoch": 2.4919514261508047, "grad_norm": 3.8199524010882855, "learning_rate": 1.46691454336644e-06, "loss": 0.1872, "step": 17648 }, { "epoch": 2.4920926292007906, "grad_norm": 3.000554238600249, "learning_rate": 1.4661197831518759e-06, "loss": 0.1481, "step": 17649 }, { "epoch": 2.4922338322507764, "grad_norm": 2.9145598270844144, "learning_rate": 1.465325221260998e-06, "loss": 0.1304, "step": 17650 }, { "epoch": 2.4923750353007623, "grad_norm": 2.927832959293454, "learning_rate": 1.4645308577122786e-06, "loss": 0.1281, "step": 17651 }, { "epoch": 2.492516238350748, "grad_norm": 4.019479257118664, "learning_rate": 1.463736692524176e-06, "loss": 0.1375, "step": 17652 }, { "epoch": 2.492657441400734, "grad_norm": 2.9943075521314064, "learning_rate": 1.4629427257151462e-06, "loss": 0.1384, "step": 17653 }, { "epoch": 2.49279864445072, "grad_norm": 3.3232067279097666, "learning_rate": 1.4621489573036407e-06, "loss": 0.1787, "step": 17654 }, { "epoch": 2.492939847500706, "grad_norm": 2.6769758857248713, "learning_rate": 1.4613553873081054e-06, "loss": 0.1531, "step": 17655 }, { "epoch": 2.4930810505506917, "grad_norm": 2.8277921668040626, "learning_rate": 1.4605620157469835e-06, "loss": 0.1308, "step": 17656 }, { "epoch": 2.4932222536006776, "grad_norm": 2.7161753447922794, "learning_rate": 1.4597688426387114e-06, "loss": 0.1087, "step": 17657 }, { "epoch": 2.4933634566506635, "grad_norm": 2.769098729745969, "learning_rate": 1.4589758680017263e-06, "loss": 0.128, "step": 17658 }, { "epoch": 2.4935046597006494, "grad_norm": 2.810276770286583, "learning_rate": 1.4581830918544515e-06, "loss": 0.1276, "step": 17659 }, { "epoch": 2.4936458627506353, "grad_norm": 2.961492398297367, "learning_rate": 1.4573905142153134e-06, "loss": 0.1558, "step": 17660 }, { "epoch": 2.493787065800621, "grad_norm": 2.9936755533757564, "learning_rate": 1.4565981351027303e-06, "loss": 0.1131, "step": 17661 }, { "epoch": 2.493928268850607, "grad_norm": 4.57162151288452, "learning_rate": 1.4558059545351144e-06, "loss": 0.2249, "step": 17662 }, { "epoch": 2.494069471900593, "grad_norm": 3.4459026064783216, "learning_rate": 1.45501397253088e-06, "loss": 0.1597, "step": 17663 }, { "epoch": 2.494210674950579, "grad_norm": 3.4957554275345175, "learning_rate": 1.4542221891084307e-06, "loss": 0.1386, "step": 17664 }, { "epoch": 2.4943518780005647, "grad_norm": 2.7017313861424643, "learning_rate": 1.453430604286168e-06, "loss": 0.13, "step": 17665 }, { "epoch": 2.4944930810505506, "grad_norm": 2.550638618392824, "learning_rate": 1.4526392180824888e-06, "loss": 0.1266, "step": 17666 }, { "epoch": 2.4946342841005364, "grad_norm": 2.846820340041814, "learning_rate": 1.4518480305157801e-06, "loss": 0.1378, "step": 17667 }, { "epoch": 2.4947754871505223, "grad_norm": 3.144776172013608, "learning_rate": 1.45105704160443e-06, "loss": 0.106, "step": 17668 }, { "epoch": 2.494916690200508, "grad_norm": 2.9809593820751874, "learning_rate": 1.4502662513668241e-06, "loss": 0.1468, "step": 17669 }, { "epoch": 2.495057893250494, "grad_norm": 2.666691103771677, "learning_rate": 1.4494756598213377e-06, "loss": 0.0924, "step": 17670 }, { "epoch": 2.49519909630048, "grad_norm": 3.496307187786648, "learning_rate": 1.448685266986345e-06, "loss": 0.1535, "step": 17671 }, { "epoch": 2.495340299350466, "grad_norm": 2.872519467735819, "learning_rate": 1.4478950728802132e-06, "loss": 0.1408, "step": 17672 }, { "epoch": 2.4954815024004517, "grad_norm": 2.855515221737581, "learning_rate": 1.4471050775213068e-06, "loss": 0.0981, "step": 17673 }, { "epoch": 2.4956227054504376, "grad_norm": 3.4126606031176303, "learning_rate": 1.4463152809279824e-06, "loss": 0.1625, "step": 17674 }, { "epoch": 2.4957639085004235, "grad_norm": 3.3112861265930493, "learning_rate": 1.4455256831186016e-06, "loss": 0.1637, "step": 17675 }, { "epoch": 2.4959051115504094, "grad_norm": 3.372083804849956, "learning_rate": 1.4447362841115075e-06, "loss": 0.1615, "step": 17676 }, { "epoch": 2.4960463146003953, "grad_norm": 2.9093657186264097, "learning_rate": 1.4439470839250469e-06, "loss": 0.1199, "step": 17677 }, { "epoch": 2.496187517650381, "grad_norm": 3.64037147727848, "learning_rate": 1.4431580825775604e-06, "loss": 0.141, "step": 17678 }, { "epoch": 2.496328720700367, "grad_norm": 2.909907819158235, "learning_rate": 1.4423692800873857e-06, "loss": 0.1306, "step": 17679 }, { "epoch": 2.496469923750353, "grad_norm": 3.5049646057336306, "learning_rate": 1.44158067647285e-06, "loss": 0.1811, "step": 17680 }, { "epoch": 2.496611126800339, "grad_norm": 2.7887536102456725, "learning_rate": 1.440792271752287e-06, "loss": 0.1397, "step": 17681 }, { "epoch": 2.4967523298503247, "grad_norm": 3.6735652905911405, "learning_rate": 1.440004065944014e-06, "loss": 0.1544, "step": 17682 }, { "epoch": 2.4968935329003106, "grad_norm": 3.1963855450477716, "learning_rate": 1.4392160590663517e-06, "loss": 0.1394, "step": 17683 }, { "epoch": 2.4970347359502965, "grad_norm": 3.2308228696393937, "learning_rate": 1.4384282511376102e-06, "loss": 0.1485, "step": 17684 }, { "epoch": 2.4971759390002823, "grad_norm": 3.6047256226610376, "learning_rate": 1.4376406421760946e-06, "loss": 0.1713, "step": 17685 }, { "epoch": 2.4973171420502682, "grad_norm": 2.9735617087654393, "learning_rate": 1.4368532322001161e-06, "loss": 0.1348, "step": 17686 }, { "epoch": 2.497458345100254, "grad_norm": 2.876349457098305, "learning_rate": 1.4360660212279698e-06, "loss": 0.0845, "step": 17687 }, { "epoch": 2.49759954815024, "grad_norm": 3.7817743996086275, "learning_rate": 1.4352790092779511e-06, "loss": 0.2039, "step": 17688 }, { "epoch": 2.497740751200226, "grad_norm": 3.9864033731864734, "learning_rate": 1.4344921963683501e-06, "loss": 0.2017, "step": 17689 }, { "epoch": 2.4978819542502118, "grad_norm": 3.641398258853611, "learning_rate": 1.4337055825174506e-06, "loss": 0.1722, "step": 17690 }, { "epoch": 2.4980231573001976, "grad_norm": 3.1718990632451174, "learning_rate": 1.4329191677435338e-06, "loss": 0.1535, "step": 17691 }, { "epoch": 2.4981643603501835, "grad_norm": 2.7504988441699285, "learning_rate": 1.4321329520648752e-06, "loss": 0.1107, "step": 17692 }, { "epoch": 2.4983055634001694, "grad_norm": 3.2757433255544717, "learning_rate": 1.4313469354997468e-06, "loss": 0.1477, "step": 17693 }, { "epoch": 2.4984467664501553, "grad_norm": 3.422196087027472, "learning_rate": 1.4305611180664157e-06, "loss": 0.107, "step": 17694 }, { "epoch": 2.498587969500141, "grad_norm": 3.2388472357516185, "learning_rate": 1.4297754997831436e-06, "loss": 0.1602, "step": 17695 }, { "epoch": 2.498729172550127, "grad_norm": 2.8631982454898286, "learning_rate": 1.4289900806681866e-06, "loss": 0.1255, "step": 17696 }, { "epoch": 2.498870375600113, "grad_norm": 3.666937346087086, "learning_rate": 1.4282048607397969e-06, "loss": 0.1914, "step": 17697 }, { "epoch": 2.499011578650099, "grad_norm": 2.7074175093749577, "learning_rate": 1.4274198400162265e-06, "loss": 0.1354, "step": 17698 }, { "epoch": 2.4991527817000847, "grad_norm": 2.9387672306406647, "learning_rate": 1.4266350185157175e-06, "loss": 0.1558, "step": 17699 }, { "epoch": 2.4992939847500706, "grad_norm": 3.5205264886051446, "learning_rate": 1.4258503962565096e-06, "loss": 0.1275, "step": 17700 }, { "epoch": 2.4994351878000565, "grad_norm": 3.606292690442444, "learning_rate": 1.425065973256833e-06, "loss": 0.1139, "step": 17701 }, { "epoch": 2.4995763908500424, "grad_norm": 2.8633068203393615, "learning_rate": 1.424281749534918e-06, "loss": 0.1518, "step": 17702 }, { "epoch": 2.4997175939000282, "grad_norm": 4.6914760522605405, "learning_rate": 1.4234977251089944e-06, "loss": 0.2149, "step": 17703 }, { "epoch": 2.499858796950014, "grad_norm": 2.8935568446058713, "learning_rate": 1.4227138999972801e-06, "loss": 0.1451, "step": 17704 }, { "epoch": 2.5, "grad_norm": 3.2382524583135885, "learning_rate": 1.4219302742179897e-06, "loss": 0.1344, "step": 17705 }, { "epoch": 2.500141203049986, "grad_norm": 3.077644609991341, "learning_rate": 1.4211468477893352e-06, "loss": 0.1096, "step": 17706 }, { "epoch": 2.5002824060999718, "grad_norm": 3.6937340168387167, "learning_rate": 1.4203636207295223e-06, "loss": 0.186, "step": 17707 }, { "epoch": 2.5004236091499576, "grad_norm": 4.376475668854339, "learning_rate": 1.4195805930567552e-06, "loss": 0.1904, "step": 17708 }, { "epoch": 2.5005648121999435, "grad_norm": 3.3251145702051663, "learning_rate": 1.418797764789228e-06, "loss": 0.1593, "step": 17709 }, { "epoch": 2.5007060152499294, "grad_norm": 2.948120597645961, "learning_rate": 1.4180151359451367e-06, "loss": 0.1191, "step": 17710 }, { "epoch": 2.5008472182999153, "grad_norm": 2.773734105597387, "learning_rate": 1.417232706542666e-06, "loss": 0.1001, "step": 17711 }, { "epoch": 2.500988421349901, "grad_norm": 2.8698132276112265, "learning_rate": 1.416450476600001e-06, "loss": 0.1002, "step": 17712 }, { "epoch": 2.501129624399887, "grad_norm": 2.7899202480713763, "learning_rate": 1.4156684461353188e-06, "loss": 0.112, "step": 17713 }, { "epoch": 2.501270827449873, "grad_norm": 4.037037634023028, "learning_rate": 1.414886615166794e-06, "loss": 0.1598, "step": 17714 }, { "epoch": 2.501412030499859, "grad_norm": 3.5715976057079195, "learning_rate": 1.4141049837125975e-06, "loss": 0.1834, "step": 17715 }, { "epoch": 2.5015532335498447, "grad_norm": 2.9350536063306603, "learning_rate": 1.4133235517908938e-06, "loss": 0.1133, "step": 17716 }, { "epoch": 2.5016944365998306, "grad_norm": 2.4682416029059264, "learning_rate": 1.4125423194198451e-06, "loss": 0.1348, "step": 17717 }, { "epoch": 2.5018356396498165, "grad_norm": 2.374448658115203, "learning_rate": 1.4117612866176022e-06, "loss": 0.099, "step": 17718 }, { "epoch": 2.5019768426998024, "grad_norm": 3.74376659795953, "learning_rate": 1.4109804534023153e-06, "loss": 0.1829, "step": 17719 }, { "epoch": 2.5021180457497882, "grad_norm": 3.3656452519731994, "learning_rate": 1.4101998197921352e-06, "loss": 0.1497, "step": 17720 }, { "epoch": 2.502259248799774, "grad_norm": 3.5764279477386074, "learning_rate": 1.409419385805202e-06, "loss": 0.1369, "step": 17721 }, { "epoch": 2.50240045184976, "grad_norm": 3.671451877334214, "learning_rate": 1.4086391514596532e-06, "loss": 0.1631, "step": 17722 }, { "epoch": 2.502541654899746, "grad_norm": 2.784895270238234, "learning_rate": 1.407859116773619e-06, "loss": 0.1171, "step": 17723 }, { "epoch": 2.5026828579497318, "grad_norm": 2.7587274399623705, "learning_rate": 1.407079281765229e-06, "loss": 0.1587, "step": 17724 }, { "epoch": 2.5028240609997177, "grad_norm": 2.9712312438809274, "learning_rate": 1.4062996464526046e-06, "loss": 0.1502, "step": 17725 }, { "epoch": 2.5029652640497035, "grad_norm": 2.779483522738431, "learning_rate": 1.4055202108538657e-06, "loss": 0.1255, "step": 17726 }, { "epoch": 2.5031064670996894, "grad_norm": 2.406566453506488, "learning_rate": 1.4047409749871255e-06, "loss": 0.1372, "step": 17727 }, { "epoch": 2.5032476701496753, "grad_norm": 3.598050669433387, "learning_rate": 1.4039619388704928e-06, "loss": 0.1672, "step": 17728 }, { "epoch": 2.503388873199661, "grad_norm": 4.3880446045780275, "learning_rate": 1.4031831025220722e-06, "loss": 0.1767, "step": 17729 }, { "epoch": 2.503530076249647, "grad_norm": 2.915244914417148, "learning_rate": 1.4024044659599633e-06, "loss": 0.1357, "step": 17730 }, { "epoch": 2.503671279299633, "grad_norm": 3.6895058595832446, "learning_rate": 1.40162602920226e-06, "loss": 0.182, "step": 17731 }, { "epoch": 2.503812482349619, "grad_norm": 3.2464712447418442, "learning_rate": 1.4008477922670571e-06, "loss": 0.1362, "step": 17732 }, { "epoch": 2.5039536853996047, "grad_norm": 2.840192158536428, "learning_rate": 1.4000697551724362e-06, "loss": 0.1597, "step": 17733 }, { "epoch": 2.5040948884495906, "grad_norm": 4.164212328946199, "learning_rate": 1.399291917936484e-06, "loss": 0.1536, "step": 17734 }, { "epoch": 2.5042360914995765, "grad_norm": 2.9629184691248773, "learning_rate": 1.39851428057727e-06, "loss": 0.1488, "step": 17735 }, { "epoch": 2.5043772945495624, "grad_norm": 2.9731145085347115, "learning_rate": 1.3977368431128679e-06, "loss": 0.1738, "step": 17736 }, { "epoch": 2.5045184975995483, "grad_norm": 3.0920639576385414, "learning_rate": 1.3969596055613489e-06, "loss": 0.149, "step": 17737 }, { "epoch": 2.504659700649534, "grad_norm": 2.8473342593088997, "learning_rate": 1.3961825679407726e-06, "loss": 0.1555, "step": 17738 }, { "epoch": 2.50480090369952, "grad_norm": 3.1679016095029953, "learning_rate": 1.3954057302691981e-06, "loss": 0.153, "step": 17739 }, { "epoch": 2.504942106749506, "grad_norm": 2.8458042187524195, "learning_rate": 1.3946290925646788e-06, "loss": 0.1316, "step": 17740 }, { "epoch": 2.505083309799492, "grad_norm": 3.5263398375881976, "learning_rate": 1.3938526548452625e-06, "loss": 0.1578, "step": 17741 }, { "epoch": 2.5052245128494777, "grad_norm": 2.8584185506630155, "learning_rate": 1.3930764171289935e-06, "loss": 0.1183, "step": 17742 }, { "epoch": 2.5053657158994636, "grad_norm": 3.5149181460571577, "learning_rate": 1.3923003794339119e-06, "loss": 0.1993, "step": 17743 }, { "epoch": 2.5055069189494494, "grad_norm": 4.118995532529588, "learning_rate": 1.391524541778052e-06, "loss": 0.15, "step": 17744 }, { "epoch": 2.5056481219994353, "grad_norm": 2.682071286008766, "learning_rate": 1.3907489041794442e-06, "loss": 0.1137, "step": 17745 }, { "epoch": 2.505789325049421, "grad_norm": 3.0904447931600187, "learning_rate": 1.3899734666561138e-06, "loss": 0.1448, "step": 17746 }, { "epoch": 2.505930528099407, "grad_norm": 3.253627278654687, "learning_rate": 1.389198229226081e-06, "loss": 0.1569, "step": 17747 }, { "epoch": 2.506071731149393, "grad_norm": 3.433113002227054, "learning_rate": 1.388423191907361e-06, "loss": 0.1563, "step": 17748 }, { "epoch": 2.506212934199379, "grad_norm": 3.288083247069401, "learning_rate": 1.3876483547179688e-06, "loss": 0.1626, "step": 17749 }, { "epoch": 2.5063541372493647, "grad_norm": 3.2312605029722437, "learning_rate": 1.3868737176759105e-06, "loss": 0.1067, "step": 17750 }, { "epoch": 2.5064953402993506, "grad_norm": 3.7956596571605097, "learning_rate": 1.386099280799188e-06, "loss": 0.1655, "step": 17751 }, { "epoch": 2.5066365433493365, "grad_norm": 3.890507407601209, "learning_rate": 1.3853250441057975e-06, "loss": 0.1646, "step": 17752 }, { "epoch": 2.5067777463993224, "grad_norm": 2.828658426710023, "learning_rate": 1.3845510076137293e-06, "loss": 0.1397, "step": 17753 }, { "epoch": 2.5069189494493083, "grad_norm": 3.2862781996478385, "learning_rate": 1.3837771713409776e-06, "loss": 0.1676, "step": 17754 }, { "epoch": 2.507060152499294, "grad_norm": 2.457387412741747, "learning_rate": 1.3830035353055226e-06, "loss": 0.1151, "step": 17755 }, { "epoch": 2.50720135554928, "grad_norm": 3.770826527989192, "learning_rate": 1.3822300995253445e-06, "loss": 0.1907, "step": 17756 }, { "epoch": 2.507342558599266, "grad_norm": 3.3760808042425965, "learning_rate": 1.381456864018418e-06, "loss": 0.1494, "step": 17757 }, { "epoch": 2.507483761649252, "grad_norm": 2.5337683551288634, "learning_rate": 1.3806838288027113e-06, "loss": 0.1102, "step": 17758 }, { "epoch": 2.5076249646992377, "grad_norm": 2.5196957495452796, "learning_rate": 1.3799109938961897e-06, "loss": 0.1208, "step": 17759 }, { "epoch": 2.5077661677492236, "grad_norm": 3.477083957112909, "learning_rate": 1.379138359316814e-06, "loss": 0.1583, "step": 17760 }, { "epoch": 2.5079073707992094, "grad_norm": 3.127146039160101, "learning_rate": 1.37836592508254e-06, "loss": 0.145, "step": 17761 }, { "epoch": 2.5080485738491953, "grad_norm": 2.7706950583164187, "learning_rate": 1.3775936912113187e-06, "loss": 0.1339, "step": 17762 }, { "epoch": 2.508189776899181, "grad_norm": 3.687061228170924, "learning_rate": 1.3768216577210959e-06, "loss": 0.1497, "step": 17763 }, { "epoch": 2.508330979949167, "grad_norm": 3.1651781421865497, "learning_rate": 1.3760498246298138e-06, "loss": 0.1485, "step": 17764 }, { "epoch": 2.508472182999153, "grad_norm": 2.640539815433156, "learning_rate": 1.375278191955407e-06, "loss": 0.1216, "step": 17765 }, { "epoch": 2.508613386049139, "grad_norm": 3.0437877265039934, "learning_rate": 1.3745067597158123e-06, "loss": 0.1648, "step": 17766 }, { "epoch": 2.5087545890991247, "grad_norm": 4.359468123888233, "learning_rate": 1.3737355279289566e-06, "loss": 0.1967, "step": 17767 }, { "epoch": 2.5088957921491106, "grad_norm": 3.0790462012443562, "learning_rate": 1.372964496612763e-06, "loss": 0.1557, "step": 17768 }, { "epoch": 2.5090369951990965, "grad_norm": 3.102154557241754, "learning_rate": 1.3721936657851464e-06, "loss": 0.1421, "step": 17769 }, { "epoch": 2.5091781982490824, "grad_norm": 2.972293204873582, "learning_rate": 1.3714230354640234e-06, "loss": 0.1358, "step": 17770 }, { "epoch": 2.5093194012990683, "grad_norm": 2.7600427766839273, "learning_rate": 1.3706526056673008e-06, "loss": 0.1156, "step": 17771 }, { "epoch": 2.509460604349054, "grad_norm": 2.960362660522951, "learning_rate": 1.3698823764128867e-06, "loss": 0.1755, "step": 17772 }, { "epoch": 2.50960180739904, "grad_norm": 3.4562601887374353, "learning_rate": 1.3691123477186786e-06, "loss": 0.1385, "step": 17773 }, { "epoch": 2.509743010449026, "grad_norm": 3.1566639756322648, "learning_rate": 1.3683425196025734e-06, "loss": 0.1566, "step": 17774 }, { "epoch": 2.509884213499012, "grad_norm": 3.0130359470943495, "learning_rate": 1.3675728920824593e-06, "loss": 0.1276, "step": 17775 }, { "epoch": 2.5100254165489977, "grad_norm": 3.369965768926957, "learning_rate": 1.366803465176223e-06, "loss": 0.1342, "step": 17776 }, { "epoch": 2.5101666195989836, "grad_norm": 3.5547676888308466, "learning_rate": 1.3660342389017466e-06, "loss": 0.1567, "step": 17777 }, { "epoch": 2.5103078226489695, "grad_norm": 3.4652555477147984, "learning_rate": 1.365265213276905e-06, "loss": 0.1896, "step": 17778 }, { "epoch": 2.5104490256989553, "grad_norm": 3.8968470072630126, "learning_rate": 1.3644963883195716e-06, "loss": 0.1723, "step": 17779 }, { "epoch": 2.510590228748941, "grad_norm": 2.6756036320989938, "learning_rate": 1.363727764047612e-06, "loss": 0.1274, "step": 17780 }, { "epoch": 2.510731431798927, "grad_norm": 3.1059373548846447, "learning_rate": 1.36295934047889e-06, "loss": 0.1267, "step": 17781 }, { "epoch": 2.510872634848913, "grad_norm": 3.7443498129390207, "learning_rate": 1.362191117631263e-06, "loss": 0.1433, "step": 17782 }, { "epoch": 2.511013837898899, "grad_norm": 2.5140585181932353, "learning_rate": 1.3614230955225817e-06, "loss": 0.1048, "step": 17783 }, { "epoch": 2.5111550409488848, "grad_norm": 3.2964762220893173, "learning_rate": 1.360655274170698e-06, "loss": 0.1446, "step": 17784 }, { "epoch": 2.5112962439988706, "grad_norm": 3.3729307613321304, "learning_rate": 1.359887653593458e-06, "loss": 0.1973, "step": 17785 }, { "epoch": 2.5114374470488565, "grad_norm": 3.078843701808194, "learning_rate": 1.359120233808695e-06, "loss": 0.145, "step": 17786 }, { "epoch": 2.5115786500988424, "grad_norm": 3.759164441460674, "learning_rate": 1.3583530148342461e-06, "loss": 0.1508, "step": 17787 }, { "epoch": 2.5117198531488283, "grad_norm": 2.920605521794752, "learning_rate": 1.357585996687939e-06, "loss": 0.1237, "step": 17788 }, { "epoch": 2.511861056198814, "grad_norm": 3.675097491352447, "learning_rate": 1.356819179387604e-06, "loss": 0.181, "step": 17789 }, { "epoch": 2.5120022592488, "grad_norm": 3.014531669693543, "learning_rate": 1.3560525629510567e-06, "loss": 0.1384, "step": 17790 }, { "epoch": 2.5121434622987855, "grad_norm": 2.7500545920779804, "learning_rate": 1.3552861473961164e-06, "loss": 0.1164, "step": 17791 }, { "epoch": 2.5122846653487714, "grad_norm": 3.0599655268110935, "learning_rate": 1.3545199327405922e-06, "loss": 0.1478, "step": 17792 }, { "epoch": 2.5124258683987573, "grad_norm": 2.816875012272122, "learning_rate": 1.3537539190022909e-06, "loss": 0.1443, "step": 17793 }, { "epoch": 2.512567071448743, "grad_norm": 3.8263027919260573, "learning_rate": 1.3529881061990147e-06, "loss": 0.1767, "step": 17794 }, { "epoch": 2.512708274498729, "grad_norm": 2.4378935489279105, "learning_rate": 1.3522224943485606e-06, "loss": 0.1169, "step": 17795 }, { "epoch": 2.512849477548715, "grad_norm": 2.672608408610488, "learning_rate": 1.3514570834687203e-06, "loss": 0.1461, "step": 17796 }, { "epoch": 2.512990680598701, "grad_norm": 2.981794932241918, "learning_rate": 1.3506918735772833e-06, "loss": 0.1332, "step": 17797 }, { "epoch": 2.5131318836486867, "grad_norm": 3.6424684184609992, "learning_rate": 1.3499268646920317e-06, "loss": 0.1237, "step": 17798 }, { "epoch": 2.5132730866986726, "grad_norm": 3.293564571589729, "learning_rate": 1.349162056830744e-06, "loss": 0.1212, "step": 17799 }, { "epoch": 2.5134142897486584, "grad_norm": 2.7906831206007308, "learning_rate": 1.3483974500111907e-06, "loss": 0.1497, "step": 17800 }, { "epoch": 2.5135554927986443, "grad_norm": 2.904687114161842, "learning_rate": 1.3476330442511476e-06, "loss": 0.1431, "step": 17801 }, { "epoch": 2.51369669584863, "grad_norm": 3.3867467954904678, "learning_rate": 1.3468688395683783e-06, "loss": 0.1601, "step": 17802 }, { "epoch": 2.513837898898616, "grad_norm": 2.7875826630949203, "learning_rate": 1.3461048359806384e-06, "loss": 0.1174, "step": 17803 }, { "epoch": 2.513979101948602, "grad_norm": 3.306422385859353, "learning_rate": 1.3453410335056837e-06, "loss": 0.1584, "step": 17804 }, { "epoch": 2.514120304998588, "grad_norm": 2.689681870730258, "learning_rate": 1.3445774321612637e-06, "loss": 0.1318, "step": 17805 }, { "epoch": 2.5142615080485737, "grad_norm": 2.691615647805514, "learning_rate": 1.3438140319651283e-06, "loss": 0.161, "step": 17806 }, { "epoch": 2.5144027110985596, "grad_norm": 3.1359956681233316, "learning_rate": 1.3430508329350166e-06, "loss": 0.135, "step": 17807 }, { "epoch": 2.5145439141485455, "grad_norm": 3.020221122671316, "learning_rate": 1.3422878350886658e-06, "loss": 0.1586, "step": 17808 }, { "epoch": 2.5146851171985314, "grad_norm": 3.883703848195002, "learning_rate": 1.341525038443806e-06, "loss": 0.1527, "step": 17809 }, { "epoch": 2.5148263202485173, "grad_norm": 3.5719676440572763, "learning_rate": 1.3407624430181644e-06, "loss": 0.1609, "step": 17810 }, { "epoch": 2.514967523298503, "grad_norm": 3.5975357001848316, "learning_rate": 1.3400000488294651e-06, "loss": 0.2061, "step": 17811 }, { "epoch": 2.515108726348489, "grad_norm": 4.417238649831551, "learning_rate": 1.3392378558954233e-06, "loss": 0.1952, "step": 17812 }, { "epoch": 2.515249929398475, "grad_norm": 3.3287719425128612, "learning_rate": 1.3384758642337547e-06, "loss": 0.1723, "step": 17813 }, { "epoch": 2.515391132448461, "grad_norm": 2.5588965650273945, "learning_rate": 1.3377140738621663e-06, "loss": 0.1251, "step": 17814 }, { "epoch": 2.5155323354984467, "grad_norm": 2.3893668428827897, "learning_rate": 1.3369524847983617e-06, "loss": 0.1166, "step": 17815 }, { "epoch": 2.5156735385484326, "grad_norm": 3.0073237600303133, "learning_rate": 1.33619109706004e-06, "loss": 0.1258, "step": 17816 }, { "epoch": 2.5158147415984184, "grad_norm": 3.8270897914115145, "learning_rate": 1.3354299106648927e-06, "loss": 0.1705, "step": 17817 }, { "epoch": 2.5159559446484043, "grad_norm": 2.865850416565983, "learning_rate": 1.3346689256306155e-06, "loss": 0.1218, "step": 17818 }, { "epoch": 2.51609714769839, "grad_norm": 3.043978326866323, "learning_rate": 1.3339081419748922e-06, "loss": 0.1358, "step": 17819 }, { "epoch": 2.516238350748376, "grad_norm": 3.1790148414003787, "learning_rate": 1.3331475597153988e-06, "loss": 0.1516, "step": 17820 }, { "epoch": 2.516379553798362, "grad_norm": 3.159448421592378, "learning_rate": 1.3323871788698129e-06, "loss": 0.1603, "step": 17821 }, { "epoch": 2.516520756848348, "grad_norm": 3.3134719346330654, "learning_rate": 1.331626999455804e-06, "loss": 0.117, "step": 17822 }, { "epoch": 2.5166619598983337, "grad_norm": 2.9465247194714013, "learning_rate": 1.3308670214910413e-06, "loss": 0.1309, "step": 17823 }, { "epoch": 2.5168031629483196, "grad_norm": 3.5150496964863995, "learning_rate": 1.3301072449931862e-06, "loss": 0.1269, "step": 17824 }, { "epoch": 2.5169443659983055, "grad_norm": 3.090989502713918, "learning_rate": 1.3293476699798936e-06, "loss": 0.1529, "step": 17825 }, { "epoch": 2.5170855690482914, "grad_norm": 2.8104367727431416, "learning_rate": 1.3285882964688168e-06, "loss": 0.135, "step": 17826 }, { "epoch": 2.5172267720982773, "grad_norm": 2.3917480967527753, "learning_rate": 1.3278291244776042e-06, "loss": 0.109, "step": 17827 }, { "epoch": 2.517367975148263, "grad_norm": 3.590218447312786, "learning_rate": 1.3270701540238962e-06, "loss": 0.1638, "step": 17828 }, { "epoch": 2.517509178198249, "grad_norm": 2.898975789987712, "learning_rate": 1.326311385125333e-06, "loss": 0.1576, "step": 17829 }, { "epoch": 2.517650381248235, "grad_norm": 3.3151279428839793, "learning_rate": 1.325552817799547e-06, "loss": 0.1703, "step": 17830 }, { "epoch": 2.517791584298221, "grad_norm": 3.170275309793015, "learning_rate": 1.3247944520641676e-06, "loss": 0.1431, "step": 17831 }, { "epoch": 2.5179327873482067, "grad_norm": 2.872940723178655, "learning_rate": 1.3240362879368184e-06, "loss": 0.139, "step": 17832 }, { "epoch": 2.5180739903981926, "grad_norm": 3.361711937741, "learning_rate": 1.3232783254351189e-06, "loss": 0.1424, "step": 17833 }, { "epoch": 2.5182151934481785, "grad_norm": 2.980034823747822, "learning_rate": 1.3225205645766815e-06, "loss": 0.1347, "step": 17834 }, { "epoch": 2.5183563964981643, "grad_norm": 3.774093300491614, "learning_rate": 1.3217630053791209e-06, "loss": 0.1905, "step": 17835 }, { "epoch": 2.5184975995481502, "grad_norm": 3.095581223696831, "learning_rate": 1.3210056478600431e-06, "loss": 0.1447, "step": 17836 }, { "epoch": 2.518638802598136, "grad_norm": 3.308932938084319, "learning_rate": 1.3202484920370429e-06, "loss": 0.1341, "step": 17837 }, { "epoch": 2.518780005648122, "grad_norm": 3.2545220217658373, "learning_rate": 1.3194915379277195e-06, "loss": 0.1302, "step": 17838 }, { "epoch": 2.518921208698108, "grad_norm": 3.164203567260968, "learning_rate": 1.3187347855496624e-06, "loss": 0.1399, "step": 17839 }, { "epoch": 2.5190624117480938, "grad_norm": 3.0568965598678624, "learning_rate": 1.3179782349204618e-06, "loss": 0.1027, "step": 17840 }, { "epoch": 2.5192036147980796, "grad_norm": 3.1524694720792077, "learning_rate": 1.3172218860576968e-06, "loss": 0.1781, "step": 17841 }, { "epoch": 2.5193448178480655, "grad_norm": 2.218604617404965, "learning_rate": 1.3164657389789459e-06, "loss": 0.1225, "step": 17842 }, { "epoch": 2.5194860208980514, "grad_norm": 3.654881000488937, "learning_rate": 1.3157097937017804e-06, "loss": 0.1631, "step": 17843 }, { "epoch": 2.5196272239480373, "grad_norm": 2.7272340935657624, "learning_rate": 1.314954050243772e-06, "loss": 0.1181, "step": 17844 }, { "epoch": 2.519768426998023, "grad_norm": 3.437392178145744, "learning_rate": 1.3141985086224751e-06, "loss": 0.1702, "step": 17845 }, { "epoch": 2.519909630048009, "grad_norm": 2.4036414645486817, "learning_rate": 1.3134431688554572e-06, "loss": 0.1085, "step": 17846 }, { "epoch": 2.520050833097995, "grad_norm": 3.776039234562146, "learning_rate": 1.3126880309602674e-06, "loss": 0.1906, "step": 17847 }, { "epoch": 2.520192036147981, "grad_norm": 3.0893740787679684, "learning_rate": 1.3119330949544573e-06, "loss": 0.1337, "step": 17848 }, { "epoch": 2.5203332391979667, "grad_norm": 3.579956223361852, "learning_rate": 1.3111783608555695e-06, "loss": 0.1684, "step": 17849 }, { "epoch": 2.5204744422479526, "grad_norm": 2.8015465770148054, "learning_rate": 1.3104238286811433e-06, "loss": 0.1164, "step": 17850 }, { "epoch": 2.5206156452979385, "grad_norm": 3.0556484374696096, "learning_rate": 1.3096694984487134e-06, "loss": 0.109, "step": 17851 }, { "epoch": 2.5207568483479244, "grad_norm": 3.6848313578823038, "learning_rate": 1.3089153701758128e-06, "loss": 0.1515, "step": 17852 }, { "epoch": 2.5208980513979102, "grad_norm": 3.5538462746564656, "learning_rate": 1.3081614438799684e-06, "loss": 0.1413, "step": 17853 }, { "epoch": 2.521039254447896, "grad_norm": 2.613997912471486, "learning_rate": 1.307407719578696e-06, "loss": 0.1581, "step": 17854 }, { "epoch": 2.521180457497882, "grad_norm": 3.1105543243159373, "learning_rate": 1.306654197289514e-06, "loss": 0.1515, "step": 17855 }, { "epoch": 2.521321660547868, "grad_norm": 3.336654300983237, "learning_rate": 1.305900877029932e-06, "loss": 0.1492, "step": 17856 }, { "epoch": 2.5214628635978538, "grad_norm": 3.310056258786906, "learning_rate": 1.305147758817461e-06, "loss": 0.1251, "step": 17857 }, { "epoch": 2.5216040666478396, "grad_norm": 3.39931861375549, "learning_rate": 1.3043948426696019e-06, "loss": 0.1536, "step": 17858 }, { "epoch": 2.5217452696978255, "grad_norm": 3.420519212846632, "learning_rate": 1.3036421286038502e-06, "loss": 0.1469, "step": 17859 }, { "epoch": 2.5218864727478114, "grad_norm": 3.0318853896333993, "learning_rate": 1.3028896166377003e-06, "loss": 0.1259, "step": 17860 }, { "epoch": 2.5220276757977973, "grad_norm": 2.8629541730566093, "learning_rate": 1.3021373067886423e-06, "loss": 0.1231, "step": 17861 }, { "epoch": 2.522168878847783, "grad_norm": 4.126518572045825, "learning_rate": 1.301385199074151e-06, "loss": 0.1698, "step": 17862 }, { "epoch": 2.522310081897769, "grad_norm": 2.9011912505895676, "learning_rate": 1.3006332935117149e-06, "loss": 0.1382, "step": 17863 }, { "epoch": 2.522451284947755, "grad_norm": 2.755854095489973, "learning_rate": 1.2998815901188033e-06, "loss": 0.1472, "step": 17864 }, { "epoch": 2.522592487997741, "grad_norm": 3.540766189193318, "learning_rate": 1.2991300889128867e-06, "loss": 0.1453, "step": 17865 }, { "epoch": 2.5227336910477267, "grad_norm": 3.0412141638485504, "learning_rate": 1.2983787899114286e-06, "loss": 0.134, "step": 17866 }, { "epoch": 2.5228748940977126, "grad_norm": 3.334270044690637, "learning_rate": 1.2976276931318899e-06, "loss": 0.1518, "step": 17867 }, { "epoch": 2.5230160971476985, "grad_norm": 3.7472718444192927, "learning_rate": 1.296876798591723e-06, "loss": 0.148, "step": 17868 }, { "epoch": 2.5231573001976844, "grad_norm": 2.504443856369491, "learning_rate": 1.296126106308383e-06, "loss": 0.0945, "step": 17869 }, { "epoch": 2.5232985032476702, "grad_norm": 4.586699043844658, "learning_rate": 1.2953756162993158e-06, "loss": 0.1783, "step": 17870 }, { "epoch": 2.523439706297656, "grad_norm": 3.1535144873287484, "learning_rate": 1.2946253285819576e-06, "loss": 0.1593, "step": 17871 }, { "epoch": 2.523580909347642, "grad_norm": 2.5678730355108965, "learning_rate": 1.2938752431737467e-06, "loss": 0.133, "step": 17872 }, { "epoch": 2.523722112397628, "grad_norm": 2.501959820838591, "learning_rate": 1.2931253600921157e-06, "loss": 0.1191, "step": 17873 }, { "epoch": 2.5238633154476138, "grad_norm": 3.5755679798366873, "learning_rate": 1.2923756793544895e-06, "loss": 0.1532, "step": 17874 }, { "epoch": 2.5240045184975997, "grad_norm": 4.333123912382137, "learning_rate": 1.2916262009782932e-06, "loss": 0.207, "step": 17875 }, { "epoch": 2.5241457215475855, "grad_norm": 3.130525655914661, "learning_rate": 1.290876924980944e-06, "loss": 0.1239, "step": 17876 }, { "epoch": 2.5242869245975714, "grad_norm": 2.9370417061050134, "learning_rate": 1.2901278513798533e-06, "loss": 0.1425, "step": 17877 }, { "epoch": 2.5244281276475573, "grad_norm": 3.0114264811866565, "learning_rate": 1.2893789801924328e-06, "loss": 0.157, "step": 17878 }, { "epoch": 2.524569330697543, "grad_norm": 3.074452252281101, "learning_rate": 1.2886303114360777e-06, "loss": 0.1223, "step": 17879 }, { "epoch": 2.524710533747529, "grad_norm": 2.638178710052066, "learning_rate": 1.2878818451281939e-06, "loss": 0.1154, "step": 17880 }, { "epoch": 2.524851736797515, "grad_norm": 3.324860966289068, "learning_rate": 1.287133581286174e-06, "loss": 0.1336, "step": 17881 }, { "epoch": 2.524992939847501, "grad_norm": 3.8958962065012592, "learning_rate": 1.2863855199274079e-06, "loss": 0.1306, "step": 17882 }, { "epoch": 2.5251341428974867, "grad_norm": 3.833048029144912, "learning_rate": 1.2856376610692777e-06, "loss": 0.1737, "step": 17883 }, { "epoch": 2.5252753459474726, "grad_norm": 3.924621752457308, "learning_rate": 1.2848900047291657e-06, "loss": 0.1688, "step": 17884 }, { "epoch": 2.5254165489974585, "grad_norm": 2.9610364217986698, "learning_rate": 1.2841425509244453e-06, "loss": 0.1307, "step": 17885 }, { "epoch": 2.5255577520474444, "grad_norm": 3.1188489589227886, "learning_rate": 1.2833952996724864e-06, "loss": 0.1339, "step": 17886 }, { "epoch": 2.5256989550974303, "grad_norm": 2.9292128683730243, "learning_rate": 1.2826482509906613e-06, "loss": 0.1261, "step": 17887 }, { "epoch": 2.525840158147416, "grad_norm": 2.845658638002225, "learning_rate": 1.281901404896323e-06, "loss": 0.1251, "step": 17888 }, { "epoch": 2.525981361197402, "grad_norm": 3.04497922687233, "learning_rate": 1.281154761406831e-06, "loss": 0.143, "step": 17889 }, { "epoch": 2.526122564247388, "grad_norm": 3.214037483231397, "learning_rate": 1.280408320539538e-06, "loss": 0.1425, "step": 17890 }, { "epoch": 2.526263767297374, "grad_norm": 2.908666696091374, "learning_rate": 1.2796620823117866e-06, "loss": 0.1489, "step": 17891 }, { "epoch": 2.5264049703473597, "grad_norm": 4.105252374008186, "learning_rate": 1.2789160467409244e-06, "loss": 0.1695, "step": 17892 }, { "epoch": 2.526546173397345, "grad_norm": 3.812597119509838, "learning_rate": 1.2781702138442874e-06, "loss": 0.1485, "step": 17893 }, { "epoch": 2.526687376447331, "grad_norm": 3.307336179256453, "learning_rate": 1.2774245836392085e-06, "loss": 0.1569, "step": 17894 }, { "epoch": 2.526828579497317, "grad_norm": 3.1161358727169888, "learning_rate": 1.276679156143017e-06, "loss": 0.1286, "step": 17895 }, { "epoch": 2.5269697825473028, "grad_norm": 3.2250704341595804, "learning_rate": 1.2759339313730302e-06, "loss": 0.1671, "step": 17896 }, { "epoch": 2.5271109855972886, "grad_norm": 2.726062067820866, "learning_rate": 1.275188909346573e-06, "loss": 0.1082, "step": 17897 }, { "epoch": 2.5272521886472745, "grad_norm": 2.90972061794221, "learning_rate": 1.2744440900809584e-06, "loss": 0.1507, "step": 17898 }, { "epoch": 2.5273933916972604, "grad_norm": 2.4721872594343988, "learning_rate": 1.2736994735934949e-06, "loss": 0.0995, "step": 17899 }, { "epoch": 2.5275345947472463, "grad_norm": 2.827791874576526, "learning_rate": 1.2729550599014862e-06, "loss": 0.1883, "step": 17900 }, { "epoch": 2.527675797797232, "grad_norm": 4.156221819471408, "learning_rate": 1.272210849022234e-06, "loss": 0.161, "step": 17901 }, { "epoch": 2.527817000847218, "grad_norm": 3.38930211263252, "learning_rate": 1.2714668409730312e-06, "loss": 0.1453, "step": 17902 }, { "epoch": 2.527958203897204, "grad_norm": 3.5936047884381765, "learning_rate": 1.2707230357711686e-06, "loss": 0.1566, "step": 17903 }, { "epoch": 2.52809940694719, "grad_norm": 3.4384232930137233, "learning_rate": 1.2699794334339356e-06, "loss": 0.1282, "step": 17904 }, { "epoch": 2.5282406099971757, "grad_norm": 3.0540859944531817, "learning_rate": 1.2692360339786092e-06, "loss": 0.1287, "step": 17905 }, { "epoch": 2.5283818130471616, "grad_norm": 2.9732653266311564, "learning_rate": 1.268492837422467e-06, "loss": 0.1593, "step": 17906 }, { "epoch": 2.5285230160971475, "grad_norm": 3.0226697379079894, "learning_rate": 1.2677498437827796e-06, "loss": 0.1305, "step": 17907 }, { "epoch": 2.5286642191471334, "grad_norm": 3.325004731993773, "learning_rate": 1.2670070530768131e-06, "loss": 0.1619, "step": 17908 }, { "epoch": 2.5288054221971192, "grad_norm": 3.657026739442091, "learning_rate": 1.2662644653218336e-06, "loss": 0.1876, "step": 17909 }, { "epoch": 2.528946625247105, "grad_norm": 3.081597299036925, "learning_rate": 1.2655220805350953e-06, "loss": 0.1153, "step": 17910 }, { "epoch": 2.529087828297091, "grad_norm": 3.171658293877184, "learning_rate": 1.2647798987338523e-06, "loss": 0.1323, "step": 17911 }, { "epoch": 2.529229031347077, "grad_norm": 3.0492145281410084, "learning_rate": 1.2640379199353536e-06, "loss": 0.1315, "step": 17912 }, { "epoch": 2.5293702343970628, "grad_norm": 3.6350764348370914, "learning_rate": 1.263296144156837e-06, "loss": 0.1426, "step": 17913 }, { "epoch": 2.5295114374470486, "grad_norm": 3.7049718902194035, "learning_rate": 1.2625545714155474e-06, "loss": 0.1582, "step": 17914 }, { "epoch": 2.5296526404970345, "grad_norm": 3.0134031011172833, "learning_rate": 1.2618132017287154e-06, "loss": 0.1212, "step": 17915 }, { "epoch": 2.5297938435470204, "grad_norm": 3.183295819096301, "learning_rate": 1.2610720351135718e-06, "loss": 0.1276, "step": 17916 }, { "epoch": 2.5299350465970063, "grad_norm": 2.990624145301635, "learning_rate": 1.2603310715873396e-06, "loss": 0.1386, "step": 17917 }, { "epoch": 2.530076249646992, "grad_norm": 3.2218574780251115, "learning_rate": 1.259590311167238e-06, "loss": 0.1351, "step": 17918 }, { "epoch": 2.530217452696978, "grad_norm": 4.022017394200225, "learning_rate": 1.2588497538704836e-06, "loss": 0.1584, "step": 17919 }, { "epoch": 2.530358655746964, "grad_norm": 3.574689234653763, "learning_rate": 1.2581093997142846e-06, "loss": 0.1882, "step": 17920 }, { "epoch": 2.53049985879695, "grad_norm": 3.565516823029238, "learning_rate": 1.2573692487158507e-06, "loss": 0.1819, "step": 17921 }, { "epoch": 2.5306410618469357, "grad_norm": 2.7373813086691934, "learning_rate": 1.256629300892379e-06, "loss": 0.1216, "step": 17922 }, { "epoch": 2.5307822648969216, "grad_norm": 2.944383859194827, "learning_rate": 1.2558895562610652e-06, "loss": 0.1284, "step": 17923 }, { "epoch": 2.5309234679469075, "grad_norm": 3.337290051686374, "learning_rate": 1.2551500148391026e-06, "loss": 0.1565, "step": 17924 }, { "epoch": 2.5310646709968934, "grad_norm": 3.4279352818867084, "learning_rate": 1.2544106766436747e-06, "loss": 0.1437, "step": 17925 }, { "epoch": 2.5312058740468792, "grad_norm": 3.3104953242678987, "learning_rate": 1.2536715416919676e-06, "loss": 0.1623, "step": 17926 }, { "epoch": 2.531347077096865, "grad_norm": 3.2516262132200473, "learning_rate": 1.2529326100011575e-06, "loss": 0.1588, "step": 17927 }, { "epoch": 2.531488280146851, "grad_norm": 3.2735550571650567, "learning_rate": 1.252193881588415e-06, "loss": 0.1121, "step": 17928 }, { "epoch": 2.531629483196837, "grad_norm": 2.9386585500832765, "learning_rate": 1.2514553564709108e-06, "loss": 0.1513, "step": 17929 }, { "epoch": 2.5317706862468228, "grad_norm": 3.8575094432437766, "learning_rate": 1.2507170346658027e-06, "loss": 0.1628, "step": 17930 }, { "epoch": 2.5319118892968087, "grad_norm": 3.68098040739097, "learning_rate": 1.2499789161902532e-06, "loss": 0.2038, "step": 17931 }, { "epoch": 2.5320530923467945, "grad_norm": 3.916451549711646, "learning_rate": 1.2492410010614154e-06, "loss": 0.2006, "step": 17932 }, { "epoch": 2.5321942953967804, "grad_norm": 4.213964961682608, "learning_rate": 1.2485032892964378e-06, "loss": 0.2163, "step": 17933 }, { "epoch": 2.5323354984467663, "grad_norm": 3.3735887402905838, "learning_rate": 1.2477657809124632e-06, "loss": 0.145, "step": 17934 }, { "epoch": 2.532476701496752, "grad_norm": 3.001783780144544, "learning_rate": 1.2470284759266339e-06, "loss": 0.127, "step": 17935 }, { "epoch": 2.532617904546738, "grad_norm": 3.329448586744464, "learning_rate": 1.246291374356081e-06, "loss": 0.1341, "step": 17936 }, { "epoch": 2.532759107596724, "grad_norm": 3.2206152528201994, "learning_rate": 1.245554476217935e-06, "loss": 0.1413, "step": 17937 }, { "epoch": 2.53290031064671, "grad_norm": 3.7621965677205145, "learning_rate": 1.244817781529326e-06, "loss": 0.1651, "step": 17938 }, { "epoch": 2.5330415136966957, "grad_norm": 2.8096233127634243, "learning_rate": 1.2440812903073685e-06, "loss": 0.1262, "step": 17939 }, { "epoch": 2.5331827167466816, "grad_norm": 3.7880928687248105, "learning_rate": 1.2433450025691807e-06, "loss": 0.201, "step": 17940 }, { "epoch": 2.5333239197966675, "grad_norm": 3.512445285956427, "learning_rate": 1.2426089183318736e-06, "loss": 0.1564, "step": 17941 }, { "epoch": 2.5334651228466534, "grad_norm": 3.832429262458635, "learning_rate": 1.24187303761255e-06, "loss": 0.2013, "step": 17942 }, { "epoch": 2.5336063258966393, "grad_norm": 3.1646061846412508, "learning_rate": 1.2411373604283173e-06, "loss": 0.1699, "step": 17943 }, { "epoch": 2.533747528946625, "grad_norm": 3.5536167828080103, "learning_rate": 1.2404018867962697e-06, "loss": 0.1769, "step": 17944 }, { "epoch": 2.533888731996611, "grad_norm": 3.6389528998608354, "learning_rate": 1.2396666167335002e-06, "loss": 0.2009, "step": 17945 }, { "epoch": 2.534029935046597, "grad_norm": 2.943832723543698, "learning_rate": 1.2389315502570965e-06, "loss": 0.1168, "step": 17946 }, { "epoch": 2.534171138096583, "grad_norm": 3.507909220029245, "learning_rate": 1.2381966873841377e-06, "loss": 0.1412, "step": 17947 }, { "epoch": 2.5343123411465687, "grad_norm": 2.9891693906352956, "learning_rate": 1.2374620281317019e-06, "loss": 0.1749, "step": 17948 }, { "epoch": 2.5344535441965546, "grad_norm": 3.30558816419717, "learning_rate": 1.236727572516867e-06, "loss": 0.1495, "step": 17949 }, { "epoch": 2.5345947472465404, "grad_norm": 2.4361804430501803, "learning_rate": 1.2359933205566987e-06, "loss": 0.1002, "step": 17950 }, { "epoch": 2.5347359502965263, "grad_norm": 3.14070621259883, "learning_rate": 1.23525927226826e-06, "loss": 0.1449, "step": 17951 }, { "epoch": 2.534877153346512, "grad_norm": 3.4048775836978646, "learning_rate": 1.2345254276686114e-06, "loss": 0.1873, "step": 17952 }, { "epoch": 2.535018356396498, "grad_norm": 2.872671392855132, "learning_rate": 1.233791786774805e-06, "loss": 0.1309, "step": 17953 }, { "epoch": 2.535159559446484, "grad_norm": 2.453820147212221, "learning_rate": 1.2330583496038929e-06, "loss": 0.1124, "step": 17954 }, { "epoch": 2.53530076249647, "grad_norm": 3.299496187608401, "learning_rate": 1.232325116172919e-06, "loss": 0.1328, "step": 17955 }, { "epoch": 2.5354419655464557, "grad_norm": 2.8010480907765274, "learning_rate": 1.2315920864989218e-06, "loss": 0.1413, "step": 17956 }, { "epoch": 2.5355831685964416, "grad_norm": 3.3913207111764856, "learning_rate": 1.2308592605989378e-06, "loss": 0.1528, "step": 17957 }, { "epoch": 2.5357243716464275, "grad_norm": 4.269614184310873, "learning_rate": 1.230126638489998e-06, "loss": 0.164, "step": 17958 }, { "epoch": 2.5358655746964134, "grad_norm": 3.1456667558458546, "learning_rate": 1.2293942201891275e-06, "loss": 0.1417, "step": 17959 }, { "epoch": 2.5360067777463993, "grad_norm": 2.616921534103586, "learning_rate": 1.2286620057133459e-06, "loss": 0.1277, "step": 17960 }, { "epoch": 2.536147980796385, "grad_norm": 3.5435002934719284, "learning_rate": 1.227929995079673e-06, "loss": 0.1559, "step": 17961 }, { "epoch": 2.536289183846371, "grad_norm": 2.499789768934979, "learning_rate": 1.2271981883051187e-06, "loss": 0.1076, "step": 17962 }, { "epoch": 2.536430386896357, "grad_norm": 2.8052997226549268, "learning_rate": 1.2264665854066915e-06, "loss": 0.1169, "step": 17963 }, { "epoch": 2.536571589946343, "grad_norm": 3.467757325058574, "learning_rate": 1.22573518640139e-06, "loss": 0.1349, "step": 17964 }, { "epoch": 2.5367127929963287, "grad_norm": 3.149765665791565, "learning_rate": 1.2250039913062118e-06, "loss": 0.1581, "step": 17965 }, { "epoch": 2.5368539960463146, "grad_norm": 3.2217024430721994, "learning_rate": 1.2242730001381532e-06, "loss": 0.1526, "step": 17966 }, { "epoch": 2.5369951990963004, "grad_norm": 2.6459250832005488, "learning_rate": 1.2235422129141993e-06, "loss": 0.1272, "step": 17967 }, { "epoch": 2.5371364021462863, "grad_norm": 3.563561349237218, "learning_rate": 1.2228116296513348e-06, "loss": 0.1554, "step": 17968 }, { "epoch": 2.537277605196272, "grad_norm": 2.8949328197456135, "learning_rate": 1.2220812503665369e-06, "loss": 0.1592, "step": 17969 }, { "epoch": 2.537418808246258, "grad_norm": 4.090879771299867, "learning_rate": 1.221351075076781e-06, "loss": 0.2142, "step": 17970 }, { "epoch": 2.537560011296244, "grad_norm": 3.3000035737741467, "learning_rate": 1.2206211037990346e-06, "loss": 0.159, "step": 17971 }, { "epoch": 2.53770121434623, "grad_norm": 3.28890129834144, "learning_rate": 1.2198913365502606e-06, "loss": 0.1574, "step": 17972 }, { "epoch": 2.5378424173962157, "grad_norm": 3.6398646237433656, "learning_rate": 1.2191617733474214e-06, "loss": 0.1599, "step": 17973 }, { "epoch": 2.5379836204462016, "grad_norm": 3.6789510764510833, "learning_rate": 1.218432414207471e-06, "loss": 0.1407, "step": 17974 }, { "epoch": 2.5381248234961875, "grad_norm": 2.397577175634883, "learning_rate": 1.2177032591473582e-06, "loss": 0.1011, "step": 17975 }, { "epoch": 2.5382660265461734, "grad_norm": 3.037995309263503, "learning_rate": 1.216974308184029e-06, "loss": 0.1209, "step": 17976 }, { "epoch": 2.5384072295961593, "grad_norm": 3.3815067449837555, "learning_rate": 1.2162455613344214e-06, "loss": 0.1664, "step": 17977 }, { "epoch": 2.538548432646145, "grad_norm": 3.726942759271576, "learning_rate": 1.2155170186154753e-06, "loss": 0.1893, "step": 17978 }, { "epoch": 2.538689635696131, "grad_norm": 2.941641481621435, "learning_rate": 1.2147886800441211e-06, "loss": 0.1424, "step": 17979 }, { "epoch": 2.538830838746117, "grad_norm": 3.6561841344733628, "learning_rate": 1.2140605456372856e-06, "loss": 0.1865, "step": 17980 }, { "epoch": 2.538972041796103, "grad_norm": 3.7497889684476595, "learning_rate": 1.2133326154118862e-06, "loss": 0.1693, "step": 17981 }, { "epoch": 2.5391132448460887, "grad_norm": 2.559632452741716, "learning_rate": 1.2126048893848396e-06, "loss": 0.1226, "step": 17982 }, { "epoch": 2.5392544478960746, "grad_norm": 3.674432623228928, "learning_rate": 1.2118773675730633e-06, "loss": 0.18, "step": 17983 }, { "epoch": 2.5393956509460605, "grad_norm": 2.69022657877782, "learning_rate": 1.2111500499934613e-06, "loss": 0.1137, "step": 17984 }, { "epoch": 2.5395368539960463, "grad_norm": 2.2270626533320605, "learning_rate": 1.2104229366629372e-06, "loss": 0.1012, "step": 17985 }, { "epoch": 2.5396780570460322, "grad_norm": 3.1877022658913474, "learning_rate": 1.2096960275983872e-06, "loss": 0.1197, "step": 17986 }, { "epoch": 2.539819260096018, "grad_norm": 3.2463164643972675, "learning_rate": 1.2089693228167054e-06, "loss": 0.1323, "step": 17987 }, { "epoch": 2.539960463146004, "grad_norm": 3.298669733793099, "learning_rate": 1.208242822334781e-06, "loss": 0.1598, "step": 17988 }, { "epoch": 2.54010166619599, "grad_norm": 2.8560693585959607, "learning_rate": 1.2075165261694954e-06, "loss": 0.1217, "step": 17989 }, { "epoch": 2.5402428692459758, "grad_norm": 3.8388646052403694, "learning_rate": 1.206790434337729e-06, "loss": 0.1884, "step": 17990 }, { "epoch": 2.5403840722959616, "grad_norm": 3.1679835728061323, "learning_rate": 1.206064546856356e-06, "loss": 0.1321, "step": 17991 }, { "epoch": 2.5405252753459475, "grad_norm": 2.9902272323883987, "learning_rate": 1.2053388637422437e-06, "loss": 0.1377, "step": 17992 }, { "epoch": 2.5406664783959334, "grad_norm": 3.245041660848904, "learning_rate": 1.2046133850122587e-06, "loss": 0.1376, "step": 17993 }, { "epoch": 2.5408076814459193, "grad_norm": 2.630845905352137, "learning_rate": 1.203888110683259e-06, "loss": 0.1008, "step": 17994 }, { "epoch": 2.540948884495905, "grad_norm": 2.65815234242837, "learning_rate": 1.2031630407721018e-06, "loss": 0.1148, "step": 17995 }, { "epoch": 2.541090087545891, "grad_norm": 2.710279901800701, "learning_rate": 1.2024381752956372e-06, "loss": 0.1031, "step": 17996 }, { "epoch": 2.541231290595877, "grad_norm": 2.942529364963399, "learning_rate": 1.2017135142707115e-06, "loss": 0.1428, "step": 17997 }, { "epoch": 2.541372493645863, "grad_norm": 3.320646235592123, "learning_rate": 1.2009890577141625e-06, "loss": 0.1702, "step": 17998 }, { "epoch": 2.5415136966958487, "grad_norm": 3.316460452522499, "learning_rate": 1.2002648056428257e-06, "loss": 0.1405, "step": 17999 }, { "epoch": 2.5416548997458346, "grad_norm": 3.0692207464299903, "learning_rate": 1.1995407580735364e-06, "loss": 0.1505, "step": 18000 }, { "epoch": 2.5417961027958205, "grad_norm": 2.6537113515097777, "learning_rate": 1.1988169150231188e-06, "loss": 0.1237, "step": 18001 }, { "epoch": 2.5419373058458063, "grad_norm": 3.197743118005143, "learning_rate": 1.1980932765083964e-06, "loss": 0.1418, "step": 18002 }, { "epoch": 2.5420785088957922, "grad_norm": 3.8095796529863266, "learning_rate": 1.1973698425461832e-06, "loss": 0.1572, "step": 18003 }, { "epoch": 2.542219711945778, "grad_norm": 3.271490336134001, "learning_rate": 1.196646613153295e-06, "loss": 0.1694, "step": 18004 }, { "epoch": 2.542360914995764, "grad_norm": 2.5480965960182638, "learning_rate": 1.195923588346537e-06, "loss": 0.1154, "step": 18005 }, { "epoch": 2.54250211804575, "grad_norm": 3.7332051020000576, "learning_rate": 1.1952007681427124e-06, "loss": 0.1586, "step": 18006 }, { "epoch": 2.5426433210957358, "grad_norm": 3.4506593120840336, "learning_rate": 1.1944781525586192e-06, "loss": 0.1775, "step": 18007 }, { "epoch": 2.5427845241457216, "grad_norm": 3.262620531093886, "learning_rate": 1.1937557416110512e-06, "loss": 0.1101, "step": 18008 }, { "epoch": 2.5429257271957075, "grad_norm": 4.028409640785629, "learning_rate": 1.1930335353167965e-06, "loss": 0.185, "step": 18009 }, { "epoch": 2.5430669302456934, "grad_norm": 2.951010363710715, "learning_rate": 1.1923115336926394e-06, "loss": 0.1421, "step": 18010 }, { "epoch": 2.5432081332956793, "grad_norm": 3.4403129332878293, "learning_rate": 1.1915897367553564e-06, "loss": 0.1095, "step": 18011 }, { "epoch": 2.543349336345665, "grad_norm": 2.944385899245262, "learning_rate": 1.1908681445217263e-06, "loss": 0.1138, "step": 18012 }, { "epoch": 2.543490539395651, "grad_norm": 3.98474718398513, "learning_rate": 1.1901467570085156e-06, "loss": 0.1889, "step": 18013 }, { "epoch": 2.543631742445637, "grad_norm": 3.1336577949796247, "learning_rate": 1.189425574232491e-06, "loss": 0.1448, "step": 18014 }, { "epoch": 2.543772945495623, "grad_norm": 2.770974564885659, "learning_rate": 1.1887045962104105e-06, "loss": 0.1266, "step": 18015 }, { "epoch": 2.5439141485456087, "grad_norm": 2.612356569656299, "learning_rate": 1.1879838229590269e-06, "loss": 0.1161, "step": 18016 }, { "epoch": 2.5440553515955946, "grad_norm": 3.075850860487638, "learning_rate": 1.1872632544950958e-06, "loss": 0.1369, "step": 18017 }, { "epoch": 2.5441965546455805, "grad_norm": 3.5621999151482253, "learning_rate": 1.1865428908353606e-06, "loss": 0.1515, "step": 18018 }, { "epoch": 2.5443377576955664, "grad_norm": 3.2412328872226506, "learning_rate": 1.1858227319965621e-06, "loss": 0.1478, "step": 18019 }, { "epoch": 2.5444789607455522, "grad_norm": 3.4444295464598937, "learning_rate": 1.1851027779954373e-06, "loss": 0.1803, "step": 18020 }, { "epoch": 2.544620163795538, "grad_norm": 3.1800538468958575, "learning_rate": 1.1843830288487167e-06, "loss": 0.1393, "step": 18021 }, { "epoch": 2.544761366845524, "grad_norm": 3.0040951503758935, "learning_rate": 1.1836634845731288e-06, "loss": 0.1308, "step": 18022 }, { "epoch": 2.54490256989551, "grad_norm": 2.816886911754463, "learning_rate": 1.1829441451853919e-06, "loss": 0.1236, "step": 18023 }, { "epoch": 2.5450437729454958, "grad_norm": 3.302845677279997, "learning_rate": 1.1822250107022271e-06, "loss": 0.142, "step": 18024 }, { "epoch": 2.5451849759954817, "grad_norm": 2.9720327913561806, "learning_rate": 1.1815060811403434e-06, "loss": 0.1201, "step": 18025 }, { "epoch": 2.5453261790454675, "grad_norm": 3.8874735885430507, "learning_rate": 1.1807873565164507e-06, "loss": 0.1123, "step": 18026 }, { "epoch": 2.5454673820954534, "grad_norm": 3.475900603093404, "learning_rate": 1.1800688368472512e-06, "loss": 0.1745, "step": 18027 }, { "epoch": 2.5456085851454393, "grad_norm": 2.9107744500302757, "learning_rate": 1.1793505221494405e-06, "loss": 0.1559, "step": 18028 }, { "epoch": 2.545749788195425, "grad_norm": 3.0786138703302313, "learning_rate": 1.1786324124397165e-06, "loss": 0.1468, "step": 18029 }, { "epoch": 2.545890991245411, "grad_norm": 3.731138841321838, "learning_rate": 1.1779145077347653e-06, "loss": 0.1782, "step": 18030 }, { "epoch": 2.546032194295397, "grad_norm": 2.830469795015861, "learning_rate": 1.177196808051274e-06, "loss": 0.1555, "step": 18031 }, { "epoch": 2.546173397345383, "grad_norm": 3.6894928452401388, "learning_rate": 1.176479313405916e-06, "loss": 0.1474, "step": 18032 }, { "epoch": 2.5463146003953687, "grad_norm": 3.390879953146881, "learning_rate": 1.1757620238153656e-06, "loss": 0.17, "step": 18033 }, { "epoch": 2.5464558034453546, "grad_norm": 2.43978710311581, "learning_rate": 1.175044939296297e-06, "loss": 0.108, "step": 18034 }, { "epoch": 2.5465970064953405, "grad_norm": 3.497768875509265, "learning_rate": 1.174328059865374e-06, "loss": 0.1674, "step": 18035 }, { "epoch": 2.5467382095453264, "grad_norm": 5.042378530346302, "learning_rate": 1.173611385539254e-06, "loss": 0.2188, "step": 18036 }, { "epoch": 2.5468794125953123, "grad_norm": 2.9308614221893112, "learning_rate": 1.1728949163345937e-06, "loss": 0.1526, "step": 18037 }, { "epoch": 2.547020615645298, "grad_norm": 4.380124410677033, "learning_rate": 1.1721786522680445e-06, "loss": 0.1817, "step": 18038 }, { "epoch": 2.547161818695284, "grad_norm": 3.980288169557924, "learning_rate": 1.1714625933562507e-06, "loss": 0.1795, "step": 18039 }, { "epoch": 2.54730302174527, "grad_norm": 3.8329245587214715, "learning_rate": 1.1707467396158524e-06, "loss": 0.1841, "step": 18040 }, { "epoch": 2.547444224795256, "grad_norm": 2.9347048692641127, "learning_rate": 1.170031091063487e-06, "loss": 0.1176, "step": 18041 }, { "epoch": 2.5475854278452417, "grad_norm": 3.4046373850704796, "learning_rate": 1.1693156477157863e-06, "loss": 0.137, "step": 18042 }, { "epoch": 2.5477266308952276, "grad_norm": 2.483233751326855, "learning_rate": 1.1686004095893766e-06, "loss": 0.1025, "step": 18043 }, { "epoch": 2.5478678339452134, "grad_norm": 2.9655940304046453, "learning_rate": 1.167885376700879e-06, "loss": 0.1331, "step": 18044 }, { "epoch": 2.5480090369951993, "grad_norm": 3.7713804727746787, "learning_rate": 1.1671705490669082e-06, "loss": 0.1365, "step": 18045 }, { "epoch": 2.548150240045185, "grad_norm": 2.7548235605644606, "learning_rate": 1.1664559267040821e-06, "loss": 0.1377, "step": 18046 }, { "epoch": 2.548291443095171, "grad_norm": 3.8604996499237973, "learning_rate": 1.1657415096290058e-06, "loss": 0.1694, "step": 18047 }, { "epoch": 2.548432646145157, "grad_norm": 3.027151331129241, "learning_rate": 1.1650272978582823e-06, "loss": 0.1127, "step": 18048 }, { "epoch": 2.548573849195143, "grad_norm": 3.106206959979416, "learning_rate": 1.1643132914085075e-06, "loss": 0.1136, "step": 18049 }, { "epoch": 2.5487150522451287, "grad_norm": 4.167226492324366, "learning_rate": 1.1635994902962767e-06, "loss": 0.167, "step": 18050 }, { "epoch": 2.5488562552951146, "grad_norm": 2.98155320226957, "learning_rate": 1.1628858945381738e-06, "loss": 0.1364, "step": 18051 }, { "epoch": 2.5489974583451005, "grad_norm": 3.3069314638588017, "learning_rate": 1.1621725041507904e-06, "loss": 0.1563, "step": 18052 }, { "epoch": 2.5491386613950864, "grad_norm": 3.0351683589254264, "learning_rate": 1.1614593191506996e-06, "loss": 0.1395, "step": 18053 }, { "epoch": 2.5492798644450723, "grad_norm": 3.8310010464328776, "learning_rate": 1.1607463395544782e-06, "loss": 0.1648, "step": 18054 }, { "epoch": 2.549421067495058, "grad_norm": 2.7225886091780804, "learning_rate": 1.1600335653786932e-06, "loss": 0.1329, "step": 18055 }, { "epoch": 2.549562270545044, "grad_norm": 2.461510485585992, "learning_rate": 1.159320996639911e-06, "loss": 0.0946, "step": 18056 }, { "epoch": 2.54970347359503, "grad_norm": 3.35022717074725, "learning_rate": 1.158608633354692e-06, "loss": 0.1471, "step": 18057 }, { "epoch": 2.549844676645016, "grad_norm": 3.6888856154658907, "learning_rate": 1.1578964755395883e-06, "loss": 0.138, "step": 18058 }, { "epoch": 2.5499858796950017, "grad_norm": 2.8291703678835147, "learning_rate": 1.1571845232111534e-06, "loss": 0.1137, "step": 18059 }, { "epoch": 2.5501270827449876, "grad_norm": 3.0398151577173684, "learning_rate": 1.1564727763859306e-06, "loss": 0.1108, "step": 18060 }, { "epoch": 2.5502682857949734, "grad_norm": 2.5235305031587303, "learning_rate": 1.1557612350804615e-06, "loss": 0.1092, "step": 18061 }, { "epoch": 2.5504094888449593, "grad_norm": 2.5665500716176926, "learning_rate": 1.1550498993112812e-06, "loss": 0.1363, "step": 18062 }, { "epoch": 2.5505506918949448, "grad_norm": 3.1220160967744626, "learning_rate": 1.1543387690949192e-06, "loss": 0.1498, "step": 18063 }, { "epoch": 2.5506918949449306, "grad_norm": 3.921495450402653, "learning_rate": 1.1536278444479066e-06, "loss": 0.1411, "step": 18064 }, { "epoch": 2.5508330979949165, "grad_norm": 3.1476092801392084, "learning_rate": 1.1529171253867643e-06, "loss": 0.1689, "step": 18065 }, { "epoch": 2.5509743010449024, "grad_norm": 3.6333385522977135, "learning_rate": 1.1522066119280062e-06, "loss": 0.1526, "step": 18066 }, { "epoch": 2.5511155040948883, "grad_norm": 2.5591838453408333, "learning_rate": 1.1514963040881444e-06, "loss": 0.1155, "step": 18067 }, { "epoch": 2.551256707144874, "grad_norm": 2.4611500114420806, "learning_rate": 1.1507862018836846e-06, "loss": 0.1122, "step": 18068 }, { "epoch": 2.55139791019486, "grad_norm": 3.2327999350536327, "learning_rate": 1.1500763053311347e-06, "loss": 0.1427, "step": 18069 }, { "epoch": 2.551539113244846, "grad_norm": 3.4988095596876376, "learning_rate": 1.1493666144469894e-06, "loss": 0.1486, "step": 18070 }, { "epoch": 2.551680316294832, "grad_norm": 3.029366707035611, "learning_rate": 1.1486571292477412e-06, "loss": 0.1131, "step": 18071 }, { "epoch": 2.5518215193448177, "grad_norm": 3.9587800218189613, "learning_rate": 1.1479478497498796e-06, "loss": 0.1634, "step": 18072 }, { "epoch": 2.5519627223948036, "grad_norm": 2.838950276117492, "learning_rate": 1.1472387759698855e-06, "loss": 0.1408, "step": 18073 }, { "epoch": 2.5521039254447895, "grad_norm": 2.832469843744986, "learning_rate": 1.146529907924241e-06, "loss": 0.1159, "step": 18074 }, { "epoch": 2.5522451284947754, "grad_norm": 3.105750667829268, "learning_rate": 1.145821245629416e-06, "loss": 0.1525, "step": 18075 }, { "epoch": 2.5523863315447612, "grad_norm": 3.782061526304485, "learning_rate": 1.1451127891018832e-06, "loss": 0.173, "step": 18076 }, { "epoch": 2.552527534594747, "grad_norm": 3.2271584840012326, "learning_rate": 1.1444045383581037e-06, "loss": 0.1433, "step": 18077 }, { "epoch": 2.552668737644733, "grad_norm": 2.835457888887627, "learning_rate": 1.143696493414539e-06, "loss": 0.1372, "step": 18078 }, { "epoch": 2.552809940694719, "grad_norm": 2.6953410528419317, "learning_rate": 1.1429886542876423e-06, "loss": 0.1274, "step": 18079 }, { "epoch": 2.5529511437447048, "grad_norm": 3.702785131878803, "learning_rate": 1.1422810209938627e-06, "loss": 0.1914, "step": 18080 }, { "epoch": 2.5530923467946907, "grad_norm": 3.6474775356559195, "learning_rate": 1.1415735935496497e-06, "loss": 0.179, "step": 18081 }, { "epoch": 2.5532335498446765, "grad_norm": 2.8929043996782804, "learning_rate": 1.1408663719714418e-06, "loss": 0.1279, "step": 18082 }, { "epoch": 2.5533747528946624, "grad_norm": 3.3660289851576866, "learning_rate": 1.1401593562756718e-06, "loss": 0.1271, "step": 18083 }, { "epoch": 2.5535159559446483, "grad_norm": 4.157580854387625, "learning_rate": 1.1394525464787708e-06, "loss": 0.2027, "step": 18084 }, { "epoch": 2.553657158994634, "grad_norm": 3.327971017984178, "learning_rate": 1.1387459425971659e-06, "loss": 0.1506, "step": 18085 }, { "epoch": 2.55379836204462, "grad_norm": 3.012474743731629, "learning_rate": 1.138039544647279e-06, "loss": 0.1481, "step": 18086 }, { "epoch": 2.553939565094606, "grad_norm": 3.419300261358047, "learning_rate": 1.1373333526455265e-06, "loss": 0.19, "step": 18087 }, { "epoch": 2.554080768144592, "grad_norm": 3.144514108389352, "learning_rate": 1.1366273666083194e-06, "loss": 0.1317, "step": 18088 }, { "epoch": 2.5542219711945777, "grad_norm": 2.859776146905121, "learning_rate": 1.1359215865520645e-06, "loss": 0.1022, "step": 18089 }, { "epoch": 2.5543631742445636, "grad_norm": 2.6622662921025553, "learning_rate": 1.1352160124931644e-06, "loss": 0.1109, "step": 18090 }, { "epoch": 2.5545043772945495, "grad_norm": 3.0801916851106528, "learning_rate": 1.1345106444480148e-06, "loss": 0.1166, "step": 18091 }, { "epoch": 2.5546455803445354, "grad_norm": 3.082024860517313, "learning_rate": 1.1338054824330092e-06, "loss": 0.1076, "step": 18092 }, { "epoch": 2.5547867833945213, "grad_norm": 2.465112602366522, "learning_rate": 1.1331005264645355e-06, "loss": 0.1151, "step": 18093 }, { "epoch": 2.554927986444507, "grad_norm": 3.08804543734582, "learning_rate": 1.1323957765589766e-06, "loss": 0.1236, "step": 18094 }, { "epoch": 2.555069189494493, "grad_norm": 3.515058287089412, "learning_rate": 1.13169123273271e-06, "loss": 0.1632, "step": 18095 }, { "epoch": 2.555210392544479, "grad_norm": 3.10288954747099, "learning_rate": 1.1309868950021085e-06, "loss": 0.1463, "step": 18096 }, { "epoch": 2.555351595594465, "grad_norm": 3.040650413140555, "learning_rate": 1.13028276338354e-06, "loss": 0.1179, "step": 18097 }, { "epoch": 2.5554927986444507, "grad_norm": 3.1141678092501315, "learning_rate": 1.1295788378933713e-06, "loss": 0.156, "step": 18098 }, { "epoch": 2.5556340016944366, "grad_norm": 2.487107612767889, "learning_rate": 1.1288751185479618e-06, "loss": 0.1169, "step": 18099 }, { "epoch": 2.5557752047444224, "grad_norm": 4.290609793135492, "learning_rate": 1.1281716053636616e-06, "loss": 0.1995, "step": 18100 }, { "epoch": 2.5559164077944083, "grad_norm": 3.8539310608864423, "learning_rate": 1.127468298356822e-06, "loss": 0.1461, "step": 18101 }, { "epoch": 2.556057610844394, "grad_norm": 3.48803233667669, "learning_rate": 1.1267651975437844e-06, "loss": 0.1318, "step": 18102 }, { "epoch": 2.55619881389438, "grad_norm": 3.2696010754786697, "learning_rate": 1.1260623029408945e-06, "loss": 0.1471, "step": 18103 }, { "epoch": 2.556340016944366, "grad_norm": 3.660739877020991, "learning_rate": 1.125359614564483e-06, "loss": 0.139, "step": 18104 }, { "epoch": 2.556481219994352, "grad_norm": 3.8033224258300145, "learning_rate": 1.124657132430883e-06, "loss": 0.1855, "step": 18105 }, { "epoch": 2.5566224230443377, "grad_norm": 3.5532140615639856, "learning_rate": 1.1239548565564173e-06, "loss": 0.1538, "step": 18106 }, { "epoch": 2.5567636260943236, "grad_norm": 3.712564770397251, "learning_rate": 1.1232527869574083e-06, "loss": 0.1714, "step": 18107 }, { "epoch": 2.5569048291443095, "grad_norm": 3.290367390283262, "learning_rate": 1.1225509236501698e-06, "loss": 0.1763, "step": 18108 }, { "epoch": 2.5570460321942954, "grad_norm": 3.460779686341147, "learning_rate": 1.1218492666510151e-06, "loss": 0.1718, "step": 18109 }, { "epoch": 2.5571872352442813, "grad_norm": 2.689111240164661, "learning_rate": 1.121147815976248e-06, "loss": 0.1237, "step": 18110 }, { "epoch": 2.557328438294267, "grad_norm": 2.9138543998963446, "learning_rate": 1.120446571642172e-06, "loss": 0.1232, "step": 18111 }, { "epoch": 2.557469641344253, "grad_norm": 3.26815718229328, "learning_rate": 1.119745533665083e-06, "loss": 0.1777, "step": 18112 }, { "epoch": 2.557610844394239, "grad_norm": 3.2549554086435664, "learning_rate": 1.1190447020612726e-06, "loss": 0.1499, "step": 18113 }, { "epoch": 2.557752047444225, "grad_norm": 3.3978601491214597, "learning_rate": 1.1183440768470255e-06, "loss": 0.1326, "step": 18114 }, { "epoch": 2.5578932504942107, "grad_norm": 3.9562328731414356, "learning_rate": 1.1176436580386307e-06, "loss": 0.1592, "step": 18115 }, { "epoch": 2.5580344535441966, "grad_norm": 3.2973267990483777, "learning_rate": 1.1169434456523598e-06, "loss": 0.1126, "step": 18116 }, { "epoch": 2.5581756565941824, "grad_norm": 3.302925173769135, "learning_rate": 1.1162434397044863e-06, "loss": 0.1116, "step": 18117 }, { "epoch": 2.5583168596441683, "grad_norm": 3.389054406538706, "learning_rate": 1.1155436402112785e-06, "loss": 0.1554, "step": 18118 }, { "epoch": 2.558458062694154, "grad_norm": 2.73220642882601, "learning_rate": 1.1148440471889977e-06, "loss": 0.1529, "step": 18119 }, { "epoch": 2.55859926574414, "grad_norm": 4.060580075112043, "learning_rate": 1.1141446606539063e-06, "loss": 0.1983, "step": 18120 }, { "epoch": 2.558740468794126, "grad_norm": 3.0731511384582855, "learning_rate": 1.113445480622255e-06, "loss": 0.0988, "step": 18121 }, { "epoch": 2.558881671844112, "grad_norm": 3.7965665954326107, "learning_rate": 1.1127465071102938e-06, "loss": 0.2159, "step": 18122 }, { "epoch": 2.5590228748940977, "grad_norm": 3.315577842645405, "learning_rate": 1.1120477401342656e-06, "loss": 0.1648, "step": 18123 }, { "epoch": 2.5591640779440836, "grad_norm": 3.023036913243173, "learning_rate": 1.1113491797104093e-06, "loss": 0.1275, "step": 18124 }, { "epoch": 2.5593052809940695, "grad_norm": 3.3573236739514294, "learning_rate": 1.1106508258549587e-06, "loss": 0.1812, "step": 18125 }, { "epoch": 2.5594464840440554, "grad_norm": 3.2516434271076387, "learning_rate": 1.109952678584144e-06, "loss": 0.172, "step": 18126 }, { "epoch": 2.5595876870940413, "grad_norm": 3.4198705127120337, "learning_rate": 1.1092547379141905e-06, "loss": 0.1384, "step": 18127 }, { "epoch": 2.559728890144027, "grad_norm": 2.902061681787575, "learning_rate": 1.108557003861317e-06, "loss": 0.153, "step": 18128 }, { "epoch": 2.559870093194013, "grad_norm": 2.930432337116502, "learning_rate": 1.1078594764417382e-06, "loss": 0.1614, "step": 18129 }, { "epoch": 2.560011296243999, "grad_norm": 3.552939862115775, "learning_rate": 1.107162155671665e-06, "loss": 0.171, "step": 18130 }, { "epoch": 2.560152499293985, "grad_norm": 3.586560767200432, "learning_rate": 1.1064650415673016e-06, "loss": 0.1433, "step": 18131 }, { "epoch": 2.5602937023439707, "grad_norm": 3.4286223640051556, "learning_rate": 1.1057681341448533e-06, "loss": 0.1497, "step": 18132 }, { "epoch": 2.5604349053939566, "grad_norm": 2.5372202011605998, "learning_rate": 1.1050714334205104e-06, "loss": 0.0962, "step": 18133 }, { "epoch": 2.5605761084439425, "grad_norm": 2.8853521794412216, "learning_rate": 1.1043749394104665e-06, "loss": 0.1395, "step": 18134 }, { "epoch": 2.5607173114939283, "grad_norm": 3.9096006147146736, "learning_rate": 1.1036786521309062e-06, "loss": 0.1633, "step": 18135 }, { "epoch": 2.560858514543914, "grad_norm": 3.054653504504503, "learning_rate": 1.1029825715980115e-06, "loss": 0.1364, "step": 18136 }, { "epoch": 2.5609997175939, "grad_norm": 3.838996444428988, "learning_rate": 1.10228669782796e-06, "loss": 0.1586, "step": 18137 }, { "epoch": 2.561140920643886, "grad_norm": 3.135102087103943, "learning_rate": 1.1015910308369239e-06, "loss": 0.1104, "step": 18138 }, { "epoch": 2.561282123693872, "grad_norm": 3.2179668168799265, "learning_rate": 1.1008955706410696e-06, "loss": 0.1598, "step": 18139 }, { "epoch": 2.5614233267438578, "grad_norm": 3.271310524133325, "learning_rate": 1.1002003172565579e-06, "loss": 0.1493, "step": 18140 }, { "epoch": 2.5615645297938436, "grad_norm": 3.5599349137492755, "learning_rate": 1.0995052706995502e-06, "loss": 0.2049, "step": 18141 }, { "epoch": 2.5617057328438295, "grad_norm": 3.2979125750237177, "learning_rate": 1.0988104309861913e-06, "loss": 0.1632, "step": 18142 }, { "epoch": 2.5618469358938154, "grad_norm": 3.633654913261387, "learning_rate": 1.0981157981326374e-06, "loss": 0.1673, "step": 18143 }, { "epoch": 2.5619881389438013, "grad_norm": 3.5391782869550203, "learning_rate": 1.0974213721550264e-06, "loss": 0.2031, "step": 18144 }, { "epoch": 2.562129341993787, "grad_norm": 3.0082461760370736, "learning_rate": 1.096727153069499e-06, "loss": 0.1312, "step": 18145 }, { "epoch": 2.562270545043773, "grad_norm": 3.612543799885159, "learning_rate": 1.0960331408921865e-06, "loss": 0.1493, "step": 18146 }, { "epoch": 2.562411748093759, "grad_norm": 3.8173948131604303, "learning_rate": 1.0953393356392195e-06, "loss": 0.173, "step": 18147 }, { "epoch": 2.562552951143745, "grad_norm": 2.885934360680415, "learning_rate": 1.0946457373267183e-06, "loss": 0.1306, "step": 18148 }, { "epoch": 2.5626941541937307, "grad_norm": 3.099886123455802, "learning_rate": 1.0939523459708078e-06, "loss": 0.1536, "step": 18149 }, { "epoch": 2.5628353572437166, "grad_norm": 3.067752137872937, "learning_rate": 1.0932591615875975e-06, "loss": 0.1259, "step": 18150 }, { "epoch": 2.5629765602937025, "grad_norm": 3.7695212656849257, "learning_rate": 1.0925661841931966e-06, "loss": 0.1488, "step": 18151 }, { "epoch": 2.5631177633436883, "grad_norm": 3.573925175723294, "learning_rate": 1.0918734138037113e-06, "loss": 0.1671, "step": 18152 }, { "epoch": 2.5632589663936742, "grad_norm": 3.4212301352630394, "learning_rate": 1.0911808504352405e-06, "loss": 0.1682, "step": 18153 }, { "epoch": 2.56340016944366, "grad_norm": 3.6352337091832427, "learning_rate": 1.0904884941038784e-06, "loss": 0.1788, "step": 18154 }, { "epoch": 2.563541372493646, "grad_norm": 3.778296393861006, "learning_rate": 1.0897963448257165e-06, "loss": 0.1815, "step": 18155 }, { "epoch": 2.563682575543632, "grad_norm": 2.50133110295319, "learning_rate": 1.0891044026168407e-06, "loss": 0.1167, "step": 18156 }, { "epoch": 2.5638237785936178, "grad_norm": 3.741980515475324, "learning_rate": 1.0884126674933293e-06, "loss": 0.1531, "step": 18157 }, { "epoch": 2.5639649816436036, "grad_norm": 3.5704914876166125, "learning_rate": 1.0877211394712617e-06, "loss": 0.1635, "step": 18158 }, { "epoch": 2.5641061846935895, "grad_norm": 3.240132676162482, "learning_rate": 1.0870298185667016e-06, "loss": 0.1503, "step": 18159 }, { "epoch": 2.5642473877435754, "grad_norm": 3.7617660447284846, "learning_rate": 1.086338704795722e-06, "loss": 0.1453, "step": 18160 }, { "epoch": 2.5643885907935613, "grad_norm": 3.7918414763313066, "learning_rate": 1.0856477981743808e-06, "loss": 0.2035, "step": 18161 }, { "epoch": 2.564529793843547, "grad_norm": 3.3431136720726222, "learning_rate": 1.0849570987187341e-06, "loss": 0.1392, "step": 18162 }, { "epoch": 2.564670996893533, "grad_norm": 3.220944775321384, "learning_rate": 1.0842666064448347e-06, "loss": 0.1839, "step": 18163 }, { "epoch": 2.564812199943519, "grad_norm": 2.891574019879635, "learning_rate": 1.08357632136873e-06, "loss": 0.1248, "step": 18164 }, { "epoch": 2.5649534029935044, "grad_norm": 2.4113915616055595, "learning_rate": 1.0828862435064603e-06, "loss": 0.1094, "step": 18165 }, { "epoch": 2.5650946060434903, "grad_norm": 3.3791706142500963, "learning_rate": 1.0821963728740626e-06, "loss": 0.1252, "step": 18166 }, { "epoch": 2.565235809093476, "grad_norm": 2.9113410783353912, "learning_rate": 1.0815067094875708e-06, "loss": 0.1431, "step": 18167 }, { "epoch": 2.565377012143462, "grad_norm": 2.897003436337826, "learning_rate": 1.0808172533630113e-06, "loss": 0.1152, "step": 18168 }, { "epoch": 2.565518215193448, "grad_norm": 3.042873770378299, "learning_rate": 1.0801280045164063e-06, "loss": 0.1659, "step": 18169 }, { "epoch": 2.565659418243434, "grad_norm": 4.321938523503987, "learning_rate": 1.0794389629637747e-06, "loss": 0.1832, "step": 18170 }, { "epoch": 2.5658006212934197, "grad_norm": 4.114005242173793, "learning_rate": 1.0787501287211277e-06, "loss": 0.2262, "step": 18171 }, { "epoch": 2.5659418243434056, "grad_norm": 3.213989771688702, "learning_rate": 1.078061501804476e-06, "loss": 0.1383, "step": 18172 }, { "epoch": 2.5660830273933914, "grad_norm": 3.0151628400762496, "learning_rate": 1.0773730822298223e-06, "loss": 0.1117, "step": 18173 }, { "epoch": 2.5662242304433773, "grad_norm": 3.103031178639752, "learning_rate": 1.076684870013165e-06, "loss": 0.1308, "step": 18174 }, { "epoch": 2.566365433493363, "grad_norm": 2.891695603615598, "learning_rate": 1.0759968651704987e-06, "loss": 0.1347, "step": 18175 }, { "epoch": 2.566506636543349, "grad_norm": 2.465769411976776, "learning_rate": 1.075309067717808e-06, "loss": 0.1207, "step": 18176 }, { "epoch": 2.566647839593335, "grad_norm": 2.7200952550984683, "learning_rate": 1.0746214776710827e-06, "loss": 0.1277, "step": 18177 }, { "epoch": 2.566789042643321, "grad_norm": 3.5850253879045555, "learning_rate": 1.0739340950462996e-06, "loss": 0.1427, "step": 18178 }, { "epoch": 2.5669302456933067, "grad_norm": 2.8567922025712362, "learning_rate": 1.073246919859432e-06, "loss": 0.1527, "step": 18179 }, { "epoch": 2.5670714487432926, "grad_norm": 3.78069613439659, "learning_rate": 1.0725599521264518e-06, "loss": 0.1617, "step": 18180 }, { "epoch": 2.5672126517932785, "grad_norm": 2.7094698462715585, "learning_rate": 1.071873191863323e-06, "loss": 0.1213, "step": 18181 }, { "epoch": 2.5673538548432644, "grad_norm": 2.301144886142465, "learning_rate": 1.071186639086005e-06, "loss": 0.1044, "step": 18182 }, { "epoch": 2.5674950578932503, "grad_norm": 2.87505874717895, "learning_rate": 1.0705002938104537e-06, "loss": 0.1384, "step": 18183 }, { "epoch": 2.567636260943236, "grad_norm": 3.17409717148107, "learning_rate": 1.0698141560526198e-06, "loss": 0.143, "step": 18184 }, { "epoch": 2.567777463993222, "grad_norm": 3.3648154801796095, "learning_rate": 1.0691282258284474e-06, "loss": 0.1661, "step": 18185 }, { "epoch": 2.567918667043208, "grad_norm": 3.1753453082822976, "learning_rate": 1.068442503153878e-06, "loss": 0.1486, "step": 18186 }, { "epoch": 2.568059870093194, "grad_norm": 2.932756331752433, "learning_rate": 1.0677569880448479e-06, "loss": 0.1173, "step": 18187 }, { "epoch": 2.5682010731431797, "grad_norm": 2.972902090876545, "learning_rate": 1.0670716805172865e-06, "loss": 0.1361, "step": 18188 }, { "epoch": 2.5683422761931656, "grad_norm": 3.6542442153204764, "learning_rate": 1.066386580587122e-06, "loss": 0.1798, "step": 18189 }, { "epoch": 2.5684834792431515, "grad_norm": 3.3376484908692223, "learning_rate": 1.0657016882702764e-06, "loss": 0.1385, "step": 18190 }, { "epoch": 2.5686246822931373, "grad_norm": 3.356504209906913, "learning_rate": 1.0650170035826646e-06, "loss": 0.1579, "step": 18191 }, { "epoch": 2.5687658853431232, "grad_norm": 3.5502112551226648, "learning_rate": 1.0643325265402016e-06, "loss": 0.1495, "step": 18192 }, { "epoch": 2.568907088393109, "grad_norm": 2.32659482971839, "learning_rate": 1.063648257158787e-06, "loss": 0.1217, "step": 18193 }, { "epoch": 2.569048291443095, "grad_norm": 2.561751731971891, "learning_rate": 1.062964195454329e-06, "loss": 0.136, "step": 18194 }, { "epoch": 2.569189494493081, "grad_norm": 3.660093123088902, "learning_rate": 1.0622803414427252e-06, "loss": 0.1608, "step": 18195 }, { "epoch": 2.5693306975430668, "grad_norm": 3.398086444076299, "learning_rate": 1.061596695139865e-06, "loss": 0.163, "step": 18196 }, { "epoch": 2.5694719005930526, "grad_norm": 2.635536701318488, "learning_rate": 1.0609132565616376e-06, "loss": 0.1373, "step": 18197 }, { "epoch": 2.5696131036430385, "grad_norm": 3.305819570136027, "learning_rate": 1.0602300257239262e-06, "loss": 0.1376, "step": 18198 }, { "epoch": 2.5697543066930244, "grad_norm": 3.808664689361435, "learning_rate": 1.059547002642608e-06, "loss": 0.157, "step": 18199 }, { "epoch": 2.5698955097430103, "grad_norm": 2.789163044721818, "learning_rate": 1.0588641873335558e-06, "loss": 0.1113, "step": 18200 }, { "epoch": 2.570036712792996, "grad_norm": 2.8943611642770968, "learning_rate": 1.0581815798126393e-06, "loss": 0.1257, "step": 18201 }, { "epoch": 2.570177915842982, "grad_norm": 4.243559460351196, "learning_rate": 1.0574991800957203e-06, "loss": 0.1814, "step": 18202 }, { "epoch": 2.570319118892968, "grad_norm": 3.3450133729402722, "learning_rate": 1.0568169881986589e-06, "loss": 0.1234, "step": 18203 }, { "epoch": 2.570460321942954, "grad_norm": 3.318325888590931, "learning_rate": 1.0561350041373086e-06, "loss": 0.1206, "step": 18204 }, { "epoch": 2.5706015249929397, "grad_norm": 3.009003797308653, "learning_rate": 1.0554532279275154e-06, "loss": 0.1551, "step": 18205 }, { "epoch": 2.5707427280429256, "grad_norm": 3.6622518859616418, "learning_rate": 1.0547716595851298e-06, "loss": 0.1947, "step": 18206 }, { "epoch": 2.5708839310929115, "grad_norm": 3.24804185062602, "learning_rate": 1.054090299125986e-06, "loss": 0.1591, "step": 18207 }, { "epoch": 2.5710251341428974, "grad_norm": 2.922792275753425, "learning_rate": 1.0534091465659212e-06, "loss": 0.1415, "step": 18208 }, { "epoch": 2.5711663371928832, "grad_norm": 3.6057591749240854, "learning_rate": 1.0527282019207663e-06, "loss": 0.1782, "step": 18209 }, { "epoch": 2.571307540242869, "grad_norm": 3.177133369797866, "learning_rate": 1.0520474652063395e-06, "loss": 0.114, "step": 18210 }, { "epoch": 2.571448743292855, "grad_norm": 3.1041901881829195, "learning_rate": 1.0513669364384682e-06, "loss": 0.1275, "step": 18211 }, { "epoch": 2.571589946342841, "grad_norm": 3.86945972660611, "learning_rate": 1.0506866156329632e-06, "loss": 0.172, "step": 18212 }, { "epoch": 2.5717311493928268, "grad_norm": 4.074950466853784, "learning_rate": 1.0500065028056372e-06, "loss": 0.1664, "step": 18213 }, { "epoch": 2.5718723524428126, "grad_norm": 3.5395755949801235, "learning_rate": 1.0493265979722944e-06, "loss": 0.1751, "step": 18214 }, { "epoch": 2.5720135554927985, "grad_norm": 2.6903462539930287, "learning_rate": 1.0486469011487366e-06, "loss": 0.1337, "step": 18215 }, { "epoch": 2.5721547585427844, "grad_norm": 3.284920940289745, "learning_rate": 1.0479674123507588e-06, "loss": 0.1849, "step": 18216 }, { "epoch": 2.5722959615927703, "grad_norm": 3.886629190673133, "learning_rate": 1.0472881315941518e-06, "loss": 0.1814, "step": 18217 }, { "epoch": 2.572437164642756, "grad_norm": 2.5827770557910807, "learning_rate": 1.046609058894703e-06, "loss": 0.125, "step": 18218 }, { "epoch": 2.572578367692742, "grad_norm": 3.6173817555605985, "learning_rate": 1.045930194268192e-06, "loss": 0.1546, "step": 18219 }, { "epoch": 2.572719570742728, "grad_norm": 3.0574163647817723, "learning_rate": 1.0452515377303974e-06, "loss": 0.1631, "step": 18220 }, { "epoch": 2.572860773792714, "grad_norm": 3.0764926182927557, "learning_rate": 1.0445730892970896e-06, "loss": 0.1415, "step": 18221 }, { "epoch": 2.5730019768426997, "grad_norm": 3.288885087861002, "learning_rate": 1.0438948489840327e-06, "loss": 0.1498, "step": 18222 }, { "epoch": 2.5731431798926856, "grad_norm": 4.700359879095835, "learning_rate": 1.0432168168069946e-06, "loss": 0.204, "step": 18223 }, { "epoch": 2.5732843829426715, "grad_norm": 3.322002456412449, "learning_rate": 1.0425389927817298e-06, "loss": 0.1193, "step": 18224 }, { "epoch": 2.5734255859926574, "grad_norm": 2.3577581401731997, "learning_rate": 1.0418613769239893e-06, "loss": 0.0907, "step": 18225 }, { "epoch": 2.5735667890426432, "grad_norm": 3.1867619218355734, "learning_rate": 1.0411839692495241e-06, "loss": 0.1581, "step": 18226 }, { "epoch": 2.573707992092629, "grad_norm": 3.6110460741866346, "learning_rate": 1.0405067697740711e-06, "loss": 0.1703, "step": 18227 }, { "epoch": 2.573849195142615, "grad_norm": 3.579059029724683, "learning_rate": 1.0398297785133727e-06, "loss": 0.1853, "step": 18228 }, { "epoch": 2.573990398192601, "grad_norm": 2.164622420654352, "learning_rate": 1.0391529954831603e-06, "loss": 0.098, "step": 18229 }, { "epoch": 2.5741316012425868, "grad_norm": 2.343851012616829, "learning_rate": 1.0384764206991638e-06, "loss": 0.1146, "step": 18230 }, { "epoch": 2.5742728042925727, "grad_norm": 3.488372132388196, "learning_rate": 1.0378000541771038e-06, "loss": 0.1765, "step": 18231 }, { "epoch": 2.5744140073425585, "grad_norm": 3.224470441164652, "learning_rate": 1.0371238959327001e-06, "loss": 0.1626, "step": 18232 }, { "epoch": 2.5745552103925444, "grad_norm": 3.5011964245965554, "learning_rate": 1.0364479459816668e-06, "loss": 0.1504, "step": 18233 }, { "epoch": 2.5746964134425303, "grad_norm": 3.025529715933069, "learning_rate": 1.0357722043397122e-06, "loss": 0.1489, "step": 18234 }, { "epoch": 2.574837616492516, "grad_norm": 4.153344796615868, "learning_rate": 1.0350966710225408e-06, "loss": 0.1798, "step": 18235 }, { "epoch": 2.574978819542502, "grad_norm": 3.2941692259375777, "learning_rate": 1.0344213460458496e-06, "loss": 0.1527, "step": 18236 }, { "epoch": 2.575120022592488, "grad_norm": 3.8903219656010717, "learning_rate": 1.0337462294253353e-06, "loss": 0.2063, "step": 18237 }, { "epoch": 2.575261225642474, "grad_norm": 3.5045042108634528, "learning_rate": 1.0330713211766864e-06, "loss": 0.1631, "step": 18238 }, { "epoch": 2.5754024286924597, "grad_norm": 2.8978463366662717, "learning_rate": 1.0323966213155856e-06, "loss": 0.1161, "step": 18239 }, { "epoch": 2.5755436317424456, "grad_norm": 2.598559774526446, "learning_rate": 1.0317221298577163e-06, "loss": 0.1372, "step": 18240 }, { "epoch": 2.5756848347924315, "grad_norm": 2.720845988033095, "learning_rate": 1.0310478468187512e-06, "loss": 0.1219, "step": 18241 }, { "epoch": 2.5758260378424174, "grad_norm": 2.7778949173073957, "learning_rate": 1.0303737722143614e-06, "loss": 0.1134, "step": 18242 }, { "epoch": 2.5759672408924033, "grad_norm": 3.478645413779483, "learning_rate": 1.0296999060602132e-06, "loss": 0.1278, "step": 18243 }, { "epoch": 2.576108443942389, "grad_norm": 3.1789726678680976, "learning_rate": 1.0290262483719637e-06, "loss": 0.1227, "step": 18244 }, { "epoch": 2.576249646992375, "grad_norm": 2.9640174092526355, "learning_rate": 1.0283527991652675e-06, "loss": 0.1083, "step": 18245 }, { "epoch": 2.576390850042361, "grad_norm": 3.3832616879377473, "learning_rate": 1.0276795584557796e-06, "loss": 0.1607, "step": 18246 }, { "epoch": 2.576532053092347, "grad_norm": 3.0153525818427673, "learning_rate": 1.0270065262591434e-06, "loss": 0.1556, "step": 18247 }, { "epoch": 2.5766732561423327, "grad_norm": 2.7718519159096564, "learning_rate": 1.0263337025910015e-06, "loss": 0.1059, "step": 18248 }, { "epoch": 2.5768144591923186, "grad_norm": 2.9121698167443584, "learning_rate": 1.025661087466988e-06, "loss": 0.1407, "step": 18249 }, { "epoch": 2.5769556622423044, "grad_norm": 2.832800270914015, "learning_rate": 1.0249886809027355e-06, "loss": 0.1208, "step": 18250 }, { "epoch": 2.5770968652922903, "grad_norm": 3.5259936700038046, "learning_rate": 1.0243164829138697e-06, "loss": 0.1525, "step": 18251 }, { "epoch": 2.577238068342276, "grad_norm": 3.841510813912918, "learning_rate": 1.0236444935160129e-06, "loss": 0.1357, "step": 18252 }, { "epoch": 2.577379271392262, "grad_norm": 3.1505332762522236, "learning_rate": 1.0229727127247812e-06, "loss": 0.1229, "step": 18253 }, { "epoch": 2.577520474442248, "grad_norm": 2.9391792209367043, "learning_rate": 1.022301140555787e-06, "loss": 0.1301, "step": 18254 }, { "epoch": 2.577661677492234, "grad_norm": 2.9446023011871647, "learning_rate": 1.0216297770246374e-06, "loss": 0.1401, "step": 18255 }, { "epoch": 2.5778028805422197, "grad_norm": 3.0687947747388544, "learning_rate": 1.0209586221469336e-06, "loss": 0.1451, "step": 18256 }, { "epoch": 2.5779440835922056, "grad_norm": 3.761039335236753, "learning_rate": 1.0202876759382729e-06, "loss": 0.187, "step": 18257 }, { "epoch": 2.5780852866421915, "grad_norm": 2.995899272726151, "learning_rate": 1.0196169384142495e-06, "loss": 0.1492, "step": 18258 }, { "epoch": 2.5782264896921774, "grad_norm": 3.565628582696319, "learning_rate": 1.01894640959045e-06, "loss": 0.1615, "step": 18259 }, { "epoch": 2.5783676927421633, "grad_norm": 3.128708148480872, "learning_rate": 1.0182760894824607e-06, "loss": 0.1655, "step": 18260 }, { "epoch": 2.578508895792149, "grad_norm": 3.1857521746964945, "learning_rate": 1.0176059781058523e-06, "loss": 0.1242, "step": 18261 }, { "epoch": 2.578650098842135, "grad_norm": 2.1670526820191407, "learning_rate": 1.0169360754762013e-06, "loss": 0.0957, "step": 18262 }, { "epoch": 2.578791301892121, "grad_norm": 3.825303779557316, "learning_rate": 1.016266381609078e-06, "loss": 0.1942, "step": 18263 }, { "epoch": 2.578932504942107, "grad_norm": 2.9613911884992525, "learning_rate": 1.0155968965200435e-06, "loss": 0.115, "step": 18264 }, { "epoch": 2.5790737079920927, "grad_norm": 4.1997669042623835, "learning_rate": 1.0149276202246571e-06, "loss": 0.1718, "step": 18265 }, { "epoch": 2.5792149110420786, "grad_norm": 4.549322040203865, "learning_rate": 1.014258552738473e-06, "loss": 0.215, "step": 18266 }, { "epoch": 2.5793561140920644, "grad_norm": 3.772133904265247, "learning_rate": 1.0135896940770396e-06, "loss": 0.2094, "step": 18267 }, { "epoch": 2.5794973171420503, "grad_norm": 2.8829291606143514, "learning_rate": 1.0129210442559011e-06, "loss": 0.1578, "step": 18268 }, { "epoch": 2.579638520192036, "grad_norm": 3.2274315491051784, "learning_rate": 1.0122526032905956e-06, "loss": 0.1465, "step": 18269 }, { "epoch": 2.579779723242022, "grad_norm": 2.8738337211240514, "learning_rate": 1.0115843711966577e-06, "loss": 0.1386, "step": 18270 }, { "epoch": 2.579920926292008, "grad_norm": 3.8014679463134144, "learning_rate": 1.0109163479896179e-06, "loss": 0.1743, "step": 18271 }, { "epoch": 2.580062129341994, "grad_norm": 3.2462179452088438, "learning_rate": 1.0102485336849998e-06, "loss": 0.1353, "step": 18272 }, { "epoch": 2.5802033323919797, "grad_norm": 3.1370534420246488, "learning_rate": 1.0095809282983238e-06, "loss": 0.1186, "step": 18273 }, { "epoch": 2.5803445354419656, "grad_norm": 2.915764062365713, "learning_rate": 1.0089135318451026e-06, "loss": 0.1314, "step": 18274 }, { "epoch": 2.5804857384919515, "grad_norm": 3.568857921978798, "learning_rate": 1.0082463443408496e-06, "loss": 0.1646, "step": 18275 }, { "epoch": 2.5806269415419374, "grad_norm": 3.49192486269061, "learning_rate": 1.00757936580107e-06, "loss": 0.1475, "step": 18276 }, { "epoch": 2.5807681445919233, "grad_norm": 3.464518473456693, "learning_rate": 1.0069125962412606e-06, "loss": 0.1562, "step": 18277 }, { "epoch": 2.580909347641909, "grad_norm": 3.6667727195765853, "learning_rate": 1.0062460356769189e-06, "loss": 0.1423, "step": 18278 }, { "epoch": 2.581050550691895, "grad_norm": 3.020328314051827, "learning_rate": 1.0055796841235331e-06, "loss": 0.1053, "step": 18279 }, { "epoch": 2.581191753741881, "grad_norm": 3.2576259707113926, "learning_rate": 1.0049135415965926e-06, "loss": 0.1407, "step": 18280 }, { "epoch": 2.581332956791867, "grad_norm": 4.378202415880571, "learning_rate": 1.004247608111577e-06, "loss": 0.1748, "step": 18281 }, { "epoch": 2.5814741598418527, "grad_norm": 3.405192986948481, "learning_rate": 1.0035818836839618e-06, "loss": 0.1405, "step": 18282 }, { "epoch": 2.5816153628918386, "grad_norm": 3.4529055194134926, "learning_rate": 1.0029163683292187e-06, "loss": 0.1437, "step": 18283 }, { "epoch": 2.5817565659418245, "grad_norm": 3.3258724199179834, "learning_rate": 1.0022510620628145e-06, "loss": 0.161, "step": 18284 }, { "epoch": 2.5818977689918103, "grad_norm": 3.020470991083388, "learning_rate": 1.001585964900208e-06, "loss": 0.1512, "step": 18285 }, { "epoch": 2.582038972041796, "grad_norm": 3.426111447057368, "learning_rate": 1.000921076856859e-06, "loss": 0.1457, "step": 18286 }, { "epoch": 2.582180175091782, "grad_norm": 3.769053291077551, "learning_rate": 1.000256397948217e-06, "loss": 0.195, "step": 18287 }, { "epoch": 2.582321378141768, "grad_norm": 3.095072890120823, "learning_rate": 9.995919281897304e-07, "loss": 0.1309, "step": 18288 }, { "epoch": 2.582462581191754, "grad_norm": 3.4529234124657546, "learning_rate": 9.989276675968395e-07, "loss": 0.1605, "step": 18289 }, { "epoch": 2.5826037842417398, "grad_norm": 2.77200983885765, "learning_rate": 9.982636161849824e-07, "loss": 0.1402, "step": 18290 }, { "epoch": 2.5827449872917256, "grad_norm": 2.834870186750404, "learning_rate": 9.975997739695898e-07, "loss": 0.1347, "step": 18291 }, { "epoch": 2.5828861903417115, "grad_norm": 2.965429847664991, "learning_rate": 9.969361409660927e-07, "loss": 0.1306, "step": 18292 }, { "epoch": 2.5830273933916974, "grad_norm": 3.906828374747792, "learning_rate": 9.962727171899134e-07, "loss": 0.1879, "step": 18293 }, { "epoch": 2.5831685964416833, "grad_norm": 2.8111316104734656, "learning_rate": 9.95609502656465e-07, "loss": 0.1111, "step": 18294 }, { "epoch": 2.583309799491669, "grad_norm": 3.5890050696312095, "learning_rate": 9.94946497381164e-07, "loss": 0.1424, "step": 18295 }, { "epoch": 2.583451002541655, "grad_norm": 3.1060321689955024, "learning_rate": 9.942837013794149e-07, "loss": 0.152, "step": 18296 }, { "epoch": 2.583592205591641, "grad_norm": 3.983963877187781, "learning_rate": 9.936211146666253e-07, "loss": 0.2171, "step": 18297 }, { "epoch": 2.583733408641627, "grad_norm": 3.7080011791762058, "learning_rate": 9.929587372581917e-07, "loss": 0.1538, "step": 18298 }, { "epoch": 2.5838746116916127, "grad_norm": 4.166461891936424, "learning_rate": 9.922965691695076e-07, "loss": 0.1513, "step": 18299 }, { "epoch": 2.5840158147415986, "grad_norm": 3.194478156320871, "learning_rate": 9.916346104159602e-07, "loss": 0.1541, "step": 18300 }, { "epoch": 2.5841570177915845, "grad_norm": 4.072256611435509, "learning_rate": 9.909728610129355e-07, "loss": 0.1462, "step": 18301 }, { "epoch": 2.5842982208415703, "grad_norm": 2.648147033425766, "learning_rate": 9.903113209758098e-07, "loss": 0.1164, "step": 18302 }, { "epoch": 2.5844394238915562, "grad_norm": 3.464268031029866, "learning_rate": 9.896499903199575e-07, "loss": 0.1441, "step": 18303 }, { "epoch": 2.584580626941542, "grad_norm": 3.7384543989484027, "learning_rate": 9.889888690607485e-07, "loss": 0.1064, "step": 18304 }, { "epoch": 2.584721829991528, "grad_norm": 2.1705098476602553, "learning_rate": 9.883279572135474e-07, "loss": 0.0871, "step": 18305 }, { "epoch": 2.584863033041514, "grad_norm": 3.079720609827303, "learning_rate": 9.876672547937117e-07, "loss": 0.1572, "step": 18306 }, { "epoch": 2.5850042360914998, "grad_norm": 4.368219523345479, "learning_rate": 9.870067618165968e-07, "loss": 0.1603, "step": 18307 }, { "epoch": 2.5851454391414856, "grad_norm": 3.663606302726044, "learning_rate": 9.86346478297552e-07, "loss": 0.1811, "step": 18308 }, { "epoch": 2.5852866421914715, "grad_norm": 3.6037107515814126, "learning_rate": 9.856864042519232e-07, "loss": 0.1336, "step": 18309 }, { "epoch": 2.5854278452414574, "grad_norm": 3.9741541888766507, "learning_rate": 9.85026539695051e-07, "loss": 0.2004, "step": 18310 }, { "epoch": 2.5855690482914433, "grad_norm": 3.4864291764706783, "learning_rate": 9.843668846422672e-07, "loss": 0.1628, "step": 18311 }, { "epoch": 2.585710251341429, "grad_norm": 2.6339455431015413, "learning_rate": 9.83707439108903e-07, "loss": 0.1222, "step": 18312 }, { "epoch": 2.585851454391415, "grad_norm": 2.5777712369622927, "learning_rate": 9.830482031102828e-07, "loss": 0.1164, "step": 18313 }, { "epoch": 2.585992657441401, "grad_norm": 3.7476792968413597, "learning_rate": 9.8238917666173e-07, "loss": 0.1586, "step": 18314 }, { "epoch": 2.586133860491387, "grad_norm": 3.0225690930985976, "learning_rate": 9.817303597785577e-07, "loss": 0.152, "step": 18315 }, { "epoch": 2.5862750635413727, "grad_norm": 3.6375325584193425, "learning_rate": 9.810717524760783e-07, "loss": 0.152, "step": 18316 }, { "epoch": 2.5864162665913586, "grad_norm": 3.335363696005989, "learning_rate": 9.804133547695948e-07, "loss": 0.1509, "step": 18317 }, { "epoch": 2.5865574696413445, "grad_norm": 2.5882458028251984, "learning_rate": 9.79755166674411e-07, "loss": 0.135, "step": 18318 }, { "epoch": 2.5866986726913304, "grad_norm": 3.3573792771875066, "learning_rate": 9.790971882058208e-07, "loss": 0.1412, "step": 18319 }, { "epoch": 2.5868398757413162, "grad_norm": 3.700453024047953, "learning_rate": 9.784394193791169e-07, "loss": 0.1911, "step": 18320 }, { "epoch": 2.586981078791302, "grad_norm": 3.143032632048506, "learning_rate": 9.777818602095846e-07, "loss": 0.1478, "step": 18321 }, { "epoch": 2.587122281841288, "grad_norm": 3.922324275590088, "learning_rate": 9.77124510712505e-07, "loss": 0.2086, "step": 18322 }, { "epoch": 2.587263484891274, "grad_norm": 3.5645506121854953, "learning_rate": 9.764673709031558e-07, "loss": 0.1516, "step": 18323 }, { "epoch": 2.5874046879412598, "grad_norm": 3.160020826657492, "learning_rate": 9.758104407968073e-07, "loss": 0.1682, "step": 18324 }, { "epoch": 2.5875458909912457, "grad_norm": 3.8383875208206293, "learning_rate": 9.751537204087258e-07, "loss": 0.1691, "step": 18325 }, { "epoch": 2.5876870940412315, "grad_norm": 3.667706949317372, "learning_rate": 9.74497209754175e-07, "loss": 0.199, "step": 18326 }, { "epoch": 2.5878282970912174, "grad_norm": 3.6154091843481218, "learning_rate": 9.738409088484135e-07, "loss": 0.1693, "step": 18327 }, { "epoch": 2.5879695001412033, "grad_norm": 3.1755985701535385, "learning_rate": 9.731848177066905e-07, "loss": 0.1455, "step": 18328 }, { "epoch": 2.588110703191189, "grad_norm": 3.65646232189554, "learning_rate": 9.725289363442526e-07, "loss": 0.2244, "step": 18329 }, { "epoch": 2.588251906241175, "grad_norm": 3.3581870565933016, "learning_rate": 9.718732647763419e-07, "loss": 0.1646, "step": 18330 }, { "epoch": 2.588393109291161, "grad_norm": 3.4476195504016522, "learning_rate": 9.712178030181996e-07, "loss": 0.1736, "step": 18331 }, { "epoch": 2.588534312341147, "grad_norm": 2.6530411755060617, "learning_rate": 9.705625510850557e-07, "loss": 0.1257, "step": 18332 }, { "epoch": 2.5886755153911327, "grad_norm": 4.5146833004295495, "learning_rate": 9.699075089921396e-07, "loss": 0.2019, "step": 18333 }, { "epoch": 2.5888167184411186, "grad_norm": 3.413491115029616, "learning_rate": 9.692526767546727e-07, "loss": 0.1512, "step": 18334 }, { "epoch": 2.588957921491104, "grad_norm": 2.8209104705375903, "learning_rate": 9.685980543878736e-07, "loss": 0.1549, "step": 18335 }, { "epoch": 2.58909912454109, "grad_norm": 2.9867407600733933, "learning_rate": 9.679436419069555e-07, "loss": 0.1688, "step": 18336 }, { "epoch": 2.589240327591076, "grad_norm": 4.3618084763326275, "learning_rate": 9.67289439327127e-07, "loss": 0.185, "step": 18337 }, { "epoch": 2.5893815306410617, "grad_norm": 2.871417702706307, "learning_rate": 9.666354466635908e-07, "loss": 0.1273, "step": 18338 }, { "epoch": 2.5895227336910476, "grad_norm": 3.7238023210463953, "learning_rate": 9.659816639315444e-07, "loss": 0.166, "step": 18339 }, { "epoch": 2.5896639367410335, "grad_norm": 3.6545060035030374, "learning_rate": 9.653280911461837e-07, "loss": 0.1468, "step": 18340 }, { "epoch": 2.5898051397910193, "grad_norm": 3.1487144212494758, "learning_rate": 9.646747283226965e-07, "loss": 0.152, "step": 18341 }, { "epoch": 2.5899463428410052, "grad_norm": 3.6240648546337226, "learning_rate": 9.640215754762638e-07, "loss": 0.1745, "step": 18342 }, { "epoch": 2.590087545890991, "grad_norm": 3.006838392716478, "learning_rate": 9.633686326220704e-07, "loss": 0.1342, "step": 18343 }, { "epoch": 2.590228748940977, "grad_norm": 3.483413851927523, "learning_rate": 9.627158997752883e-07, "loss": 0.1998, "step": 18344 }, { "epoch": 2.590369951990963, "grad_norm": 3.580504155648478, "learning_rate": 9.620633769510846e-07, "loss": 0.1955, "step": 18345 }, { "epoch": 2.5905111550409488, "grad_norm": 3.3970746722716214, "learning_rate": 9.614110641646235e-07, "loss": 0.1489, "step": 18346 }, { "epoch": 2.5906523580909346, "grad_norm": 3.4032976106450548, "learning_rate": 9.607589614310674e-07, "loss": 0.1888, "step": 18347 }, { "epoch": 2.5907935611409205, "grad_norm": 5.477658141773086, "learning_rate": 9.601070687655667e-07, "loss": 0.1731, "step": 18348 }, { "epoch": 2.5909347641909064, "grad_norm": 2.9018112518835864, "learning_rate": 9.594553861832755e-07, "loss": 0.1231, "step": 18349 }, { "epoch": 2.5910759672408923, "grad_norm": 3.1139767000686347, "learning_rate": 9.588039136993366e-07, "loss": 0.1157, "step": 18350 }, { "epoch": 2.591217170290878, "grad_norm": 2.675594700603319, "learning_rate": 9.58152651328891e-07, "loss": 0.1271, "step": 18351 }, { "epoch": 2.591358373340864, "grad_norm": 3.097668781536899, "learning_rate": 9.575015990870717e-07, "loss": 0.1367, "step": 18352 }, { "epoch": 2.59149957639085, "grad_norm": 2.7188537147662237, "learning_rate": 9.568507569890117e-07, "loss": 0.1094, "step": 18353 }, { "epoch": 2.591640779440836, "grad_norm": 3.390244849828202, "learning_rate": 9.562001250498333e-07, "loss": 0.1685, "step": 18354 }, { "epoch": 2.5917819824908217, "grad_norm": 3.1706209564964642, "learning_rate": 9.555497032846583e-07, "loss": 0.1357, "step": 18355 }, { "epoch": 2.5919231855408076, "grad_norm": 3.5925862259729833, "learning_rate": 9.54899491708603e-07, "loss": 0.1744, "step": 18356 }, { "epoch": 2.5920643885907935, "grad_norm": 3.7043083874845144, "learning_rate": 9.542494903367772e-07, "loss": 0.1687, "step": 18357 }, { "epoch": 2.5922055916407793, "grad_norm": 2.55443452553865, "learning_rate": 9.535996991842855e-07, "loss": 0.1286, "step": 18358 }, { "epoch": 2.5923467946907652, "grad_norm": 3.0555556555411436, "learning_rate": 9.529501182662315e-07, "loss": 0.1329, "step": 18359 }, { "epoch": 2.592487997740751, "grad_norm": 3.1935043627304243, "learning_rate": 9.523007475977064e-07, "loss": 0.1522, "step": 18360 }, { "epoch": 2.592629200790737, "grad_norm": 5.374659568591887, "learning_rate": 9.516515871938093e-07, "loss": 0.184, "step": 18361 }, { "epoch": 2.592770403840723, "grad_norm": 3.3664454117300293, "learning_rate": 9.51002637069619e-07, "loss": 0.1271, "step": 18362 }, { "epoch": 2.5929116068907088, "grad_norm": 3.63843438693323, "learning_rate": 9.503538972402204e-07, "loss": 0.1574, "step": 18363 }, { "epoch": 2.5930528099406946, "grad_norm": 2.9219045216203536, "learning_rate": 9.49705367720688e-07, "loss": 0.1528, "step": 18364 }, { "epoch": 2.5931940129906805, "grad_norm": 3.931551353838078, "learning_rate": 9.49057048526093e-07, "loss": 0.1708, "step": 18365 }, { "epoch": 2.5933352160406664, "grad_norm": 2.7029178333704063, "learning_rate": 9.484089396715057e-07, "loss": 0.1632, "step": 18366 }, { "epoch": 2.5934764190906523, "grad_norm": 2.8129230331645103, "learning_rate": 9.47761041171985e-07, "loss": 0.117, "step": 18367 }, { "epoch": 2.593617622140638, "grad_norm": 3.497498093699572, "learning_rate": 9.47113353042588e-07, "loss": 0.1396, "step": 18368 }, { "epoch": 2.593758825190624, "grad_norm": 3.324400076739421, "learning_rate": 9.464658752983669e-07, "loss": 0.1744, "step": 18369 }, { "epoch": 2.59390002824061, "grad_norm": 2.598166800816424, "learning_rate": 9.458186079543697e-07, "loss": 0.1119, "step": 18370 }, { "epoch": 2.594041231290596, "grad_norm": 3.710417554423941, "learning_rate": 9.451715510256377e-07, "loss": 0.1859, "step": 18371 }, { "epoch": 2.5941824343405817, "grad_norm": 2.9052018960840913, "learning_rate": 9.445247045272077e-07, "loss": 0.1293, "step": 18372 }, { "epoch": 2.5943236373905676, "grad_norm": 3.9169243696528397, "learning_rate": 9.438780684741134e-07, "loss": 0.1597, "step": 18373 }, { "epoch": 2.5944648404405535, "grad_norm": 3.8937127838526466, "learning_rate": 9.432316428813826e-07, "loss": 0.1495, "step": 18374 }, { "epoch": 2.5946060434905394, "grad_norm": 3.4781496635815263, "learning_rate": 9.425854277640356e-07, "loss": 0.1498, "step": 18375 }, { "epoch": 2.5947472465405252, "grad_norm": 3.200457339988378, "learning_rate": 9.419394231370926e-07, "loss": 0.1204, "step": 18376 }, { "epoch": 2.594888449590511, "grad_norm": 2.7953736557944997, "learning_rate": 9.412936290155627e-07, "loss": 0.1346, "step": 18377 }, { "epoch": 2.595029652640497, "grad_norm": 3.069516892070966, "learning_rate": 9.406480454144617e-07, "loss": 0.1333, "step": 18378 }, { "epoch": 2.595170855690483, "grad_norm": 3.3482820924392653, "learning_rate": 9.400026723487854e-07, "loss": 0.1442, "step": 18379 }, { "epoch": 2.5953120587404688, "grad_norm": 2.1447076899335977, "learning_rate": 9.393575098335339e-07, "loss": 0.0999, "step": 18380 }, { "epoch": 2.5954532617904547, "grad_norm": 3.580533487446578, "learning_rate": 9.387125578837008e-07, "loss": 0.1547, "step": 18381 }, { "epoch": 2.5955944648404405, "grad_norm": 2.8221158808776825, "learning_rate": 9.380678165142732e-07, "loss": 0.1272, "step": 18382 }, { "epoch": 2.5957356678904264, "grad_norm": 2.9199868201191124, "learning_rate": 9.374232857402376e-07, "loss": 0.1464, "step": 18383 }, { "epoch": 2.5958768709404123, "grad_norm": 2.957001221242242, "learning_rate": 9.367789655765703e-07, "loss": 0.1123, "step": 18384 }, { "epoch": 2.596018073990398, "grad_norm": 2.7994137762280547, "learning_rate": 9.361348560382467e-07, "loss": 0.1318, "step": 18385 }, { "epoch": 2.596159277040384, "grad_norm": 3.192407554789306, "learning_rate": 9.354909571402349e-07, "loss": 0.1746, "step": 18386 }, { "epoch": 2.59630048009037, "grad_norm": 4.189350189763951, "learning_rate": 9.348472688974974e-07, "loss": 0.1525, "step": 18387 }, { "epoch": 2.596441683140356, "grad_norm": 4.3105662811083345, "learning_rate": 9.342037913249957e-07, "loss": 0.2338, "step": 18388 }, { "epoch": 2.5965828861903417, "grad_norm": 2.9950639123945675, "learning_rate": 9.335605244376821e-07, "loss": 0.1409, "step": 18389 }, { "epoch": 2.5967240892403276, "grad_norm": 3.8586787307094856, "learning_rate": 9.32917468250506e-07, "loss": 0.1611, "step": 18390 }, { "epoch": 2.5968652922903135, "grad_norm": 3.387232757427889, "learning_rate": 9.32274622778413e-07, "loss": 0.1699, "step": 18391 }, { "epoch": 2.5970064953402994, "grad_norm": 3.5722260950533875, "learning_rate": 9.316319880363411e-07, "loss": 0.1591, "step": 18392 }, { "epoch": 2.5971476983902853, "grad_norm": 2.215633431206377, "learning_rate": 9.309895640392263e-07, "loss": 0.1071, "step": 18393 }, { "epoch": 2.597288901440271, "grad_norm": 2.850018744369343, "learning_rate": 9.303473508019944e-07, "loss": 0.1197, "step": 18394 }, { "epoch": 2.597430104490257, "grad_norm": 3.355483088207705, "learning_rate": 9.297053483395779e-07, "loss": 0.1467, "step": 18395 }, { "epoch": 2.597571307540243, "grad_norm": 3.0265153871154036, "learning_rate": 9.290635566668893e-07, "loss": 0.1511, "step": 18396 }, { "epoch": 2.597712510590229, "grad_norm": 4.126638371637896, "learning_rate": 9.284219757988466e-07, "loss": 0.1838, "step": 18397 }, { "epoch": 2.5978537136402147, "grad_norm": 2.482472404601543, "learning_rate": 9.277806057503592e-07, "loss": 0.088, "step": 18398 }, { "epoch": 2.5979949166902006, "grad_norm": 2.6968350259810805, "learning_rate": 9.271394465363314e-07, "loss": 0.1232, "step": 18399 }, { "epoch": 2.5981361197401864, "grad_norm": 3.217370274763721, "learning_rate": 9.264984981716663e-07, "loss": 0.1341, "step": 18400 }, { "epoch": 2.5982773227901723, "grad_norm": 2.8785520261275797, "learning_rate": 9.258577606712571e-07, "loss": 0.1583, "step": 18401 }, { "epoch": 2.598418525840158, "grad_norm": 2.5570120755396193, "learning_rate": 9.252172340499943e-07, "loss": 0.1019, "step": 18402 }, { "epoch": 2.598559728890144, "grad_norm": 3.654547606714179, "learning_rate": 9.245769183227649e-07, "loss": 0.1429, "step": 18403 }, { "epoch": 2.59870093194013, "grad_norm": 3.5924303894474408, "learning_rate": 9.23936813504448e-07, "loss": 0.159, "step": 18404 }, { "epoch": 2.598842134990116, "grad_norm": 4.086384323285141, "learning_rate": 9.232969196099195e-07, "loss": 0.1815, "step": 18405 }, { "epoch": 2.5989833380401017, "grad_norm": 2.91285595742083, "learning_rate": 9.22657236654051e-07, "loss": 0.1381, "step": 18406 }, { "epoch": 2.5991245410900876, "grad_norm": 2.697494060377228, "learning_rate": 9.220177646517081e-07, "loss": 0.1015, "step": 18407 }, { "epoch": 2.5992657441400735, "grad_norm": 3.7240491147614123, "learning_rate": 9.213785036177525e-07, "loss": 0.14, "step": 18408 }, { "epoch": 2.5994069471900594, "grad_norm": 3.49013499423758, "learning_rate": 9.207394535670389e-07, "loss": 0.1996, "step": 18409 }, { "epoch": 2.5995481502400453, "grad_norm": 3.337850397139976, "learning_rate": 9.201006145144198e-07, "loss": 0.1547, "step": 18410 }, { "epoch": 2.599689353290031, "grad_norm": 2.5572678529748174, "learning_rate": 9.194619864747389e-07, "loss": 0.115, "step": 18411 }, { "epoch": 2.599830556340017, "grad_norm": 3.161816526250413, "learning_rate": 9.188235694628445e-07, "loss": 0.1399, "step": 18412 }, { "epoch": 2.599971759390003, "grad_norm": 2.6140498499566376, "learning_rate": 9.181853634935656e-07, "loss": 0.1222, "step": 18413 }, { "epoch": 2.600112962439989, "grad_norm": 2.9292006238781387, "learning_rate": 9.175473685817371e-07, "loss": 0.105, "step": 18414 }, { "epoch": 2.6002541654899747, "grad_norm": 3.650667952536601, "learning_rate": 9.16909584742186e-07, "loss": 0.1952, "step": 18415 }, { "epoch": 2.6003953685399606, "grad_norm": 3.2151218005012043, "learning_rate": 9.162720119897306e-07, "loss": 0.1365, "step": 18416 }, { "epoch": 2.6005365715899464, "grad_norm": 3.2100474186268646, "learning_rate": 9.156346503391922e-07, "loss": 0.1717, "step": 18417 }, { "epoch": 2.6006777746399323, "grad_norm": 3.503381222526579, "learning_rate": 9.149974998053823e-07, "loss": 0.1816, "step": 18418 }, { "epoch": 2.600818977689918, "grad_norm": 3.8816447198989485, "learning_rate": 9.14360560403107e-07, "loss": 0.1829, "step": 18419 }, { "epoch": 2.600960180739904, "grad_norm": 2.639529852748756, "learning_rate": 9.137238321471675e-07, "loss": 0.1137, "step": 18420 }, { "epoch": 2.60110138378989, "grad_norm": 3.3457615390741275, "learning_rate": 9.130873150523656e-07, "loss": 0.1463, "step": 18421 }, { "epoch": 2.601242586839876, "grad_norm": 3.6427778278888985, "learning_rate": 9.124510091334849e-07, "loss": 0.171, "step": 18422 }, { "epoch": 2.6013837898898617, "grad_norm": 3.02820606358471, "learning_rate": 9.118149144053201e-07, "loss": 0.159, "step": 18423 }, { "epoch": 2.6015249929398476, "grad_norm": 2.5180200573879836, "learning_rate": 9.111790308826529e-07, "loss": 0.1064, "step": 18424 }, { "epoch": 2.6016661959898335, "grad_norm": 2.553887105706108, "learning_rate": 9.105433585802592e-07, "loss": 0.1378, "step": 18425 }, { "epoch": 2.6018073990398194, "grad_norm": 3.041914433670801, "learning_rate": 9.099078975129116e-07, "loss": 0.1161, "step": 18426 }, { "epoch": 2.6019486020898053, "grad_norm": 3.3652955356559935, "learning_rate": 9.092726476953794e-07, "loss": 0.1544, "step": 18427 }, { "epoch": 2.602089805139791, "grad_norm": 2.9595507964058823, "learning_rate": 9.086376091424243e-07, "loss": 0.1357, "step": 18428 }, { "epoch": 2.602231008189777, "grad_norm": 2.872505083197353, "learning_rate": 9.080027818688064e-07, "loss": 0.1355, "step": 18429 }, { "epoch": 2.602372211239763, "grad_norm": 2.757134933735135, "learning_rate": 9.073681658892775e-07, "loss": 0.1169, "step": 18430 }, { "epoch": 2.602513414289749, "grad_norm": 3.4392805224163094, "learning_rate": 9.067337612185845e-07, "loss": 0.1678, "step": 18431 }, { "epoch": 2.6026546173397347, "grad_norm": 3.3693641582759466, "learning_rate": 9.060995678714712e-07, "loss": 0.1605, "step": 18432 }, { "epoch": 2.6027958203897206, "grad_norm": 3.0164463998860733, "learning_rate": 9.054655858626782e-07, "loss": 0.1134, "step": 18433 }, { "epoch": 2.6029370234397065, "grad_norm": 2.687804952594379, "learning_rate": 9.048318152069346e-07, "loss": 0.0943, "step": 18434 }, { "epoch": 2.6030782264896923, "grad_norm": 3.270929603810049, "learning_rate": 9.041982559189732e-07, "loss": 0.1646, "step": 18435 }, { "epoch": 2.603219429539678, "grad_norm": 2.5018703304229777, "learning_rate": 9.035649080135167e-07, "loss": 0.1245, "step": 18436 }, { "epoch": 2.603360632589664, "grad_norm": 3.714554571802927, "learning_rate": 9.029317715052855e-07, "loss": 0.1377, "step": 18437 }, { "epoch": 2.6035018356396495, "grad_norm": 3.3281005944695075, "learning_rate": 9.022988464089888e-07, "loss": 0.1513, "step": 18438 }, { "epoch": 2.6036430386896354, "grad_norm": 2.8103669334882353, "learning_rate": 9.016661327393361e-07, "loss": 0.1499, "step": 18439 }, { "epoch": 2.6037842417396213, "grad_norm": 3.294043931982125, "learning_rate": 9.010336305110345e-07, "loss": 0.1475, "step": 18440 }, { "epoch": 2.603925444789607, "grad_norm": 2.4517513756523375, "learning_rate": 9.004013397387823e-07, "loss": 0.1038, "step": 18441 }, { "epoch": 2.604066647839593, "grad_norm": 3.281614221632581, "learning_rate": 8.997692604372743e-07, "loss": 0.1211, "step": 18442 }, { "epoch": 2.604207850889579, "grad_norm": 3.6716178466434872, "learning_rate": 8.991373926211966e-07, "loss": 0.1819, "step": 18443 }, { "epoch": 2.604349053939565, "grad_norm": 3.942594090236599, "learning_rate": 8.985057363052374e-07, "loss": 0.1926, "step": 18444 }, { "epoch": 2.6044902569895507, "grad_norm": 2.7435912187068427, "learning_rate": 8.978742915040706e-07, "loss": 0.1422, "step": 18445 }, { "epoch": 2.6046314600395366, "grad_norm": 2.991532503664667, "learning_rate": 8.972430582323788e-07, "loss": 0.1417, "step": 18446 }, { "epoch": 2.6047726630895225, "grad_norm": 3.6636987975316226, "learning_rate": 8.966120365048259e-07, "loss": 0.1654, "step": 18447 }, { "epoch": 2.6049138661395084, "grad_norm": 3.4496642637245287, "learning_rate": 8.959812263360779e-07, "loss": 0.1562, "step": 18448 }, { "epoch": 2.6050550691894943, "grad_norm": 2.2929680611100234, "learning_rate": 8.953506277407931e-07, "loss": 0.0942, "step": 18449 }, { "epoch": 2.60519627223948, "grad_norm": 4.398204266104556, "learning_rate": 8.947202407336286e-07, "loss": 0.1674, "step": 18450 }, { "epoch": 2.605337475289466, "grad_norm": 2.6966464633907243, "learning_rate": 8.940900653292317e-07, "loss": 0.1181, "step": 18451 }, { "epoch": 2.605478678339452, "grad_norm": 2.9546396210945374, "learning_rate": 8.934601015422506e-07, "loss": 0.1383, "step": 18452 }, { "epoch": 2.605619881389438, "grad_norm": 2.2994411965072628, "learning_rate": 8.928303493873247e-07, "loss": 0.1224, "step": 18453 }, { "epoch": 2.6057610844394237, "grad_norm": 2.850176117313274, "learning_rate": 8.92200808879089e-07, "loss": 0.1138, "step": 18454 }, { "epoch": 2.6059022874894096, "grad_norm": 3.231349598333421, "learning_rate": 8.915714800321729e-07, "loss": 0.1356, "step": 18455 }, { "epoch": 2.6060434905393954, "grad_norm": 3.372588333660419, "learning_rate": 8.909423628611991e-07, "loss": 0.1246, "step": 18456 }, { "epoch": 2.6061846935893813, "grad_norm": 2.874614374435151, "learning_rate": 8.903134573807925e-07, "loss": 0.1316, "step": 18457 }, { "epoch": 2.606325896639367, "grad_norm": 2.452533830928694, "learning_rate": 8.896847636055672e-07, "loss": 0.0896, "step": 18458 }, { "epoch": 2.606467099689353, "grad_norm": 3.403642681478167, "learning_rate": 8.890562815501336e-07, "loss": 0.1343, "step": 18459 }, { "epoch": 2.606608302739339, "grad_norm": 3.6140994230477914, "learning_rate": 8.884280112290977e-07, "loss": 0.1631, "step": 18460 }, { "epoch": 2.606749505789325, "grad_norm": 2.9175638237559274, "learning_rate": 8.877999526570591e-07, "loss": 0.1423, "step": 18461 }, { "epoch": 2.6068907088393107, "grad_norm": 2.731426571107013, "learning_rate": 8.871721058486149e-07, "loss": 0.1337, "step": 18462 }, { "epoch": 2.6070319118892966, "grad_norm": 4.163641610833574, "learning_rate": 8.865444708183558e-07, "loss": 0.1581, "step": 18463 }, { "epoch": 2.6071731149392825, "grad_norm": 3.0885503425784906, "learning_rate": 8.859170475808665e-07, "loss": 0.1526, "step": 18464 }, { "epoch": 2.6073143179892684, "grad_norm": 3.266011038043512, "learning_rate": 8.8528983615073e-07, "loss": 0.1672, "step": 18465 }, { "epoch": 2.6074555210392543, "grad_norm": 3.6001289235591405, "learning_rate": 8.846628365425203e-07, "loss": 0.1612, "step": 18466 }, { "epoch": 2.60759672408924, "grad_norm": 3.1661660858851404, "learning_rate": 8.8403604877081e-07, "loss": 0.1094, "step": 18467 }, { "epoch": 2.607737927139226, "grad_norm": 3.4193864168594734, "learning_rate": 8.83409472850163e-07, "loss": 0.1622, "step": 18468 }, { "epoch": 2.607879130189212, "grad_norm": 2.8247905413736336, "learning_rate": 8.827831087951455e-07, "loss": 0.1445, "step": 18469 }, { "epoch": 2.608020333239198, "grad_norm": 3.61594523666992, "learning_rate": 8.821569566203103e-07, "loss": 0.1504, "step": 18470 }, { "epoch": 2.6081615362891837, "grad_norm": 3.0434120301705687, "learning_rate": 8.815310163402113e-07, "loss": 0.1629, "step": 18471 }, { "epoch": 2.6083027393391696, "grad_norm": 4.468330861745514, "learning_rate": 8.809052879693925e-07, "loss": 0.1944, "step": 18472 }, { "epoch": 2.6084439423891554, "grad_norm": 3.0048683731910777, "learning_rate": 8.802797715223943e-07, "loss": 0.1247, "step": 18473 }, { "epoch": 2.6085851454391413, "grad_norm": 2.4312899958701895, "learning_rate": 8.796544670137574e-07, "loss": 0.1042, "step": 18474 }, { "epoch": 2.608726348489127, "grad_norm": 2.742654715858938, "learning_rate": 8.790293744580125e-07, "loss": 0.0976, "step": 18475 }, { "epoch": 2.608867551539113, "grad_norm": 2.7750074174560093, "learning_rate": 8.784044938696856e-07, "loss": 0.1319, "step": 18476 }, { "epoch": 2.609008754589099, "grad_norm": 4.157019796271099, "learning_rate": 8.777798252632986e-07, "loss": 0.2118, "step": 18477 }, { "epoch": 2.609149957639085, "grad_norm": 4.0704833123593245, "learning_rate": 8.771553686533684e-07, "loss": 0.1373, "step": 18478 }, { "epoch": 2.6092911606890707, "grad_norm": 3.7187210076875363, "learning_rate": 8.765311240544083e-07, "loss": 0.1779, "step": 18479 }, { "epoch": 2.6094323637390566, "grad_norm": 3.5393868696138036, "learning_rate": 8.759070914809253e-07, "loss": 0.155, "step": 18480 }, { "epoch": 2.6095735667890425, "grad_norm": 3.0891236363084604, "learning_rate": 8.752832709474202e-07, "loss": 0.1268, "step": 18481 }, { "epoch": 2.6097147698390284, "grad_norm": 2.985675989102364, "learning_rate": 8.746596624683922e-07, "loss": 0.1237, "step": 18482 }, { "epoch": 2.6098559728890143, "grad_norm": 2.6651186194635814, "learning_rate": 8.740362660583312e-07, "loss": 0.1076, "step": 18483 }, { "epoch": 2.609997175939, "grad_norm": 3.3805677008532147, "learning_rate": 8.734130817317277e-07, "loss": 0.1626, "step": 18484 }, { "epoch": 2.610138378988986, "grad_norm": 2.768556217940723, "learning_rate": 8.7279010950306e-07, "loss": 0.1144, "step": 18485 }, { "epoch": 2.610279582038972, "grad_norm": 2.6117266811163438, "learning_rate": 8.721673493868111e-07, "loss": 0.1351, "step": 18486 }, { "epoch": 2.610420785088958, "grad_norm": 2.9223396570606, "learning_rate": 8.715448013974493e-07, "loss": 0.146, "step": 18487 }, { "epoch": 2.6105619881389437, "grad_norm": 3.0047885739597238, "learning_rate": 8.709224655494475e-07, "loss": 0.1668, "step": 18488 }, { "epoch": 2.6107031911889296, "grad_norm": 2.7512304488414476, "learning_rate": 8.703003418572631e-07, "loss": 0.1156, "step": 18489 }, { "epoch": 2.6108443942389155, "grad_norm": 2.7040568245715004, "learning_rate": 8.696784303353534e-07, "loss": 0.1309, "step": 18490 }, { "epoch": 2.6109855972889013, "grad_norm": 2.7510720061071527, "learning_rate": 8.690567309981756e-07, "loss": 0.1526, "step": 18491 }, { "epoch": 2.611126800338887, "grad_norm": 2.8856062380922824, "learning_rate": 8.684352438601762e-07, "loss": 0.1285, "step": 18492 }, { "epoch": 2.611268003388873, "grad_norm": 3.8526380084047545, "learning_rate": 8.67813968935799e-07, "loss": 0.1907, "step": 18493 }, { "epoch": 2.611409206438859, "grad_norm": 3.1514153275588024, "learning_rate": 8.671929062394802e-07, "loss": 0.1194, "step": 18494 }, { "epoch": 2.611550409488845, "grad_norm": 3.084821146359261, "learning_rate": 8.66572055785655e-07, "loss": 0.1835, "step": 18495 }, { "epoch": 2.6116916125388308, "grad_norm": 3.035041502218832, "learning_rate": 8.659514175887495e-07, "loss": 0.1156, "step": 18496 }, { "epoch": 2.6118328155888166, "grad_norm": 3.064193633824566, "learning_rate": 8.653309916631891e-07, "loss": 0.1378, "step": 18497 }, { "epoch": 2.6119740186388025, "grad_norm": 3.510994964693351, "learning_rate": 8.647107780233921e-07, "loss": 0.1566, "step": 18498 }, { "epoch": 2.6121152216887884, "grad_norm": 2.6680705367330537, "learning_rate": 8.640907766837703e-07, "loss": 0.1223, "step": 18499 }, { "epoch": 2.6122564247387743, "grad_norm": 3.9284119326951177, "learning_rate": 8.634709876587344e-07, "loss": 0.1389, "step": 18500 }, { "epoch": 2.61239762778876, "grad_norm": 2.6334504647931065, "learning_rate": 8.628514109626863e-07, "loss": 0.1197, "step": 18501 }, { "epoch": 2.612538830838746, "grad_norm": 3.418581090427093, "learning_rate": 8.622320466100242e-07, "loss": 0.1404, "step": 18502 }, { "epoch": 2.612680033888732, "grad_norm": 2.889263219676667, "learning_rate": 8.616128946151436e-07, "loss": 0.09, "step": 18503 }, { "epoch": 2.612821236938718, "grad_norm": 3.1098580543392, "learning_rate": 8.60993954992434e-07, "loss": 0.1296, "step": 18504 }, { "epoch": 2.6129624399887037, "grad_norm": 3.0683724597727147, "learning_rate": 8.603752277562794e-07, "loss": 0.1337, "step": 18505 }, { "epoch": 2.6131036430386896, "grad_norm": 3.308085635623813, "learning_rate": 8.59756712921056e-07, "loss": 0.1456, "step": 18506 }, { "epoch": 2.6132448460886755, "grad_norm": 3.846191803838156, "learning_rate": 8.591384105011369e-07, "loss": 0.1898, "step": 18507 }, { "epoch": 2.6133860491386613, "grad_norm": 3.101796510268107, "learning_rate": 8.585203205108949e-07, "loss": 0.1804, "step": 18508 }, { "epoch": 2.6135272521886472, "grad_norm": 3.4606210252000413, "learning_rate": 8.579024429646932e-07, "loss": 0.1833, "step": 18509 }, { "epoch": 2.613668455238633, "grad_norm": 2.443871383666741, "learning_rate": 8.572847778768912e-07, "loss": 0.1079, "step": 18510 }, { "epoch": 2.613809658288619, "grad_norm": 2.8282772863272085, "learning_rate": 8.566673252618419e-07, "loss": 0.1471, "step": 18511 }, { "epoch": 2.613950861338605, "grad_norm": 3.306219646785648, "learning_rate": 8.560500851338949e-07, "loss": 0.1857, "step": 18512 }, { "epoch": 2.6140920643885908, "grad_norm": 2.627155728185175, "learning_rate": 8.554330575073954e-07, "loss": 0.1153, "step": 18513 }, { "epoch": 2.6142332674385766, "grad_norm": 3.4758714752145585, "learning_rate": 8.548162423966832e-07, "loss": 0.1555, "step": 18514 }, { "epoch": 2.6143744704885625, "grad_norm": 2.878832692572717, "learning_rate": 8.541996398160912e-07, "loss": 0.105, "step": 18515 }, { "epoch": 2.6145156735385484, "grad_norm": 3.5163895708867723, "learning_rate": 8.53583249779949e-07, "loss": 0.1413, "step": 18516 }, { "epoch": 2.6146568765885343, "grad_norm": 3.0580466331721032, "learning_rate": 8.529670723025829e-07, "loss": 0.1443, "step": 18517 }, { "epoch": 2.61479807963852, "grad_norm": 3.9006667615474058, "learning_rate": 8.523511073983127e-07, "loss": 0.2118, "step": 18518 }, { "epoch": 2.614939282688506, "grad_norm": 3.686763496210329, "learning_rate": 8.517353550814488e-07, "loss": 0.1582, "step": 18519 }, { "epoch": 2.615080485738492, "grad_norm": 3.4311146846229956, "learning_rate": 8.511198153663069e-07, "loss": 0.1838, "step": 18520 }, { "epoch": 2.615221688788478, "grad_norm": 2.8511727309338393, "learning_rate": 8.505044882671898e-07, "loss": 0.107, "step": 18521 }, { "epoch": 2.6153628918384637, "grad_norm": 2.989363149261534, "learning_rate": 8.498893737983982e-07, "loss": 0.1259, "step": 18522 }, { "epoch": 2.6155040948884496, "grad_norm": 4.0455964987841, "learning_rate": 8.49274471974224e-07, "loss": 0.1997, "step": 18523 }, { "epoch": 2.6156452979384355, "grad_norm": 3.0527422692495625, "learning_rate": 8.486597828089594e-07, "loss": 0.1854, "step": 18524 }, { "epoch": 2.6157865009884214, "grad_norm": 2.8269618024783503, "learning_rate": 8.48045306316887e-07, "loss": 0.1292, "step": 18525 }, { "epoch": 2.6159277040384072, "grad_norm": 2.8455273781896295, "learning_rate": 8.474310425122923e-07, "loss": 0.1308, "step": 18526 }, { "epoch": 2.616068907088393, "grad_norm": 3.1139280376138254, "learning_rate": 8.46816991409446e-07, "loss": 0.1568, "step": 18527 }, { "epoch": 2.616210110138379, "grad_norm": 3.9554847710306453, "learning_rate": 8.462031530226211e-07, "loss": 0.174, "step": 18528 }, { "epoch": 2.616351313188365, "grad_norm": 2.9066118526979503, "learning_rate": 8.455895273660808e-07, "loss": 0.135, "step": 18529 }, { "epoch": 2.6164925162383508, "grad_norm": 2.7462574457926148, "learning_rate": 8.449761144540869e-07, "loss": 0.0945, "step": 18530 }, { "epoch": 2.6166337192883367, "grad_norm": 2.5041610332760493, "learning_rate": 8.443629143008946e-07, "loss": 0.0991, "step": 18531 }, { "epoch": 2.6167749223383225, "grad_norm": 3.6957642545362277, "learning_rate": 8.437499269207538e-07, "loss": 0.1534, "step": 18532 }, { "epoch": 2.6169161253883084, "grad_norm": 3.591065504391603, "learning_rate": 8.431371523279108e-07, "loss": 0.1651, "step": 18533 }, { "epoch": 2.6170573284382943, "grad_norm": 2.6240083356241497, "learning_rate": 8.425245905366052e-07, "loss": 0.0831, "step": 18534 }, { "epoch": 2.61719853148828, "grad_norm": 3.1574352043571543, "learning_rate": 8.419122415610736e-07, "loss": 0.1569, "step": 18535 }, { "epoch": 2.617339734538266, "grad_norm": 3.472848527420189, "learning_rate": 8.413001054155467e-07, "loss": 0.1488, "step": 18536 }, { "epoch": 2.617480937588252, "grad_norm": 5.120196302408893, "learning_rate": 8.406881821142477e-07, "loss": 0.2207, "step": 18537 }, { "epoch": 2.617622140638238, "grad_norm": 3.358273129573705, "learning_rate": 8.400764716714016e-07, "loss": 0.1524, "step": 18538 }, { "epoch": 2.6177633436882237, "grad_norm": 2.6259012009595146, "learning_rate": 8.394649741012251e-07, "loss": 0.1352, "step": 18539 }, { "epoch": 2.6179045467382096, "grad_norm": 3.91703804465611, "learning_rate": 8.388536894179234e-07, "loss": 0.213, "step": 18540 }, { "epoch": 2.6180457497881955, "grad_norm": 3.206796063010781, "learning_rate": 8.382426176357062e-07, "loss": 0.1743, "step": 18541 }, { "epoch": 2.6181869528381814, "grad_norm": 2.8297321351329954, "learning_rate": 8.376317587687721e-07, "loss": 0.1285, "step": 18542 }, { "epoch": 2.6183281558881673, "grad_norm": 2.9457023988103366, "learning_rate": 8.37021112831321e-07, "loss": 0.1175, "step": 18543 }, { "epoch": 2.618469358938153, "grad_norm": 2.818773920446437, "learning_rate": 8.364106798375416e-07, "loss": 0.1333, "step": 18544 }, { "epoch": 2.618610561988139, "grad_norm": 3.163273668877417, "learning_rate": 8.358004598016212e-07, "loss": 0.1386, "step": 18545 }, { "epoch": 2.618751765038125, "grad_norm": 3.4624315569820365, "learning_rate": 8.351904527377397e-07, "loss": 0.1379, "step": 18546 }, { "epoch": 2.618892968088111, "grad_norm": 3.070519560619336, "learning_rate": 8.345806586600736e-07, "loss": 0.1228, "step": 18547 }, { "epoch": 2.6190341711380967, "grad_norm": 3.4559405679045265, "learning_rate": 8.339710775827958e-07, "loss": 0.1507, "step": 18548 }, { "epoch": 2.6191753741880826, "grad_norm": 3.7790037710013156, "learning_rate": 8.333617095200719e-07, "loss": 0.1707, "step": 18549 }, { "epoch": 2.6193165772380684, "grad_norm": 2.978509378372071, "learning_rate": 8.327525544860626e-07, "loss": 0.1397, "step": 18550 }, { "epoch": 2.6194577802880543, "grad_norm": 3.226123724845677, "learning_rate": 8.321436124949245e-07, "loss": 0.1403, "step": 18551 }, { "epoch": 2.61959898333804, "grad_norm": 3.0090719787076026, "learning_rate": 8.315348835608095e-07, "loss": 0.1068, "step": 18552 }, { "epoch": 2.619740186388026, "grad_norm": 3.482269879969656, "learning_rate": 8.309263676978651e-07, "loss": 0.1497, "step": 18553 }, { "epoch": 2.619881389438012, "grad_norm": 3.979639722052874, "learning_rate": 8.303180649202303e-07, "loss": 0.1735, "step": 18554 }, { "epoch": 2.620022592487998, "grad_norm": 4.156237673347555, "learning_rate": 8.297099752420446e-07, "loss": 0.21, "step": 18555 }, { "epoch": 2.6201637955379837, "grad_norm": 3.239971543062821, "learning_rate": 8.291020986774412e-07, "loss": 0.1419, "step": 18556 }, { "epoch": 2.6203049985879696, "grad_norm": 2.977123413015261, "learning_rate": 8.284944352405421e-07, "loss": 0.1382, "step": 18557 }, { "epoch": 2.6204462016379555, "grad_norm": 3.219361151237995, "learning_rate": 8.278869849454718e-07, "loss": 0.1359, "step": 18558 }, { "epoch": 2.6205874046879414, "grad_norm": 2.9722790252010176, "learning_rate": 8.272797478063444e-07, "loss": 0.0924, "step": 18559 }, { "epoch": 2.6207286077379273, "grad_norm": 2.6638093870443122, "learning_rate": 8.266727238372763e-07, "loss": 0.1027, "step": 18560 }, { "epoch": 2.620869810787913, "grad_norm": 3.578980251017331, "learning_rate": 8.260659130523729e-07, "loss": 0.2191, "step": 18561 }, { "epoch": 2.621011013837899, "grad_norm": 3.8795159145237905, "learning_rate": 8.254593154657353e-07, "loss": 0.1779, "step": 18562 }, { "epoch": 2.621152216887885, "grad_norm": 2.9033614989322056, "learning_rate": 8.248529310914622e-07, "loss": 0.1386, "step": 18563 }, { "epoch": 2.621293419937871, "grad_norm": 3.233262525916338, "learning_rate": 8.242467599436432e-07, "loss": 0.1535, "step": 18564 }, { "epoch": 2.6214346229878567, "grad_norm": 2.536726968121742, "learning_rate": 8.236408020363673e-07, "loss": 0.123, "step": 18565 }, { "epoch": 2.6215758260378426, "grad_norm": 2.758714654726664, "learning_rate": 8.230350573837165e-07, "loss": 0.1249, "step": 18566 }, { "epoch": 2.6217170290878284, "grad_norm": 2.7968602363188553, "learning_rate": 8.224295259997672e-07, "loss": 0.1389, "step": 18567 }, { "epoch": 2.6218582321378143, "grad_norm": 3.66987139141581, "learning_rate": 8.218242078985917e-07, "loss": 0.16, "step": 18568 }, { "epoch": 2.6219994351878, "grad_norm": 3.479434860609257, "learning_rate": 8.212191030942585e-07, "loss": 0.1843, "step": 18569 }, { "epoch": 2.622140638237786, "grad_norm": 3.276790297920599, "learning_rate": 8.206142116008298e-07, "loss": 0.141, "step": 18570 }, { "epoch": 2.622281841287772, "grad_norm": 2.9559581350602713, "learning_rate": 8.20009533432361e-07, "loss": 0.1186, "step": 18571 }, { "epoch": 2.622423044337758, "grad_norm": 3.431912455833615, "learning_rate": 8.194050686029065e-07, "loss": 0.1457, "step": 18572 }, { "epoch": 2.6225642473877437, "grad_norm": 3.381263864116239, "learning_rate": 8.18800817126516e-07, "loss": 0.1614, "step": 18573 }, { "epoch": 2.6227054504377296, "grad_norm": 3.615609036331113, "learning_rate": 8.181967790172274e-07, "loss": 0.1747, "step": 18574 }, { "epoch": 2.6228466534877155, "grad_norm": 3.0238749172850574, "learning_rate": 8.175929542890804e-07, "loss": 0.1325, "step": 18575 }, { "epoch": 2.6229878565377014, "grad_norm": 3.254156174912789, "learning_rate": 8.16989342956106e-07, "loss": 0.1847, "step": 18576 }, { "epoch": 2.6231290595876873, "grad_norm": 2.8594496496045627, "learning_rate": 8.163859450323352e-07, "loss": 0.0957, "step": 18577 }, { "epoch": 2.623270262637673, "grad_norm": 3.2082068641768267, "learning_rate": 8.157827605317892e-07, "loss": 0.1535, "step": 18578 }, { "epoch": 2.623411465687659, "grad_norm": 3.275585560640901, "learning_rate": 8.151797894684855e-07, "loss": 0.1821, "step": 18579 }, { "epoch": 2.623552668737645, "grad_norm": 2.4903124203877898, "learning_rate": 8.145770318564361e-07, "loss": 0.0868, "step": 18580 }, { "epoch": 2.623693871787631, "grad_norm": 3.212897743169331, "learning_rate": 8.139744877096501e-07, "loss": 0.153, "step": 18581 }, { "epoch": 2.6238350748376167, "grad_norm": 3.737551157070766, "learning_rate": 8.133721570421305e-07, "loss": 0.1569, "step": 18582 }, { "epoch": 2.6239762778876026, "grad_norm": 2.7148767719416367, "learning_rate": 8.127700398678728e-07, "loss": 0.1077, "step": 18583 }, { "epoch": 2.6241174809375885, "grad_norm": 4.078864081976734, "learning_rate": 8.121681362008737e-07, "loss": 0.2019, "step": 18584 }, { "epoch": 2.6242586839875743, "grad_norm": 3.601278645883798, "learning_rate": 8.115664460551176e-07, "loss": 0.1376, "step": 18585 }, { "epoch": 2.62439988703756, "grad_norm": 3.1338338331331284, "learning_rate": 8.109649694445898e-07, "loss": 0.148, "step": 18586 }, { "epoch": 2.624541090087546, "grad_norm": 3.570285763155862, "learning_rate": 8.103637063832681e-07, "loss": 0.1473, "step": 18587 }, { "epoch": 2.624682293137532, "grad_norm": 2.9199389773191715, "learning_rate": 8.097626568851224e-07, "loss": 0.1553, "step": 18588 }, { "epoch": 2.624823496187518, "grad_norm": 3.406069107733788, "learning_rate": 8.09161820964126e-07, "loss": 0.1713, "step": 18589 }, { "epoch": 2.6249646992375038, "grad_norm": 3.302911973130976, "learning_rate": 8.085611986342423e-07, "loss": 0.1586, "step": 18590 }, { "epoch": 2.6251059022874896, "grad_norm": 3.9790853663164643, "learning_rate": 8.079607899094233e-07, "loss": 0.2155, "step": 18591 }, { "epoch": 2.6252471053374755, "grad_norm": 3.411352472129503, "learning_rate": 8.073605948036267e-07, "loss": 0.1436, "step": 18592 }, { "epoch": 2.6253883083874614, "grad_norm": 3.2311082206538217, "learning_rate": 8.067606133307981e-07, "loss": 0.1286, "step": 18593 }, { "epoch": 2.6255295114374473, "grad_norm": 3.2993853586225845, "learning_rate": 8.061608455048841e-07, "loss": 0.1504, "step": 18594 }, { "epoch": 2.625670714487433, "grad_norm": 4.471999599331549, "learning_rate": 8.055612913398226e-07, "loss": 0.2412, "step": 18595 }, { "epoch": 2.625811917537419, "grad_norm": 2.870247639407375, "learning_rate": 8.049619508495454e-07, "loss": 0.1366, "step": 18596 }, { "epoch": 2.625953120587405, "grad_norm": 2.9492708095911855, "learning_rate": 8.043628240479806e-07, "loss": 0.1151, "step": 18597 }, { "epoch": 2.626094323637391, "grad_norm": 3.4433865576111966, "learning_rate": 8.037639109490524e-07, "loss": 0.1712, "step": 18598 }, { "epoch": 2.6262355266873767, "grad_norm": 2.582860090478288, "learning_rate": 8.03165211566681e-07, "loss": 0.1455, "step": 18599 }, { "epoch": 2.6263767297373626, "grad_norm": 3.309675267075451, "learning_rate": 8.025667259147773e-07, "loss": 0.1233, "step": 18600 }, { "epoch": 2.6265179327873485, "grad_norm": 2.8408444592545146, "learning_rate": 8.019684540072503e-07, "loss": 0.1283, "step": 18601 }, { "epoch": 2.6266591358373343, "grad_norm": 2.8907048939162685, "learning_rate": 8.013703958580044e-07, "loss": 0.1537, "step": 18602 }, { "epoch": 2.6268003388873202, "grad_norm": 2.6250776455997777, "learning_rate": 8.007725514809384e-07, "loss": 0.1294, "step": 18603 }, { "epoch": 2.626941541937306, "grad_norm": 3.4953752357274404, "learning_rate": 8.001749208899445e-07, "loss": 0.1194, "step": 18604 }, { "epoch": 2.627082744987292, "grad_norm": 2.7664118571509775, "learning_rate": 7.995775040989118e-07, "loss": 0.1312, "step": 18605 }, { "epoch": 2.627223948037278, "grad_norm": 2.6355052276358757, "learning_rate": 7.989803011217256e-07, "loss": 0.1415, "step": 18606 }, { "epoch": 2.6273651510872638, "grad_norm": 3.2279794991620046, "learning_rate": 7.98383311972265e-07, "loss": 0.1391, "step": 18607 }, { "epoch": 2.627506354137249, "grad_norm": 2.8667038568317884, "learning_rate": 7.977865366644011e-07, "loss": 0.133, "step": 18608 }, { "epoch": 2.627647557187235, "grad_norm": 3.377232113929168, "learning_rate": 7.97189975212005e-07, "loss": 0.145, "step": 18609 }, { "epoch": 2.627788760237221, "grad_norm": 3.5200425137646936, "learning_rate": 7.965936276289366e-07, "loss": 0.1945, "step": 18610 }, { "epoch": 2.627929963287207, "grad_norm": 4.189134215480756, "learning_rate": 7.959974939290593e-07, "loss": 0.1854, "step": 18611 }, { "epoch": 2.6280711663371927, "grad_norm": 2.4454489038190594, "learning_rate": 7.954015741262255e-07, "loss": 0.1119, "step": 18612 }, { "epoch": 2.6282123693871786, "grad_norm": 2.5690779391069065, "learning_rate": 7.94805868234284e-07, "loss": 0.1183, "step": 18613 }, { "epoch": 2.6283535724371645, "grad_norm": 2.886270456390757, "learning_rate": 7.942103762670783e-07, "loss": 0.1099, "step": 18614 }, { "epoch": 2.6284947754871504, "grad_norm": 3.2508145471254704, "learning_rate": 7.936150982384495e-07, "loss": 0.1522, "step": 18615 }, { "epoch": 2.6286359785371363, "grad_norm": 2.9614887135061867, "learning_rate": 7.930200341622274e-07, "loss": 0.1432, "step": 18616 }, { "epoch": 2.628777181587122, "grad_norm": 3.055715332925409, "learning_rate": 7.924251840522446e-07, "loss": 0.1055, "step": 18617 }, { "epoch": 2.628918384637108, "grad_norm": 4.035306041012885, "learning_rate": 7.918305479223243e-07, "loss": 0.1893, "step": 18618 }, { "epoch": 2.629059587687094, "grad_norm": 3.3567633726863755, "learning_rate": 7.912361257862844e-07, "loss": 0.1274, "step": 18619 }, { "epoch": 2.62920079073708, "grad_norm": 3.089284125387942, "learning_rate": 7.906419176579416e-07, "loss": 0.1354, "step": 18620 }, { "epoch": 2.6293419937870657, "grad_norm": 3.0205798873461003, "learning_rate": 7.900479235511016e-07, "loss": 0.1094, "step": 18621 }, { "epoch": 2.6294831968370516, "grad_norm": 2.6684725865561227, "learning_rate": 7.8945414347957e-07, "loss": 0.1281, "step": 18622 }, { "epoch": 2.6296243998870374, "grad_norm": 2.8130759633673987, "learning_rate": 7.888605774571478e-07, "loss": 0.115, "step": 18623 }, { "epoch": 2.6297656029370233, "grad_norm": 3.0461499952144053, "learning_rate": 7.882672254976298e-07, "loss": 0.1403, "step": 18624 }, { "epoch": 2.629906805987009, "grad_norm": 2.8182833831602196, "learning_rate": 7.876740876148015e-07, "loss": 0.1032, "step": 18625 }, { "epoch": 2.630048009036995, "grad_norm": 3.2974311084349046, "learning_rate": 7.870811638224485e-07, "loss": 0.1677, "step": 18626 }, { "epoch": 2.630189212086981, "grad_norm": 3.1527869981286143, "learning_rate": 7.864884541343499e-07, "loss": 0.1391, "step": 18627 }, { "epoch": 2.630330415136967, "grad_norm": 2.777433668904338, "learning_rate": 7.8589595856428e-07, "loss": 0.1555, "step": 18628 }, { "epoch": 2.6304716181869527, "grad_norm": 2.503295163868392, "learning_rate": 7.853036771260103e-07, "loss": 0.1255, "step": 18629 }, { "epoch": 2.6306128212369386, "grad_norm": 3.0160741661427473, "learning_rate": 7.847116098333029e-07, "loss": 0.1691, "step": 18630 }, { "epoch": 2.6307540242869245, "grad_norm": 3.04272543787079, "learning_rate": 7.841197566999182e-07, "loss": 0.1055, "step": 18631 }, { "epoch": 2.6308952273369104, "grad_norm": 3.184722222467459, "learning_rate": 7.835281177396126e-07, "loss": 0.1397, "step": 18632 }, { "epoch": 2.6310364303868963, "grad_norm": 3.6975111967257304, "learning_rate": 7.829366929661298e-07, "loss": 0.1703, "step": 18633 }, { "epoch": 2.631177633436882, "grad_norm": 3.526690466516146, "learning_rate": 7.823454823932186e-07, "loss": 0.1609, "step": 18634 }, { "epoch": 2.631318836486868, "grad_norm": 3.348851558042337, "learning_rate": 7.817544860346183e-07, "loss": 0.147, "step": 18635 }, { "epoch": 2.631460039536854, "grad_norm": 2.7411656110871894, "learning_rate": 7.811637039040621e-07, "loss": 0.1238, "step": 18636 }, { "epoch": 2.63160124258684, "grad_norm": 3.097704537002722, "learning_rate": 7.805731360152802e-07, "loss": 0.1548, "step": 18637 }, { "epoch": 2.6317424456368257, "grad_norm": 3.49136516806522, "learning_rate": 7.799827823819972e-07, "loss": 0.1655, "step": 18638 }, { "epoch": 2.6318836486868116, "grad_norm": 3.3325888316391237, "learning_rate": 7.793926430179333e-07, "loss": 0.1557, "step": 18639 }, { "epoch": 2.6320248517367975, "grad_norm": 2.638931240401629, "learning_rate": 7.788027179367997e-07, "loss": 0.1397, "step": 18640 }, { "epoch": 2.6321660547867833, "grad_norm": 3.276240878519179, "learning_rate": 7.78213007152312e-07, "loss": 0.152, "step": 18641 }, { "epoch": 2.632307257836769, "grad_norm": 3.5781330152463537, "learning_rate": 7.776235106781704e-07, "loss": 0.191, "step": 18642 }, { "epoch": 2.632448460886755, "grad_norm": 3.0147325349507192, "learning_rate": 7.770342285280752e-07, "loss": 0.1683, "step": 18643 }, { "epoch": 2.632589663936741, "grad_norm": 3.1681859674887307, "learning_rate": 7.764451607157208e-07, "loss": 0.1123, "step": 18644 }, { "epoch": 2.632730866986727, "grad_norm": 2.6491575201766366, "learning_rate": 7.758563072547965e-07, "loss": 0.1351, "step": 18645 }, { "epoch": 2.6328720700367128, "grad_norm": 3.0698317813218203, "learning_rate": 7.752676681589899e-07, "loss": 0.147, "step": 18646 }, { "epoch": 2.6330132730866986, "grad_norm": 3.8233838379955993, "learning_rate": 7.74679243441978e-07, "loss": 0.1562, "step": 18647 }, { "epoch": 2.6331544761366845, "grad_norm": 2.71688171973595, "learning_rate": 7.740910331174378e-07, "loss": 0.1275, "step": 18648 }, { "epoch": 2.6332956791866704, "grad_norm": 3.0486793894334316, "learning_rate": 7.735030371990382e-07, "loss": 0.1149, "step": 18649 }, { "epoch": 2.6334368822366563, "grad_norm": 3.2676697419097214, "learning_rate": 7.729152557004405e-07, "loss": 0.1237, "step": 18650 }, { "epoch": 2.633578085286642, "grad_norm": 2.875871611916708, "learning_rate": 7.723276886353081e-07, "loss": 0.1239, "step": 18651 }, { "epoch": 2.633719288336628, "grad_norm": 2.9149887072580416, "learning_rate": 7.717403360172959e-07, "loss": 0.0993, "step": 18652 }, { "epoch": 2.633860491386614, "grad_norm": 3.541140948234796, "learning_rate": 7.711531978600529e-07, "loss": 0.1412, "step": 18653 }, { "epoch": 2.6340016944366, "grad_norm": 3.1880139286813476, "learning_rate": 7.705662741772235e-07, "loss": 0.1397, "step": 18654 }, { "epoch": 2.6341428974865857, "grad_norm": 4.291653403972578, "learning_rate": 7.699795649824493e-07, "loss": 0.226, "step": 18655 }, { "epoch": 2.6342841005365716, "grad_norm": 3.621784990766145, "learning_rate": 7.693930702893626e-07, "loss": 0.1578, "step": 18656 }, { "epoch": 2.6344253035865575, "grad_norm": 3.703705689813216, "learning_rate": 7.688067901115926e-07, "loss": 0.1608, "step": 18657 }, { "epoch": 2.6345665066365433, "grad_norm": 2.7629488982814165, "learning_rate": 7.682207244627704e-07, "loss": 0.0965, "step": 18658 }, { "epoch": 2.6347077096865292, "grad_norm": 3.6702579555758303, "learning_rate": 7.676348733565098e-07, "loss": 0.1669, "step": 18659 }, { "epoch": 2.634848912736515, "grad_norm": 3.3758262466904285, "learning_rate": 7.670492368064275e-07, "loss": 0.1523, "step": 18660 }, { "epoch": 2.634990115786501, "grad_norm": 4.524352119979482, "learning_rate": 7.664638148261339e-07, "loss": 0.1763, "step": 18661 }, { "epoch": 2.635131318836487, "grad_norm": 3.0605161871581563, "learning_rate": 7.658786074292312e-07, "loss": 0.1452, "step": 18662 }, { "epoch": 2.6352725218864728, "grad_norm": 3.4603724898501613, "learning_rate": 7.652936146293244e-07, "loss": 0.1361, "step": 18663 }, { "epoch": 2.6354137249364586, "grad_norm": 3.244552748541896, "learning_rate": 7.647088364400046e-07, "loss": 0.1642, "step": 18664 }, { "epoch": 2.6355549279864445, "grad_norm": 3.6544719469243376, "learning_rate": 7.641242728748632e-07, "loss": 0.1655, "step": 18665 }, { "epoch": 2.6356961310364304, "grad_norm": 3.271614582268736, "learning_rate": 7.635399239474872e-07, "loss": 0.1325, "step": 18666 }, { "epoch": 2.6358373340864163, "grad_norm": 4.073323714110204, "learning_rate": 7.629557896714512e-07, "loss": 0.1211, "step": 18667 }, { "epoch": 2.635978537136402, "grad_norm": 2.294860807091073, "learning_rate": 7.623718700603356e-07, "loss": 0.0948, "step": 18668 }, { "epoch": 2.636119740186388, "grad_norm": 3.235620071850725, "learning_rate": 7.617881651277071e-07, "loss": 0.1345, "step": 18669 }, { "epoch": 2.636260943236374, "grad_norm": 4.153366230534762, "learning_rate": 7.612046748871327e-07, "loss": 0.2007, "step": 18670 }, { "epoch": 2.63640214628636, "grad_norm": 3.035151981858048, "learning_rate": 7.606213993521716e-07, "loss": 0.1447, "step": 18671 }, { "epoch": 2.6365433493363457, "grad_norm": 2.8541704585044787, "learning_rate": 7.600383385363797e-07, "loss": 0.1238, "step": 18672 }, { "epoch": 2.6366845523863316, "grad_norm": 3.8217958475483944, "learning_rate": 7.594554924533048e-07, "loss": 0.139, "step": 18673 }, { "epoch": 2.6368257554363175, "grad_norm": 3.8757053772792784, "learning_rate": 7.58872861116493e-07, "loss": 0.1824, "step": 18674 }, { "epoch": 2.6369669584863034, "grad_norm": 3.01391636814118, "learning_rate": 7.582904445394878e-07, "loss": 0.1413, "step": 18675 }, { "epoch": 2.6371081615362892, "grad_norm": 3.234067759763652, "learning_rate": 7.577082427358207e-07, "loss": 0.1529, "step": 18676 }, { "epoch": 2.637249364586275, "grad_norm": 3.0061096325972225, "learning_rate": 7.571262557190218e-07, "loss": 0.1329, "step": 18677 }, { "epoch": 2.637390567636261, "grad_norm": 3.3705180547855362, "learning_rate": 7.56544483502617e-07, "loss": 0.1245, "step": 18678 }, { "epoch": 2.637531770686247, "grad_norm": 4.049016333376115, "learning_rate": 7.559629261001256e-07, "loss": 0.1922, "step": 18679 }, { "epoch": 2.6376729737362328, "grad_norm": 2.758296803388759, "learning_rate": 7.553815835250644e-07, "loss": 0.1235, "step": 18680 }, { "epoch": 2.6378141767862187, "grad_norm": 3.139196930863653, "learning_rate": 7.548004557909428e-07, "loss": 0.1195, "step": 18681 }, { "epoch": 2.6379553798362045, "grad_norm": 2.8262638131869284, "learning_rate": 7.542195429112664e-07, "loss": 0.1313, "step": 18682 }, { "epoch": 2.6380965828861904, "grad_norm": 3.122881371417505, "learning_rate": 7.536388448995357e-07, "loss": 0.1284, "step": 18683 }, { "epoch": 2.6382377859361763, "grad_norm": 3.001000157990892, "learning_rate": 7.530583617692433e-07, "loss": 0.1245, "step": 18684 }, { "epoch": 2.638378988986162, "grad_norm": 3.2129902242135207, "learning_rate": 7.524780935338815e-07, "loss": 0.1311, "step": 18685 }, { "epoch": 2.638520192036148, "grad_norm": 2.948115075048922, "learning_rate": 7.518980402069354e-07, "loss": 0.1333, "step": 18686 }, { "epoch": 2.638661395086134, "grad_norm": 3.4492105783846267, "learning_rate": 7.51318201801885e-07, "loss": 0.1401, "step": 18687 }, { "epoch": 2.63880259813612, "grad_norm": 2.8797020921755694, "learning_rate": 7.507385783322052e-07, "loss": 0.1236, "step": 18688 }, { "epoch": 2.6389438011861057, "grad_norm": 2.927318628139526, "learning_rate": 7.501591698113663e-07, "loss": 0.143, "step": 18689 }, { "epoch": 2.6390850042360916, "grad_norm": 2.5952464990903423, "learning_rate": 7.495799762528333e-07, "loss": 0.1355, "step": 18690 }, { "epoch": 2.6392262072860775, "grad_norm": 3.3220776667776795, "learning_rate": 7.490009976700663e-07, "loss": 0.1554, "step": 18691 }, { "epoch": 2.6393674103360634, "grad_norm": 2.9322310803915803, "learning_rate": 7.484222340765235e-07, "loss": 0.1454, "step": 18692 }, { "epoch": 2.6395086133860493, "grad_norm": 3.018574905771618, "learning_rate": 7.478436854856508e-07, "loss": 0.1306, "step": 18693 }, { "epoch": 2.639649816436035, "grad_norm": 3.3052282531676345, "learning_rate": 7.472653519108952e-07, "loss": 0.1636, "step": 18694 }, { "epoch": 2.639791019486021, "grad_norm": 2.9666400963726676, "learning_rate": 7.46687233365696e-07, "loss": 0.1386, "step": 18695 }, { "epoch": 2.639932222536007, "grad_norm": 3.801878151311475, "learning_rate": 7.461093298634892e-07, "loss": 0.2027, "step": 18696 }, { "epoch": 2.640073425585993, "grad_norm": 3.1906617764152747, "learning_rate": 7.45531641417706e-07, "loss": 0.1541, "step": 18697 }, { "epoch": 2.6402146286359787, "grad_norm": 3.7357605767319253, "learning_rate": 7.449541680417704e-07, "loss": 0.1429, "step": 18698 }, { "epoch": 2.6403558316859645, "grad_norm": 2.9604219289589855, "learning_rate": 7.443769097491038e-07, "loss": 0.1461, "step": 18699 }, { "epoch": 2.6404970347359504, "grad_norm": 3.0468322855327648, "learning_rate": 7.437998665531221e-07, "loss": 0.1449, "step": 18700 }, { "epoch": 2.6406382377859363, "grad_norm": 2.7944765880101317, "learning_rate": 7.432230384672301e-07, "loss": 0.1277, "step": 18701 }, { "epoch": 2.640779440835922, "grad_norm": 3.71133128915966, "learning_rate": 7.426464255048393e-07, "loss": 0.1556, "step": 18702 }, { "epoch": 2.640920643885908, "grad_norm": 3.652316824497758, "learning_rate": 7.420700276793469e-07, "loss": 0.1858, "step": 18703 }, { "epoch": 2.641061846935894, "grad_norm": 4.023334924397379, "learning_rate": 7.414938450041497e-07, "loss": 0.2379, "step": 18704 }, { "epoch": 2.64120304998588, "grad_norm": 4.11634175597947, "learning_rate": 7.409178774926373e-07, "loss": 0.2092, "step": 18705 }, { "epoch": 2.6413442530358657, "grad_norm": 3.2953660839349816, "learning_rate": 7.403421251581933e-07, "loss": 0.1356, "step": 18706 }, { "epoch": 2.6414854560858516, "grad_norm": 2.7624465375288394, "learning_rate": 7.397665880142013e-07, "loss": 0.1239, "step": 18707 }, { "epoch": 2.6416266591358375, "grad_norm": 3.1133789090746924, "learning_rate": 7.391912660740319e-07, "loss": 0.1493, "step": 18708 }, { "epoch": 2.6417678621858234, "grad_norm": 3.3146218826250577, "learning_rate": 7.38616159351061e-07, "loss": 0.1651, "step": 18709 }, { "epoch": 2.641909065235809, "grad_norm": 3.1388506257116666, "learning_rate": 7.38041267858649e-07, "loss": 0.1612, "step": 18710 }, { "epoch": 2.6420502682857947, "grad_norm": 3.679264910469496, "learning_rate": 7.374665916101587e-07, "loss": 0.2001, "step": 18711 }, { "epoch": 2.6421914713357806, "grad_norm": 3.2704729571303597, "learning_rate": 7.368921306189447e-07, "loss": 0.1565, "step": 18712 }, { "epoch": 2.6423326743857665, "grad_norm": 3.0030532113750055, "learning_rate": 7.363178848983554e-07, "loss": 0.1054, "step": 18713 }, { "epoch": 2.6424738774357523, "grad_norm": 2.964196222520876, "learning_rate": 7.35743854461739e-07, "loss": 0.1489, "step": 18714 }, { "epoch": 2.6426150804857382, "grad_norm": 3.352544267954871, "learning_rate": 7.351700393224359e-07, "loss": 0.1427, "step": 18715 }, { "epoch": 2.642756283535724, "grad_norm": 3.1995846251288547, "learning_rate": 7.345964394937788e-07, "loss": 0.1245, "step": 18716 }, { "epoch": 2.64289748658571, "grad_norm": 3.449750253477038, "learning_rate": 7.340230549891003e-07, "loss": 0.1546, "step": 18717 }, { "epoch": 2.643038689635696, "grad_norm": 3.896587030848445, "learning_rate": 7.334498858217231e-07, "loss": 0.1817, "step": 18718 }, { "epoch": 2.6431798926856818, "grad_norm": 3.492929173224557, "learning_rate": 7.328769320049667e-07, "loss": 0.1221, "step": 18719 }, { "epoch": 2.6433210957356676, "grad_norm": 2.617408529529238, "learning_rate": 7.323041935521502e-07, "loss": 0.1285, "step": 18720 }, { "epoch": 2.6434622987856535, "grad_norm": 3.3750892500377994, "learning_rate": 7.317316704765821e-07, "loss": 0.1364, "step": 18721 }, { "epoch": 2.6436035018356394, "grad_norm": 3.9786225955747603, "learning_rate": 7.31159362791567e-07, "loss": 0.1829, "step": 18722 }, { "epoch": 2.6437447048856253, "grad_norm": 2.8664840229309227, "learning_rate": 7.305872705104056e-07, "loss": 0.137, "step": 18723 }, { "epoch": 2.643885907935611, "grad_norm": 3.3865222213052064, "learning_rate": 7.300153936463927e-07, "loss": 0.1375, "step": 18724 }, { "epoch": 2.644027110985597, "grad_norm": 3.1636126974209287, "learning_rate": 7.294437322128167e-07, "loss": 0.1504, "step": 18725 }, { "epoch": 2.644168314035583, "grad_norm": 2.782094870821125, "learning_rate": 7.288722862229691e-07, "loss": 0.146, "step": 18726 }, { "epoch": 2.644309517085569, "grad_norm": 3.237902146498894, "learning_rate": 7.283010556901226e-07, "loss": 0.1536, "step": 18727 }, { "epoch": 2.6444507201355547, "grad_norm": 3.049622831942678, "learning_rate": 7.277300406275567e-07, "loss": 0.1319, "step": 18728 }, { "epoch": 2.6445919231855406, "grad_norm": 3.127561957577059, "learning_rate": 7.271592410485395e-07, "loss": 0.1376, "step": 18729 }, { "epoch": 2.6447331262355265, "grad_norm": 2.6758912592120256, "learning_rate": 7.265886569663372e-07, "loss": 0.1046, "step": 18730 }, { "epoch": 2.6448743292855124, "grad_norm": 3.372172642875817, "learning_rate": 7.260182883942079e-07, "loss": 0.1598, "step": 18731 }, { "epoch": 2.6450155323354982, "grad_norm": 4.600696955515216, "learning_rate": 7.254481353454102e-07, "loss": 0.205, "step": 18732 }, { "epoch": 2.645156735385484, "grad_norm": 3.5461829944571974, "learning_rate": 7.24878197833192e-07, "loss": 0.1845, "step": 18733 }, { "epoch": 2.64529793843547, "grad_norm": 3.5443039655612285, "learning_rate": 7.243084758708007e-07, "loss": 0.1481, "step": 18734 }, { "epoch": 2.645439141485456, "grad_norm": 3.6523319508643586, "learning_rate": 7.237389694714736e-07, "loss": 0.147, "step": 18735 }, { "epoch": 2.6455803445354418, "grad_norm": 3.1278368349043135, "learning_rate": 7.231696786484443e-07, "loss": 0.1598, "step": 18736 }, { "epoch": 2.6457215475854277, "grad_norm": 4.867884154661913, "learning_rate": 7.226006034149469e-07, "loss": 0.1822, "step": 18737 }, { "epoch": 2.6458627506354135, "grad_norm": 3.1656492235080256, "learning_rate": 7.22031743784205e-07, "loss": 0.152, "step": 18738 }, { "epoch": 2.6460039536853994, "grad_norm": 3.1778224839121854, "learning_rate": 7.214630997694394e-07, "loss": 0.1393, "step": 18739 }, { "epoch": 2.6461451567353853, "grad_norm": 3.1432656046170924, "learning_rate": 7.208946713838638e-07, "loss": 0.13, "step": 18740 }, { "epoch": 2.646286359785371, "grad_norm": 3.5746465833097014, "learning_rate": 7.203264586406877e-07, "loss": 0.1371, "step": 18741 }, { "epoch": 2.646427562835357, "grad_norm": 2.6182014647673797, "learning_rate": 7.197584615531184e-07, "loss": 0.1468, "step": 18742 }, { "epoch": 2.646568765885343, "grad_norm": 3.0441019361160593, "learning_rate": 7.19190680134354e-07, "loss": 0.1305, "step": 18743 }, { "epoch": 2.646709968935329, "grad_norm": 3.1538799999945715, "learning_rate": 7.186231143975908e-07, "loss": 0.1301, "step": 18744 }, { "epoch": 2.6468511719853147, "grad_norm": 2.5697661611265348, "learning_rate": 7.18055764356017e-07, "loss": 0.1308, "step": 18745 }, { "epoch": 2.6469923750353006, "grad_norm": 3.0522884885442108, "learning_rate": 7.17488630022819e-07, "loss": 0.1705, "step": 18746 }, { "epoch": 2.6471335780852865, "grad_norm": 2.808022142023771, "learning_rate": 7.169217114111771e-07, "loss": 0.1182, "step": 18747 }, { "epoch": 2.6472747811352724, "grad_norm": 3.3520456539394847, "learning_rate": 7.16355008534263e-07, "loss": 0.1629, "step": 18748 }, { "epoch": 2.6474159841852583, "grad_norm": 4.808814403556536, "learning_rate": 7.157885214052518e-07, "loss": 0.2085, "step": 18749 }, { "epoch": 2.647557187235244, "grad_norm": 2.852149647973399, "learning_rate": 7.152222500373052e-07, "loss": 0.1122, "step": 18750 }, { "epoch": 2.64769839028523, "grad_norm": 2.779992315687866, "learning_rate": 7.146561944435859e-07, "loss": 0.1383, "step": 18751 }, { "epoch": 2.647839593335216, "grad_norm": 3.096373057879562, "learning_rate": 7.140903546372446e-07, "loss": 0.1372, "step": 18752 }, { "epoch": 2.647980796385202, "grad_norm": 2.516311792955464, "learning_rate": 7.135247306314308e-07, "loss": 0.1206, "step": 18753 }, { "epoch": 2.6481219994351877, "grad_norm": 3.4152968675941864, "learning_rate": 7.12959322439295e-07, "loss": 0.1427, "step": 18754 }, { "epoch": 2.6482632024851736, "grad_norm": 3.3661778212113496, "learning_rate": 7.123941300739723e-07, "loss": 0.135, "step": 18755 }, { "epoch": 2.6484044055351594, "grad_norm": 3.2544149735634136, "learning_rate": 7.118291535485999e-07, "loss": 0.1435, "step": 18756 }, { "epoch": 2.6485456085851453, "grad_norm": 2.680796999762798, "learning_rate": 7.112643928763064e-07, "loss": 0.108, "step": 18757 }, { "epoch": 2.648686811635131, "grad_norm": 3.400120069267058, "learning_rate": 7.106998480702165e-07, "loss": 0.1529, "step": 18758 }, { "epoch": 2.648828014685117, "grad_norm": 3.636970173195075, "learning_rate": 7.101355191434511e-07, "loss": 0.1498, "step": 18759 }, { "epoch": 2.648969217735103, "grad_norm": 3.510922416715557, "learning_rate": 7.095714061091241e-07, "loss": 0.1823, "step": 18760 }, { "epoch": 2.649110420785089, "grad_norm": 3.465764044833183, "learning_rate": 7.09007508980345e-07, "loss": 0.1534, "step": 18761 }, { "epoch": 2.6492516238350747, "grad_norm": 3.0388705527304585, "learning_rate": 7.084438277702188e-07, "loss": 0.0976, "step": 18762 }, { "epoch": 2.6493928268850606, "grad_norm": 3.8498535455563214, "learning_rate": 7.078803624918463e-07, "loss": 0.1707, "step": 18763 }, { "epoch": 2.6495340299350465, "grad_norm": 3.110519901240474, "learning_rate": 7.073171131583201e-07, "loss": 0.1596, "step": 18764 }, { "epoch": 2.6496752329850324, "grad_norm": 3.0194878120958037, "learning_rate": 7.067540797827299e-07, "loss": 0.1544, "step": 18765 }, { "epoch": 2.6498164360350183, "grad_norm": 3.1038435354857907, "learning_rate": 7.06191262378163e-07, "loss": 0.1013, "step": 18766 }, { "epoch": 2.649957639085004, "grad_norm": 3.329584926479046, "learning_rate": 7.056286609576979e-07, "loss": 0.1337, "step": 18767 }, { "epoch": 2.65009884213499, "grad_norm": 4.065825390258055, "learning_rate": 7.050662755344096e-07, "loss": 0.1746, "step": 18768 }, { "epoch": 2.650240045184976, "grad_norm": 2.445080162379736, "learning_rate": 7.045041061213664e-07, "loss": 0.1192, "step": 18769 }, { "epoch": 2.650381248234962, "grad_norm": 3.611202237352108, "learning_rate": 7.039421527316304e-07, "loss": 0.1311, "step": 18770 }, { "epoch": 2.6505224512849477, "grad_norm": 3.3554715880497423, "learning_rate": 7.033804153782664e-07, "loss": 0.1263, "step": 18771 }, { "epoch": 2.6506636543349336, "grad_norm": 3.198775243117756, "learning_rate": 7.028188940743275e-07, "loss": 0.1327, "step": 18772 }, { "epoch": 2.6508048573849194, "grad_norm": 2.935863069931787, "learning_rate": 7.022575888328608e-07, "loss": 0.1343, "step": 18773 }, { "epoch": 2.6509460604349053, "grad_norm": 4.443586812079073, "learning_rate": 7.016964996669129e-07, "loss": 0.1574, "step": 18774 }, { "epoch": 2.651087263484891, "grad_norm": 2.875364163422291, "learning_rate": 7.011356265895231e-07, "loss": 0.1326, "step": 18775 }, { "epoch": 2.651228466534877, "grad_norm": 3.226281992547059, "learning_rate": 7.005749696137254e-07, "loss": 0.1761, "step": 18776 }, { "epoch": 2.651369669584863, "grad_norm": 3.400781699100564, "learning_rate": 7.000145287525484e-07, "loss": 0.1159, "step": 18777 }, { "epoch": 2.651510872634849, "grad_norm": 2.602906908502238, "learning_rate": 6.994543040190183e-07, "loss": 0.1109, "step": 18778 }, { "epoch": 2.6516520756848347, "grad_norm": 2.401671015369705, "learning_rate": 6.988942954261535e-07, "loss": 0.1242, "step": 18779 }, { "epoch": 2.6517932787348206, "grad_norm": 3.1898329110407984, "learning_rate": 6.983345029869681e-07, "loss": 0.1571, "step": 18780 }, { "epoch": 2.6519344817848065, "grad_norm": 2.911394567963712, "learning_rate": 6.977749267144718e-07, "loss": 0.1393, "step": 18781 }, { "epoch": 2.6520756848347924, "grad_norm": 3.684349298046009, "learning_rate": 6.972155666216684e-07, "loss": 0.1599, "step": 18782 }, { "epoch": 2.6522168878847783, "grad_norm": 2.833831992403683, "learning_rate": 6.966564227215578e-07, "loss": 0.124, "step": 18783 }, { "epoch": 2.652358090934764, "grad_norm": 3.714089689243963, "learning_rate": 6.960974950271348e-07, "loss": 0.1597, "step": 18784 }, { "epoch": 2.65249929398475, "grad_norm": 3.742438800231911, "learning_rate": 6.955387835513894e-07, "loss": 0.1867, "step": 18785 }, { "epoch": 2.652640497034736, "grad_norm": 2.7266313720674806, "learning_rate": 6.949802883073031e-07, "loss": 0.124, "step": 18786 }, { "epoch": 2.652781700084722, "grad_norm": 3.0682369076170106, "learning_rate": 6.944220093078546e-07, "loss": 0.1137, "step": 18787 }, { "epoch": 2.6529229031347077, "grad_norm": 3.0762771654845285, "learning_rate": 6.938639465660213e-07, "loss": 0.1386, "step": 18788 }, { "epoch": 2.6530641061846936, "grad_norm": 3.6028375847586545, "learning_rate": 6.933061000947705e-07, "loss": 0.0982, "step": 18789 }, { "epoch": 2.6532053092346795, "grad_norm": 2.982292461262047, "learning_rate": 6.927484699070675e-07, "loss": 0.1533, "step": 18790 }, { "epoch": 2.6533465122846653, "grad_norm": 3.169376033532251, "learning_rate": 6.921910560158696e-07, "loss": 0.1655, "step": 18791 }, { "epoch": 2.653487715334651, "grad_norm": 3.4379592537965125, "learning_rate": 6.91633858434132e-07, "loss": 0.1626, "step": 18792 }, { "epoch": 2.653628918384637, "grad_norm": 2.9466224409091493, "learning_rate": 6.910768771748044e-07, "loss": 0.1361, "step": 18793 }, { "epoch": 2.653770121434623, "grad_norm": 2.7195683243440647, "learning_rate": 6.905201122508299e-07, "loss": 0.1027, "step": 18794 }, { "epoch": 2.653911324484609, "grad_norm": 2.7991434397354493, "learning_rate": 6.899635636751467e-07, "loss": 0.1588, "step": 18795 }, { "epoch": 2.6540525275345948, "grad_norm": 2.660535105582515, "learning_rate": 6.894072314606892e-07, "loss": 0.0995, "step": 18796 }, { "epoch": 2.6541937305845806, "grad_norm": 3.1820039334115213, "learning_rate": 6.888511156203881e-07, "loss": 0.1369, "step": 18797 }, { "epoch": 2.6543349336345665, "grad_norm": 2.9100359447136075, "learning_rate": 6.882952161671652e-07, "loss": 0.1465, "step": 18798 }, { "epoch": 2.6544761366845524, "grad_norm": 3.3067047587434835, "learning_rate": 6.87739533113938e-07, "loss": 0.1193, "step": 18799 }, { "epoch": 2.6546173397345383, "grad_norm": 3.2466027075625417, "learning_rate": 6.871840664736251e-07, "loss": 0.1374, "step": 18800 }, { "epoch": 2.654758542784524, "grad_norm": 3.1155824349568326, "learning_rate": 6.866288162591317e-07, "loss": 0.1614, "step": 18801 }, { "epoch": 2.65489974583451, "grad_norm": 3.1087495171700037, "learning_rate": 6.860737824833652e-07, "loss": 0.1215, "step": 18802 }, { "epoch": 2.655040948884496, "grad_norm": 2.7524013041526447, "learning_rate": 6.855189651592187e-07, "loss": 0.1167, "step": 18803 }, { "epoch": 2.655182151934482, "grad_norm": 3.004576821422797, "learning_rate": 6.849643642995873e-07, "loss": 0.1629, "step": 18804 }, { "epoch": 2.6553233549844677, "grad_norm": 3.3296399183373784, "learning_rate": 6.844099799173643e-07, "loss": 0.1318, "step": 18805 }, { "epoch": 2.6554645580344536, "grad_norm": 3.2030387821551645, "learning_rate": 6.83855812025429e-07, "loss": 0.1515, "step": 18806 }, { "epoch": 2.6556057610844395, "grad_norm": 3.525426025050401, "learning_rate": 6.833018606366615e-07, "loss": 0.1573, "step": 18807 }, { "epoch": 2.6557469641344253, "grad_norm": 3.307343242267232, "learning_rate": 6.827481257639346e-07, "loss": 0.1255, "step": 18808 }, { "epoch": 2.6558881671844112, "grad_norm": 3.7653375290154654, "learning_rate": 6.82194607420118e-07, "loss": 0.1574, "step": 18809 }, { "epoch": 2.656029370234397, "grad_norm": 3.531318950981282, "learning_rate": 6.816413056180748e-07, "loss": 0.1548, "step": 18810 }, { "epoch": 2.656170573284383, "grad_norm": 3.2241847095347915, "learning_rate": 6.810882203706637e-07, "loss": 0.1483, "step": 18811 }, { "epoch": 2.656311776334369, "grad_norm": 3.0192287942404814, "learning_rate": 6.805353516907376e-07, "loss": 0.1128, "step": 18812 }, { "epoch": 2.6564529793843548, "grad_norm": 3.7375157627411895, "learning_rate": 6.799826995911451e-07, "loss": 0.1896, "step": 18813 }, { "epoch": 2.6565941824343406, "grad_norm": 2.7307133158962102, "learning_rate": 6.794302640847294e-07, "loss": 0.1044, "step": 18814 }, { "epoch": 2.6567353854843265, "grad_norm": 2.5005223507998746, "learning_rate": 6.788780451843291e-07, "loss": 0.1253, "step": 18815 }, { "epoch": 2.6568765885343124, "grad_norm": 3.225078941638792, "learning_rate": 6.78326042902776e-07, "loss": 0.1583, "step": 18816 }, { "epoch": 2.6570177915842983, "grad_norm": 2.899073726538033, "learning_rate": 6.777742572529022e-07, "loss": 0.1677, "step": 18817 }, { "epoch": 2.657158994634284, "grad_norm": 2.5096192517929357, "learning_rate": 6.772226882475275e-07, "loss": 0.1269, "step": 18818 }, { "epoch": 2.65730019768427, "grad_norm": 2.7448342640551866, "learning_rate": 6.766713358994736e-07, "loss": 0.1135, "step": 18819 }, { "epoch": 2.657441400734256, "grad_norm": 2.6090899966775973, "learning_rate": 6.761202002215506e-07, "loss": 0.1056, "step": 18820 }, { "epoch": 2.657582603784242, "grad_norm": 3.8677606212709628, "learning_rate": 6.755692812265668e-07, "loss": 0.1645, "step": 18821 }, { "epoch": 2.6577238068342277, "grad_norm": 2.4355756680063365, "learning_rate": 6.750185789273234e-07, "loss": 0.0848, "step": 18822 }, { "epoch": 2.6578650098842136, "grad_norm": 3.5818927202972937, "learning_rate": 6.744680933366243e-07, "loss": 0.1834, "step": 18823 }, { "epoch": 2.6580062129341995, "grad_norm": 2.760530655301133, "learning_rate": 6.739178244672584e-07, "loss": 0.1117, "step": 18824 }, { "epoch": 2.6581474159841854, "grad_norm": 2.9527728358991143, "learning_rate": 6.733677723320142e-07, "loss": 0.1337, "step": 18825 }, { "epoch": 2.6582886190341712, "grad_norm": 3.1393733072190337, "learning_rate": 6.72817936943676e-07, "loss": 0.1555, "step": 18826 }, { "epoch": 2.658429822084157, "grad_norm": 3.3247003441730665, "learning_rate": 6.722683183150203e-07, "loss": 0.1394, "step": 18827 }, { "epoch": 2.658571025134143, "grad_norm": 3.118500538456123, "learning_rate": 6.717189164588212e-07, "loss": 0.139, "step": 18828 }, { "epoch": 2.658712228184129, "grad_norm": 4.087153063595232, "learning_rate": 6.711697313878452e-07, "loss": 0.1889, "step": 18829 }, { "epoch": 2.6588534312341148, "grad_norm": 3.582699243816067, "learning_rate": 6.706207631148564e-07, "loss": 0.1485, "step": 18830 }, { "epoch": 2.6589946342841007, "grad_norm": 3.299397342728459, "learning_rate": 6.700720116526116e-07, "loss": 0.1723, "step": 18831 }, { "epoch": 2.6591358373340865, "grad_norm": 2.374707272394249, "learning_rate": 6.695234770138648e-07, "loss": 0.0955, "step": 18832 }, { "epoch": 2.6592770403840724, "grad_norm": 2.507007990198973, "learning_rate": 6.689751592113614e-07, "loss": 0.1095, "step": 18833 }, { "epoch": 2.6594182434340583, "grad_norm": 3.2966572441971134, "learning_rate": 6.684270582578455e-07, "loss": 0.1322, "step": 18834 }, { "epoch": 2.659559446484044, "grad_norm": 2.8247321009028505, "learning_rate": 6.67879174166055e-07, "loss": 0.1236, "step": 18835 }, { "epoch": 2.65970064953403, "grad_norm": 4.2168546412272025, "learning_rate": 6.673315069487252e-07, "loss": 0.2216, "step": 18836 }, { "epoch": 2.659841852584016, "grad_norm": 3.723367725479668, "learning_rate": 6.667840566185779e-07, "loss": 0.1639, "step": 18837 }, { "epoch": 2.659983055634002, "grad_norm": 3.2488600492444206, "learning_rate": 6.662368231883388e-07, "loss": 0.1103, "step": 18838 }, { "epoch": 2.6601242586839877, "grad_norm": 3.199153680756426, "learning_rate": 6.65689806670724e-07, "loss": 0.0938, "step": 18839 }, { "epoch": 2.6602654617339736, "grad_norm": 2.4408280104782047, "learning_rate": 6.65143007078447e-07, "loss": 0.1315, "step": 18840 }, { "epoch": 2.6604066647839595, "grad_norm": 2.6241972365902018, "learning_rate": 6.645964244242164e-07, "loss": 0.1318, "step": 18841 }, { "epoch": 2.6605478678339454, "grad_norm": 3.4598101324593302, "learning_rate": 6.640500587207333e-07, "loss": 0.1342, "step": 18842 }, { "epoch": 2.6606890708839313, "grad_norm": 3.297387047566107, "learning_rate": 6.635039099806939e-07, "loss": 0.1414, "step": 18843 }, { "epoch": 2.660830273933917, "grad_norm": 3.1051401672594396, "learning_rate": 6.629579782167928e-07, "loss": 0.1297, "step": 18844 }, { "epoch": 2.660971476983903, "grad_norm": 3.8300308018941824, "learning_rate": 6.624122634417152e-07, "loss": 0.1532, "step": 18845 }, { "epoch": 2.661112680033889, "grad_norm": 2.817801724121992, "learning_rate": 6.618667656681444e-07, "loss": 0.1421, "step": 18846 }, { "epoch": 2.661253883083875, "grad_norm": 3.328629898594717, "learning_rate": 6.613214849087568e-07, "loss": 0.1765, "step": 18847 }, { "epoch": 2.6613950861338607, "grad_norm": 2.410170440875257, "learning_rate": 6.607764211762247e-07, "loss": 0.1134, "step": 18848 }, { "epoch": 2.6615362891838465, "grad_norm": 2.527743159578753, "learning_rate": 6.602315744832155e-07, "loss": 0.1397, "step": 18849 }, { "epoch": 2.6616774922338324, "grad_norm": 4.156858133224187, "learning_rate": 6.596869448423903e-07, "loss": 0.1469, "step": 18850 }, { "epoch": 2.6618186952838183, "grad_norm": 2.856770797744925, "learning_rate": 6.591425322664058e-07, "loss": 0.1033, "step": 18851 }, { "epoch": 2.661959898333804, "grad_norm": 4.214442918314302, "learning_rate": 6.585983367679171e-07, "loss": 0.217, "step": 18852 }, { "epoch": 2.66210110138379, "grad_norm": 3.485045219619855, "learning_rate": 6.5805435835957e-07, "loss": 0.1574, "step": 18853 }, { "epoch": 2.662242304433776, "grad_norm": 3.2959058979777542, "learning_rate": 6.57510597054003e-07, "loss": 0.1449, "step": 18854 }, { "epoch": 2.662383507483762, "grad_norm": 3.8980086920036414, "learning_rate": 6.56967052863855e-07, "loss": 0.2072, "step": 18855 }, { "epoch": 2.6625247105337477, "grad_norm": 3.4143245330038, "learning_rate": 6.564237258017558e-07, "loss": 0.1552, "step": 18856 }, { "epoch": 2.6626659135837336, "grad_norm": 3.049068178567785, "learning_rate": 6.558806158803366e-07, "loss": 0.1267, "step": 18857 }, { "epoch": 2.6628071166337195, "grad_norm": 3.203757992987614, "learning_rate": 6.55337723112216e-07, "loss": 0.1488, "step": 18858 }, { "epoch": 2.6629483196837054, "grad_norm": 3.449621162209108, "learning_rate": 6.547950475100118e-07, "loss": 0.1638, "step": 18859 }, { "epoch": 2.6630895227336913, "grad_norm": 3.0023575636505178, "learning_rate": 6.542525890863338e-07, "loss": 0.1165, "step": 18860 }, { "epoch": 2.663230725783677, "grad_norm": 2.81686388478396, "learning_rate": 6.537103478537899e-07, "loss": 0.1339, "step": 18861 }, { "epoch": 2.663371928833663, "grad_norm": 2.3920234729229857, "learning_rate": 6.531683238249809e-07, "loss": 0.113, "step": 18862 }, { "epoch": 2.663513131883649, "grad_norm": 3.709278279276378, "learning_rate": 6.526265170125034e-07, "loss": 0.1619, "step": 18863 }, { "epoch": 2.663654334933635, "grad_norm": 3.0524168280763786, "learning_rate": 6.520849274289498e-07, "loss": 0.1702, "step": 18864 }, { "epoch": 2.6637955379836207, "grad_norm": 2.7479065271774603, "learning_rate": 6.515435550869043e-07, "loss": 0.117, "step": 18865 }, { "epoch": 2.6639367410336066, "grad_norm": 3.7629546191157153, "learning_rate": 6.510023999989501e-07, "loss": 0.1436, "step": 18866 }, { "epoch": 2.6640779440835924, "grad_norm": 3.935609173498883, "learning_rate": 6.504614621776629e-07, "loss": 0.1417, "step": 18867 }, { "epoch": 2.6642191471335783, "grad_norm": 3.444630751796602, "learning_rate": 6.499207416356113e-07, "loss": 0.1573, "step": 18868 }, { "epoch": 2.664360350183564, "grad_norm": 2.901894005866385, "learning_rate": 6.493802383853653e-07, "loss": 0.1338, "step": 18869 }, { "epoch": 2.66450155323355, "grad_norm": 2.6511415451334552, "learning_rate": 6.488399524394851e-07, "loss": 0.1181, "step": 18870 }, { "epoch": 2.664642756283536, "grad_norm": 2.733966287226836, "learning_rate": 6.482998838105259e-07, "loss": 0.1189, "step": 18871 }, { "epoch": 2.664783959333522, "grad_norm": 2.803971146051175, "learning_rate": 6.477600325110378e-07, "loss": 0.1383, "step": 18872 }, { "epoch": 2.6649251623835077, "grad_norm": 3.3132204850371765, "learning_rate": 6.472203985535663e-07, "loss": 0.1369, "step": 18873 }, { "epoch": 2.6650663654334936, "grad_norm": 2.689630841099451, "learning_rate": 6.466809819506548e-07, "loss": 0.1508, "step": 18874 }, { "epoch": 2.6652075684834795, "grad_norm": 3.2388862621273025, "learning_rate": 6.461417827148386e-07, "loss": 0.1517, "step": 18875 }, { "epoch": 2.6653487715334654, "grad_norm": 3.5292006691031332, "learning_rate": 6.456028008586468e-07, "loss": 0.1158, "step": 18876 }, { "epoch": 2.6654899745834513, "grad_norm": 2.7882725013106158, "learning_rate": 6.45064036394607e-07, "loss": 0.1316, "step": 18877 }, { "epoch": 2.665631177633437, "grad_norm": 3.4647001625610185, "learning_rate": 6.445254893352381e-07, "loss": 0.1344, "step": 18878 }, { "epoch": 2.665772380683423, "grad_norm": 2.972934413280098, "learning_rate": 6.439871596930569e-07, "loss": 0.1186, "step": 18879 }, { "epoch": 2.6659135837334085, "grad_norm": 2.548340979535032, "learning_rate": 6.434490474805743e-07, "loss": 0.1067, "step": 18880 }, { "epoch": 2.6660547867833944, "grad_norm": 3.3102653807530267, "learning_rate": 6.429111527102938e-07, "loss": 0.1289, "step": 18881 }, { "epoch": 2.6661959898333802, "grad_norm": 3.8034155833395786, "learning_rate": 6.423734753947175e-07, "loss": 0.1815, "step": 18882 }, { "epoch": 2.666337192883366, "grad_norm": 2.723595507999998, "learning_rate": 6.41836015546341e-07, "loss": 0.131, "step": 18883 }, { "epoch": 2.666478395933352, "grad_norm": 2.6718269954380682, "learning_rate": 6.412987731776532e-07, "loss": 0.1573, "step": 18884 }, { "epoch": 2.666619598983338, "grad_norm": 2.912157313764413, "learning_rate": 6.407617483011385e-07, "loss": 0.1604, "step": 18885 }, { "epoch": 2.6667608020333238, "grad_norm": 3.1123700536266985, "learning_rate": 6.402249409292815e-07, "loss": 0.1435, "step": 18886 }, { "epoch": 2.6669020050833097, "grad_norm": 2.9497825456991884, "learning_rate": 6.396883510745555e-07, "loss": 0.138, "step": 18887 }, { "epoch": 2.6670432081332955, "grad_norm": 3.2879820997874685, "learning_rate": 6.391519787494282e-07, "loss": 0.1456, "step": 18888 }, { "epoch": 2.6671844111832814, "grad_norm": 2.8379663305521934, "learning_rate": 6.386158239663665e-07, "loss": 0.125, "step": 18889 }, { "epoch": 2.6673256142332673, "grad_norm": 2.9796414850731745, "learning_rate": 6.380798867378291e-07, "loss": 0.1384, "step": 18890 }, { "epoch": 2.667466817283253, "grad_norm": 3.1272364777300035, "learning_rate": 6.375441670762727e-07, "loss": 0.1001, "step": 18891 }, { "epoch": 2.667608020333239, "grad_norm": 3.239279467281637, "learning_rate": 6.370086649941465e-07, "loss": 0.1376, "step": 18892 }, { "epoch": 2.667749223383225, "grad_norm": 3.125371149928673, "learning_rate": 6.364733805038958e-07, "loss": 0.1062, "step": 18893 }, { "epoch": 2.667890426433211, "grad_norm": 3.5863486728565688, "learning_rate": 6.359383136179598e-07, "loss": 0.0981, "step": 18894 }, { "epoch": 2.6680316294831967, "grad_norm": 2.6940527973874766, "learning_rate": 6.35403464348775e-07, "loss": 0.1402, "step": 18895 }, { "epoch": 2.6681728325331826, "grad_norm": 3.5272198626032525, "learning_rate": 6.348688327087671e-07, "loss": 0.1497, "step": 18896 }, { "epoch": 2.6683140355831685, "grad_norm": 3.790274060679558, "learning_rate": 6.343344187103628e-07, "loss": 0.1739, "step": 18897 }, { "epoch": 2.6684552386331544, "grad_norm": 3.0913267956654678, "learning_rate": 6.338002223659834e-07, "loss": 0.1383, "step": 18898 }, { "epoch": 2.6685964416831403, "grad_norm": 2.8240809292687112, "learning_rate": 6.33266243688041e-07, "loss": 0.1595, "step": 18899 }, { "epoch": 2.668737644733126, "grad_norm": 2.8979765178360655, "learning_rate": 6.327324826889469e-07, "loss": 0.1481, "step": 18900 }, { "epoch": 2.668878847783112, "grad_norm": 3.6977156288876327, "learning_rate": 6.321989393811034e-07, "loss": 0.1957, "step": 18901 }, { "epoch": 2.669020050833098, "grad_norm": 2.679839979958239, "learning_rate": 6.316656137769095e-07, "loss": 0.118, "step": 18902 }, { "epoch": 2.669161253883084, "grad_norm": 3.3301744764850234, "learning_rate": 6.311325058887629e-07, "loss": 0.1537, "step": 18903 }, { "epoch": 2.6693024569330697, "grad_norm": 2.6350979823503393, "learning_rate": 6.305996157290528e-07, "loss": 0.1209, "step": 18904 }, { "epoch": 2.6694436599830556, "grad_norm": 3.742796353910209, "learning_rate": 6.300669433101592e-07, "loss": 0.1587, "step": 18905 }, { "epoch": 2.6695848630330414, "grad_norm": 3.5792926196921164, "learning_rate": 6.295344886444632e-07, "loss": 0.1616, "step": 18906 }, { "epoch": 2.6697260660830273, "grad_norm": 3.109485954656302, "learning_rate": 6.290022517443372e-07, "loss": 0.1503, "step": 18907 }, { "epoch": 2.669867269133013, "grad_norm": 3.0012079275544616, "learning_rate": 6.284702326221537e-07, "loss": 0.1125, "step": 18908 }, { "epoch": 2.670008472182999, "grad_norm": 3.4199098988453667, "learning_rate": 6.279384312902737e-07, "loss": 0.1844, "step": 18909 }, { "epoch": 2.670149675232985, "grad_norm": 3.507976255156557, "learning_rate": 6.274068477610584e-07, "loss": 0.1616, "step": 18910 }, { "epoch": 2.670290878282971, "grad_norm": 3.0638138425407426, "learning_rate": 6.268754820468592e-07, "loss": 0.1501, "step": 18911 }, { "epoch": 2.6704320813329567, "grad_norm": 3.726844078670672, "learning_rate": 6.263443341600284e-07, "loss": 0.2212, "step": 18912 }, { "epoch": 2.6705732843829426, "grad_norm": 3.495076144501369, "learning_rate": 6.258134041129038e-07, "loss": 0.1681, "step": 18913 }, { "epoch": 2.6707144874329285, "grad_norm": 2.859601030178073, "learning_rate": 6.252826919178278e-07, "loss": 0.1017, "step": 18914 }, { "epoch": 2.6708556904829144, "grad_norm": 2.958710779853216, "learning_rate": 6.247521975871351e-07, "loss": 0.1203, "step": 18915 }, { "epoch": 2.6709968935329003, "grad_norm": 3.450494222246176, "learning_rate": 6.242219211331512e-07, "loss": 0.1262, "step": 18916 }, { "epoch": 2.671138096582886, "grad_norm": 2.8348834407432952, "learning_rate": 6.236918625682009e-07, "loss": 0.1256, "step": 18917 }, { "epoch": 2.671279299632872, "grad_norm": 3.917340651783546, "learning_rate": 6.23162021904603e-07, "loss": 0.162, "step": 18918 }, { "epoch": 2.671420502682858, "grad_norm": 3.0953671721171694, "learning_rate": 6.226323991546679e-07, "loss": 0.1184, "step": 18919 }, { "epoch": 2.671561705732844, "grad_norm": 3.2660482401010063, "learning_rate": 6.221029943307099e-07, "loss": 0.1477, "step": 18920 }, { "epoch": 2.6717029087828297, "grad_norm": 3.3218402058051466, "learning_rate": 6.215738074450262e-07, "loss": 0.1139, "step": 18921 }, { "epoch": 2.6718441118328156, "grad_norm": 3.527092936289585, "learning_rate": 6.210448385099177e-07, "loss": 0.1978, "step": 18922 }, { "epoch": 2.6719853148828014, "grad_norm": 3.0908141127489848, "learning_rate": 6.205160875376759e-07, "loss": 0.1721, "step": 18923 }, { "epoch": 2.6721265179327873, "grad_norm": 3.4263658638722485, "learning_rate": 6.199875545405898e-07, "loss": 0.1757, "step": 18924 }, { "epoch": 2.672267720982773, "grad_norm": 3.2084629600823895, "learning_rate": 6.194592395309407e-07, "loss": 0.1395, "step": 18925 }, { "epoch": 2.672408924032759, "grad_norm": 4.627261424051521, "learning_rate": 6.189311425210087e-07, "loss": 0.1576, "step": 18926 }, { "epoch": 2.672550127082745, "grad_norm": 3.118516505968711, "learning_rate": 6.184032635230663e-07, "loss": 0.1259, "step": 18927 }, { "epoch": 2.672691330132731, "grad_norm": 3.3912718765345695, "learning_rate": 6.178756025493804e-07, "loss": 0.1332, "step": 18928 }, { "epoch": 2.6728325331827167, "grad_norm": 3.2936049964320677, "learning_rate": 6.173481596122143e-07, "loss": 0.1588, "step": 18929 }, { "epoch": 2.6729737362327026, "grad_norm": 3.648172981175714, "learning_rate": 6.16820934723823e-07, "loss": 0.1796, "step": 18930 }, { "epoch": 2.6731149392826885, "grad_norm": 3.4585377684542125, "learning_rate": 6.162939278964608e-07, "loss": 0.1793, "step": 18931 }, { "epoch": 2.6732561423326744, "grad_norm": 4.521818504694328, "learning_rate": 6.157671391423769e-07, "loss": 0.2296, "step": 18932 }, { "epoch": 2.6733973453826603, "grad_norm": 2.8765289358070842, "learning_rate": 6.152405684738116e-07, "loss": 0.1324, "step": 18933 }, { "epoch": 2.673538548432646, "grad_norm": 3.4244397460049596, "learning_rate": 6.147142159030017e-07, "loss": 0.1305, "step": 18934 }, { "epoch": 2.673679751482632, "grad_norm": 3.0572803614255024, "learning_rate": 6.141880814421808e-07, "loss": 0.1493, "step": 18935 }, { "epoch": 2.673820954532618, "grad_norm": 2.6045432725954387, "learning_rate": 6.136621651035756e-07, "loss": 0.1275, "step": 18936 }, { "epoch": 2.673962157582604, "grad_norm": 3.338761615096184, "learning_rate": 6.131364668994078e-07, "loss": 0.1604, "step": 18937 }, { "epoch": 2.6741033606325897, "grad_norm": 3.075642710960734, "learning_rate": 6.126109868418951e-07, "loss": 0.148, "step": 18938 }, { "epoch": 2.6742445636825756, "grad_norm": 2.7881439513064366, "learning_rate": 6.120857249432477e-07, "loss": 0.1344, "step": 18939 }, { "epoch": 2.6743857667325615, "grad_norm": 3.323507131847104, "learning_rate": 6.115606812156749e-07, "loss": 0.1465, "step": 18940 }, { "epoch": 2.6745269697825473, "grad_norm": 3.1496412754899006, "learning_rate": 6.110358556713769e-07, "loss": 0.1212, "step": 18941 }, { "epoch": 2.674668172832533, "grad_norm": 2.831592151291494, "learning_rate": 6.105112483225495e-07, "loss": 0.1335, "step": 18942 }, { "epoch": 2.674809375882519, "grad_norm": 2.99634492273145, "learning_rate": 6.099868591813873e-07, "loss": 0.1189, "step": 18943 }, { "epoch": 2.674950578932505, "grad_norm": 3.9501741324685424, "learning_rate": 6.094626882600751e-07, "loss": 0.1907, "step": 18944 }, { "epoch": 2.675091781982491, "grad_norm": 3.424557956022425, "learning_rate": 6.089387355707943e-07, "loss": 0.1326, "step": 18945 }, { "epoch": 2.6752329850324768, "grad_norm": 3.178118038416846, "learning_rate": 6.084150011257239e-07, "loss": 0.1458, "step": 18946 }, { "epoch": 2.6753741880824626, "grad_norm": 2.442330491037359, "learning_rate": 6.078914849370288e-07, "loss": 0.1105, "step": 18947 }, { "epoch": 2.6755153911324485, "grad_norm": 2.9586857130900097, "learning_rate": 6.073681870168813e-07, "loss": 0.1142, "step": 18948 }, { "epoch": 2.6756565941824344, "grad_norm": 2.98235144871188, "learning_rate": 6.068451073774417e-07, "loss": 0.1694, "step": 18949 }, { "epoch": 2.6757977972324203, "grad_norm": 2.9756901967529474, "learning_rate": 6.063222460308649e-07, "loss": 0.1529, "step": 18950 }, { "epoch": 2.675939000282406, "grad_norm": 2.985309376206234, "learning_rate": 6.057996029893009e-07, "loss": 0.0944, "step": 18951 }, { "epoch": 2.676080203332392, "grad_norm": 3.6452997940352745, "learning_rate": 6.052771782648981e-07, "loss": 0.1917, "step": 18952 }, { "epoch": 2.676221406382378, "grad_norm": 3.3169868049026436, "learning_rate": 6.047549718697965e-07, "loss": 0.1241, "step": 18953 }, { "epoch": 2.676362609432364, "grad_norm": 3.09450034613588, "learning_rate": 6.04232983816132e-07, "loss": 0.1233, "step": 18954 }, { "epoch": 2.6765038124823497, "grad_norm": 3.4057274962761035, "learning_rate": 6.037112141160351e-07, "loss": 0.1705, "step": 18955 }, { "epoch": 2.6766450155323356, "grad_norm": 3.3998278069501264, "learning_rate": 6.031896627816314e-07, "loss": 0.1771, "step": 18956 }, { "epoch": 2.6767862185823215, "grad_norm": 3.9665534467310035, "learning_rate": 6.026683298250424e-07, "loss": 0.1883, "step": 18957 }, { "epoch": 2.6769274216323073, "grad_norm": 2.919377672701439, "learning_rate": 6.021472152583818e-07, "loss": 0.1261, "step": 18958 }, { "epoch": 2.6770686246822932, "grad_norm": 2.9017765491854224, "learning_rate": 6.01626319093761e-07, "loss": 0.1226, "step": 18959 }, { "epoch": 2.677209827732279, "grad_norm": 3.478671329326382, "learning_rate": 6.01105641343287e-07, "loss": 0.1577, "step": 18960 }, { "epoch": 2.677351030782265, "grad_norm": 4.406722954060124, "learning_rate": 6.005851820190578e-07, "loss": 0.2114, "step": 18961 }, { "epoch": 2.677492233832251, "grad_norm": 2.534073005374882, "learning_rate": 6.000649411331705e-07, "loss": 0.0784, "step": 18962 }, { "epoch": 2.6776334368822368, "grad_norm": 3.9342303093407307, "learning_rate": 5.995449186977164e-07, "loss": 0.1716, "step": 18963 }, { "epoch": 2.6777746399322226, "grad_norm": 3.8112865307673847, "learning_rate": 5.99025114724775e-07, "loss": 0.1614, "step": 18964 }, { "epoch": 2.6779158429822085, "grad_norm": 3.3592786745772716, "learning_rate": 5.985055292264308e-07, "loss": 0.1514, "step": 18965 }, { "epoch": 2.6780570460321944, "grad_norm": 2.2162224684191965, "learning_rate": 5.979861622147587e-07, "loss": 0.1031, "step": 18966 }, { "epoch": 2.6781982490821803, "grad_norm": 2.573171147229366, "learning_rate": 5.974670137018279e-07, "loss": 0.1248, "step": 18967 }, { "epoch": 2.678339452132166, "grad_norm": 2.773468450619444, "learning_rate": 5.969480836997032e-07, "loss": 0.1133, "step": 18968 }, { "epoch": 2.678480655182152, "grad_norm": 3.3618166237548857, "learning_rate": 5.96429372220444e-07, "loss": 0.1879, "step": 18969 }, { "epoch": 2.678621858232138, "grad_norm": 3.672083321203852, "learning_rate": 5.959108792761048e-07, "loss": 0.1615, "step": 18970 }, { "epoch": 2.678763061282124, "grad_norm": 2.884444891363575, "learning_rate": 5.953926048787361e-07, "loss": 0.1189, "step": 18971 }, { "epoch": 2.6789042643321097, "grad_norm": 2.6209565055653554, "learning_rate": 5.948745490403806e-07, "loss": 0.1275, "step": 18972 }, { "epoch": 2.6790454673820956, "grad_norm": 2.3840550587603446, "learning_rate": 5.943567117730797e-07, "loss": 0.116, "step": 18973 }, { "epoch": 2.6791866704320815, "grad_norm": 3.2331158277296153, "learning_rate": 5.938390930888671e-07, "loss": 0.1215, "step": 18974 }, { "epoch": 2.6793278734820674, "grad_norm": 2.7294215476185144, "learning_rate": 5.933216929997709e-07, "loss": 0.148, "step": 18975 }, { "epoch": 2.6794690765320532, "grad_norm": 2.73788899170489, "learning_rate": 5.92804511517815e-07, "loss": 0.1224, "step": 18976 }, { "epoch": 2.679610279582039, "grad_norm": 2.9687303832681424, "learning_rate": 5.922875486550206e-07, "loss": 0.1365, "step": 18977 }, { "epoch": 2.679751482632025, "grad_norm": 2.862470549006838, "learning_rate": 5.917708044234017e-07, "loss": 0.103, "step": 18978 }, { "epoch": 2.679892685682011, "grad_norm": 3.027712361093174, "learning_rate": 5.912542788349651e-07, "loss": 0.1015, "step": 18979 }, { "epoch": 2.6800338887319968, "grad_norm": 2.9296993236493596, "learning_rate": 5.907379719017181e-07, "loss": 0.1286, "step": 18980 }, { "epoch": 2.6801750917819827, "grad_norm": 2.986466967467467, "learning_rate": 5.902218836356543e-07, "loss": 0.1188, "step": 18981 }, { "epoch": 2.680316294831968, "grad_norm": 2.7195662842223696, "learning_rate": 5.897060140487709e-07, "loss": 0.1151, "step": 18982 }, { "epoch": 2.680457497881954, "grad_norm": 3.138543268353344, "learning_rate": 5.89190363153056e-07, "loss": 0.1454, "step": 18983 }, { "epoch": 2.68059870093194, "grad_norm": 3.2519711239472384, "learning_rate": 5.886749309604922e-07, "loss": 0.1348, "step": 18984 }, { "epoch": 2.6807399039819257, "grad_norm": 3.2365358663359314, "learning_rate": 5.8815971748306e-07, "loss": 0.1512, "step": 18985 }, { "epoch": 2.6808811070319116, "grad_norm": 3.1473212646039013, "learning_rate": 5.876447227327298e-07, "loss": 0.1518, "step": 18986 }, { "epoch": 2.6810223100818975, "grad_norm": 2.846596125400192, "learning_rate": 5.871299467214719e-07, "loss": 0.1437, "step": 18987 }, { "epoch": 2.6811635131318834, "grad_norm": 3.04775406038311, "learning_rate": 5.866153894612492e-07, "loss": 0.1266, "step": 18988 }, { "epoch": 2.6813047161818693, "grad_norm": 3.2110310908984667, "learning_rate": 5.861010509640197e-07, "loss": 0.147, "step": 18989 }, { "epoch": 2.681445919231855, "grad_norm": 2.8757317806946068, "learning_rate": 5.855869312417362e-07, "loss": 0.1364, "step": 18990 }, { "epoch": 2.681587122281841, "grad_norm": 2.7487249745635887, "learning_rate": 5.850730303063467e-07, "loss": 0.0932, "step": 18991 }, { "epoch": 2.681728325331827, "grad_norm": 2.6496738666761788, "learning_rate": 5.845593481697931e-07, "loss": 0.1097, "step": 18992 }, { "epoch": 2.681869528381813, "grad_norm": 2.8386188259177816, "learning_rate": 5.840458848440133e-07, "loss": 0.1172, "step": 18993 }, { "epoch": 2.6820107314317987, "grad_norm": 3.6080483375773045, "learning_rate": 5.835326403409414e-07, "loss": 0.1585, "step": 18994 }, { "epoch": 2.6821519344817846, "grad_norm": 2.6287751868177125, "learning_rate": 5.830196146725054e-07, "loss": 0.0791, "step": 18995 }, { "epoch": 2.6822931375317705, "grad_norm": 3.848219676871724, "learning_rate": 5.825068078506257e-07, "loss": 0.1706, "step": 18996 }, { "epoch": 2.6824343405817563, "grad_norm": 2.4340363925809245, "learning_rate": 5.819942198872231e-07, "loss": 0.1063, "step": 18997 }, { "epoch": 2.682575543631742, "grad_norm": 3.355244965771629, "learning_rate": 5.814818507942055e-07, "loss": 0.1478, "step": 18998 }, { "epoch": 2.682716746681728, "grad_norm": 4.1048796921205, "learning_rate": 5.809697005834803e-07, "loss": 0.191, "step": 18999 }, { "epoch": 2.682857949731714, "grad_norm": 2.974247169667982, "learning_rate": 5.804577692669533e-07, "loss": 0.1601, "step": 19000 }, { "epoch": 2.6829991527817, "grad_norm": 2.6784393263527098, "learning_rate": 5.799460568565207e-07, "loss": 0.1177, "step": 19001 }, { "epoch": 2.6831403558316858, "grad_norm": 3.0415582309252214, "learning_rate": 5.794345633640718e-07, "loss": 0.1169, "step": 19002 }, { "epoch": 2.6832815588816716, "grad_norm": 4.07905813492894, "learning_rate": 5.789232888014962e-07, "loss": 0.196, "step": 19003 }, { "epoch": 2.6834227619316575, "grad_norm": 2.8016702977154444, "learning_rate": 5.784122331806751e-07, "loss": 0.0967, "step": 19004 }, { "epoch": 2.6835639649816434, "grad_norm": 3.2098553429522503, "learning_rate": 5.779013965134839e-07, "loss": 0.1438, "step": 19005 }, { "epoch": 2.6837051680316293, "grad_norm": 3.6643995399044784, "learning_rate": 5.77390778811796e-07, "loss": 0.1305, "step": 19006 }, { "epoch": 2.683846371081615, "grad_norm": 4.249650115080932, "learning_rate": 5.768803800874767e-07, "loss": 0.2066, "step": 19007 }, { "epoch": 2.683987574131601, "grad_norm": 3.037628243586277, "learning_rate": 5.763702003523874e-07, "loss": 0.1383, "step": 19008 }, { "epoch": 2.684128777181587, "grad_norm": 3.743267011433064, "learning_rate": 5.758602396183854e-07, "loss": 0.1436, "step": 19009 }, { "epoch": 2.684269980231573, "grad_norm": 2.8516668177977365, "learning_rate": 5.753504978973212e-07, "loss": 0.1308, "step": 19010 }, { "epoch": 2.6844111832815587, "grad_norm": 3.669760151555839, "learning_rate": 5.748409752010397e-07, "loss": 0.1612, "step": 19011 }, { "epoch": 2.6845523863315446, "grad_norm": 3.27029870438877, "learning_rate": 5.743316715413849e-07, "loss": 0.1512, "step": 19012 }, { "epoch": 2.6846935893815305, "grad_norm": 4.1136707174027, "learning_rate": 5.738225869301927e-07, "loss": 0.1723, "step": 19013 }, { "epoch": 2.6848347924315163, "grad_norm": 3.6389164536851077, "learning_rate": 5.733137213792928e-07, "loss": 0.124, "step": 19014 }, { "epoch": 2.6849759954815022, "grad_norm": 3.0938070057594884, "learning_rate": 5.728050749005099e-07, "loss": 0.1461, "step": 19015 }, { "epoch": 2.685117198531488, "grad_norm": 3.0278155383327867, "learning_rate": 5.722966475056646e-07, "loss": 0.132, "step": 19016 }, { "epoch": 2.685258401581474, "grad_norm": 3.929198539438923, "learning_rate": 5.717884392065743e-07, "loss": 0.1569, "step": 19017 }, { "epoch": 2.68539960463146, "grad_norm": 3.887382047937539, "learning_rate": 5.712804500150493e-07, "loss": 0.1701, "step": 19018 }, { "epoch": 2.6855408076814458, "grad_norm": 3.6665968122624784, "learning_rate": 5.707726799428947e-07, "loss": 0.1876, "step": 19019 }, { "epoch": 2.6856820107314316, "grad_norm": 3.2752520589215948, "learning_rate": 5.702651290019112e-07, "loss": 0.1626, "step": 19020 }, { "epoch": 2.6858232137814175, "grad_norm": 3.2461303779296102, "learning_rate": 5.697577972038937e-07, "loss": 0.149, "step": 19021 }, { "epoch": 2.6859644168314034, "grad_norm": 3.4905942725797616, "learning_rate": 5.692506845606327e-07, "loss": 0.126, "step": 19022 }, { "epoch": 2.6861056198813893, "grad_norm": 2.963669569689377, "learning_rate": 5.687437910839121e-07, "loss": 0.128, "step": 19023 }, { "epoch": 2.686246822931375, "grad_norm": 3.7959105595318636, "learning_rate": 5.682371167855127e-07, "loss": 0.1507, "step": 19024 }, { "epoch": 2.686388025981361, "grad_norm": 2.932039549353003, "learning_rate": 5.677306616772105e-07, "loss": 0.1508, "step": 19025 }, { "epoch": 2.686529229031347, "grad_norm": 3.1275218451479736, "learning_rate": 5.672244257707738e-07, "loss": 0.1485, "step": 19026 }, { "epoch": 2.686670432081333, "grad_norm": 3.4260351997864427, "learning_rate": 5.667184090779676e-07, "loss": 0.1329, "step": 19027 }, { "epoch": 2.6868116351313187, "grad_norm": 3.9353186569413534, "learning_rate": 5.662126116105504e-07, "loss": 0.1969, "step": 19028 }, { "epoch": 2.6869528381813046, "grad_norm": 2.872269787861905, "learning_rate": 5.657070333802783e-07, "loss": 0.1283, "step": 19029 }, { "epoch": 2.6870940412312905, "grad_norm": 3.542729659227113, "learning_rate": 5.65201674398902e-07, "loss": 0.1558, "step": 19030 }, { "epoch": 2.6872352442812764, "grad_norm": 3.301196922868541, "learning_rate": 5.646965346781641e-07, "loss": 0.1341, "step": 19031 }, { "epoch": 2.6873764473312622, "grad_norm": 3.753113700529591, "learning_rate": 5.641916142298043e-07, "loss": 0.1927, "step": 19032 }, { "epoch": 2.687517650381248, "grad_norm": 3.477632207786333, "learning_rate": 5.636869130655531e-07, "loss": 0.1375, "step": 19033 }, { "epoch": 2.687658853431234, "grad_norm": 2.7970213054980313, "learning_rate": 5.631824311971456e-07, "loss": 0.1084, "step": 19034 }, { "epoch": 2.68780005648122, "grad_norm": 2.5093534412668848, "learning_rate": 5.626781686363025e-07, "loss": 0.1021, "step": 19035 }, { "epoch": 2.6879412595312058, "grad_norm": 3.801036696100907, "learning_rate": 5.621741253947432e-07, "loss": 0.1691, "step": 19036 }, { "epoch": 2.6880824625811917, "grad_norm": 2.8923023227594964, "learning_rate": 5.616703014841807e-07, "loss": 0.152, "step": 19037 }, { "epoch": 2.6882236656311775, "grad_norm": 3.6391323790108996, "learning_rate": 5.611666969163243e-07, "loss": 0.1714, "step": 19038 }, { "epoch": 2.6883648686811634, "grad_norm": 2.940965592436462, "learning_rate": 5.606633117028781e-07, "loss": 0.1426, "step": 19039 }, { "epoch": 2.6885060717311493, "grad_norm": 2.329067740750674, "learning_rate": 5.601601458555406e-07, "loss": 0.0806, "step": 19040 }, { "epoch": 2.688647274781135, "grad_norm": 3.7912875694316877, "learning_rate": 5.596571993860034e-07, "loss": 0.1571, "step": 19041 }, { "epoch": 2.688788477831121, "grad_norm": 2.798507332138452, "learning_rate": 5.591544723059561e-07, "loss": 0.1307, "step": 19042 }, { "epoch": 2.688929680881107, "grad_norm": 3.0614612932910497, "learning_rate": 5.586519646270827e-07, "loss": 0.1131, "step": 19043 }, { "epoch": 2.689070883931093, "grad_norm": 3.3620514072894, "learning_rate": 5.581496763610594e-07, "loss": 0.1567, "step": 19044 }, { "epoch": 2.6892120869810787, "grad_norm": 2.8883448866685164, "learning_rate": 5.57647607519558e-07, "loss": 0.1544, "step": 19045 }, { "epoch": 2.6893532900310646, "grad_norm": 2.364792159952339, "learning_rate": 5.571457581142514e-07, "loss": 0.1135, "step": 19046 }, { "epoch": 2.6894944930810505, "grad_norm": 4.003246247624127, "learning_rate": 5.566441281567981e-07, "loss": 0.1981, "step": 19047 }, { "epoch": 2.6896356961310364, "grad_norm": 3.785324535377466, "learning_rate": 5.561427176588586e-07, "loss": 0.1906, "step": 19048 }, { "epoch": 2.6897768991810223, "grad_norm": 3.5273708150632617, "learning_rate": 5.556415266320824e-07, "loss": 0.128, "step": 19049 }, { "epoch": 2.689918102231008, "grad_norm": 2.7854327723149113, "learning_rate": 5.551405550881173e-07, "loss": 0.1249, "step": 19050 }, { "epoch": 2.690059305280994, "grad_norm": 2.584919544705247, "learning_rate": 5.54639803038608e-07, "loss": 0.1531, "step": 19051 }, { "epoch": 2.69020050833098, "grad_norm": 4.8878967189745515, "learning_rate": 5.541392704951909e-07, "loss": 0.2216, "step": 19052 }, { "epoch": 2.690341711380966, "grad_norm": 3.4535369378998446, "learning_rate": 5.536389574694967e-07, "loss": 0.0977, "step": 19053 }, { "epoch": 2.6904829144309517, "grad_norm": 2.926660762367837, "learning_rate": 5.53138863973155e-07, "loss": 0.1168, "step": 19054 }, { "epoch": 2.6906241174809375, "grad_norm": 3.1953672848195276, "learning_rate": 5.526389900177854e-07, "loss": 0.1509, "step": 19055 }, { "epoch": 2.6907653205309234, "grad_norm": 3.2077296909392037, "learning_rate": 5.521393356150062e-07, "loss": 0.1353, "step": 19056 }, { "epoch": 2.6909065235809093, "grad_norm": 2.856436741511749, "learning_rate": 5.516399007764283e-07, "loss": 0.1147, "step": 19057 }, { "epoch": 2.691047726630895, "grad_norm": 2.8974034987419652, "learning_rate": 5.51140685513658e-07, "loss": 0.1254, "step": 19058 }, { "epoch": 2.691188929680881, "grad_norm": 3.351434309389812, "learning_rate": 5.506416898382982e-07, "loss": 0.1702, "step": 19059 }, { "epoch": 2.691330132730867, "grad_norm": 2.9459254896988294, "learning_rate": 5.501429137619452e-07, "loss": 0.1593, "step": 19060 }, { "epoch": 2.691471335780853, "grad_norm": 2.3664224772663354, "learning_rate": 5.496443572961896e-07, "loss": 0.1042, "step": 19061 }, { "epoch": 2.6916125388308387, "grad_norm": 3.4854904183769415, "learning_rate": 5.491460204526156e-07, "loss": 0.1471, "step": 19062 }, { "epoch": 2.6917537418808246, "grad_norm": 3.993988632101775, "learning_rate": 5.486479032428083e-07, "loss": 0.2047, "step": 19063 }, { "epoch": 2.6918949449308105, "grad_norm": 3.176737406259515, "learning_rate": 5.481500056783429e-07, "loss": 0.1465, "step": 19064 }, { "epoch": 2.6920361479807964, "grad_norm": 3.099314378762003, "learning_rate": 5.476523277707902e-07, "loss": 0.1302, "step": 19065 }, { "epoch": 2.6921773510307823, "grad_norm": 3.807212807544743, "learning_rate": 5.471548695317131e-07, "loss": 0.1769, "step": 19066 }, { "epoch": 2.692318554080768, "grad_norm": 3.2927473763259885, "learning_rate": 5.466576309726735e-07, "loss": 0.1868, "step": 19067 }, { "epoch": 2.692459757130754, "grad_norm": 3.1765422679691575, "learning_rate": 5.461606121052299e-07, "loss": 0.1339, "step": 19068 }, { "epoch": 2.69260096018074, "grad_norm": 3.201655287983891, "learning_rate": 5.456638129409308e-07, "loss": 0.1327, "step": 19069 }, { "epoch": 2.692742163230726, "grad_norm": 2.5159828852971255, "learning_rate": 5.451672334913216e-07, "loss": 0.1267, "step": 19070 }, { "epoch": 2.6928833662807117, "grad_norm": 2.676305694253513, "learning_rate": 5.446708737679418e-07, "loss": 0.1186, "step": 19071 }, { "epoch": 2.6930245693306976, "grad_norm": 3.628806429074437, "learning_rate": 5.441747337823289e-07, "loss": 0.1647, "step": 19072 }, { "epoch": 2.6931657723806834, "grad_norm": 2.6914377469511424, "learning_rate": 5.436788135460102e-07, "loss": 0.1225, "step": 19073 }, { "epoch": 2.6933069754306693, "grad_norm": 3.1206745870127905, "learning_rate": 5.431831130705123e-07, "loss": 0.1648, "step": 19074 }, { "epoch": 2.693448178480655, "grad_norm": 3.3849478209562895, "learning_rate": 5.426876323673558e-07, "loss": 0.1598, "step": 19075 }, { "epoch": 2.693589381530641, "grad_norm": 3.1361860004020223, "learning_rate": 5.421923714480537e-07, "loss": 0.1346, "step": 19076 }, { "epoch": 2.693730584580627, "grad_norm": 3.495847556919755, "learning_rate": 5.416973303241158e-07, "loss": 0.1581, "step": 19077 }, { "epoch": 2.693871787630613, "grad_norm": 2.9467534165705964, "learning_rate": 5.412025090070483e-07, "loss": 0.1199, "step": 19078 }, { "epoch": 2.6940129906805987, "grad_norm": 2.3519348593279514, "learning_rate": 5.407079075083476e-07, "loss": 0.1356, "step": 19079 }, { "epoch": 2.6941541937305846, "grad_norm": 2.764203795868386, "learning_rate": 5.402135258395114e-07, "loss": 0.0793, "step": 19080 }, { "epoch": 2.6942953967805705, "grad_norm": 2.8703837246343684, "learning_rate": 5.397193640120291e-07, "loss": 0.1297, "step": 19081 }, { "epoch": 2.6944365998305564, "grad_norm": 3.011323806329975, "learning_rate": 5.392254220373816e-07, "loss": 0.1389, "step": 19082 }, { "epoch": 2.6945778028805423, "grad_norm": 3.8588163367263264, "learning_rate": 5.387316999270487e-07, "loss": 0.2012, "step": 19083 }, { "epoch": 2.694719005930528, "grad_norm": 2.6934225142624943, "learning_rate": 5.382381976925044e-07, "loss": 0.1266, "step": 19084 }, { "epoch": 2.694860208980514, "grad_norm": 2.683012773383494, "learning_rate": 5.377449153452196e-07, "loss": 0.1201, "step": 19085 }, { "epoch": 2.6950014120305, "grad_norm": 2.3597152758200584, "learning_rate": 5.372518528966575e-07, "loss": 0.1033, "step": 19086 }, { "epoch": 2.695142615080486, "grad_norm": 2.749032819719462, "learning_rate": 5.367590103582742e-07, "loss": 0.1105, "step": 19087 }, { "epoch": 2.6952838181304717, "grad_norm": 2.9806080585773342, "learning_rate": 5.362663877415252e-07, "loss": 0.1537, "step": 19088 }, { "epoch": 2.6954250211804576, "grad_norm": 3.4066412878261123, "learning_rate": 5.357739850578581e-07, "loss": 0.1627, "step": 19089 }, { "epoch": 2.6955662242304435, "grad_norm": 2.8726060734438867, "learning_rate": 5.352818023187167e-07, "loss": 0.1726, "step": 19090 }, { "epoch": 2.6957074272804293, "grad_norm": 3.2129525936777292, "learning_rate": 5.347898395355388e-07, "loss": 0.1518, "step": 19091 }, { "epoch": 2.695848630330415, "grad_norm": 3.342875459569788, "learning_rate": 5.342980967197564e-07, "loss": 0.1665, "step": 19092 }, { "epoch": 2.695989833380401, "grad_norm": 3.7244511142724295, "learning_rate": 5.338065738827991e-07, "loss": 0.1633, "step": 19093 }, { "epoch": 2.696131036430387, "grad_norm": 3.4881894970801177, "learning_rate": 5.33315271036089e-07, "loss": 0.1729, "step": 19094 }, { "epoch": 2.696272239480373, "grad_norm": 3.1755292926553156, "learning_rate": 5.328241881910434e-07, "loss": 0.1506, "step": 19095 }, { "epoch": 2.6964134425303588, "grad_norm": 3.2129004908709, "learning_rate": 5.323333253590734e-07, "loss": 0.1477, "step": 19096 }, { "epoch": 2.6965546455803446, "grad_norm": 2.9758710882694643, "learning_rate": 5.318426825515898e-07, "loss": 0.1357, "step": 19097 }, { "epoch": 2.6966958486303305, "grad_norm": 2.9957215924801965, "learning_rate": 5.313522597799947e-07, "loss": 0.1323, "step": 19098 }, { "epoch": 2.6968370516803164, "grad_norm": 3.5747130631605675, "learning_rate": 5.308620570556833e-07, "loss": 0.2152, "step": 19099 }, { "epoch": 2.6969782547303023, "grad_norm": 3.6043938478452793, "learning_rate": 5.303720743900475e-07, "loss": 0.1226, "step": 19100 }, { "epoch": 2.697119457780288, "grad_norm": 3.4154618436744704, "learning_rate": 5.298823117944752e-07, "loss": 0.1671, "step": 19101 }, { "epoch": 2.697260660830274, "grad_norm": 3.0653670978270506, "learning_rate": 5.293927692803458e-07, "loss": 0.1568, "step": 19102 }, { "epoch": 2.69740186388026, "grad_norm": 2.825492856742143, "learning_rate": 5.289034468590404e-07, "loss": 0.1188, "step": 19103 }, { "epoch": 2.697543066930246, "grad_norm": 2.7985941607173266, "learning_rate": 5.284143445419288e-07, "loss": 0.1266, "step": 19104 }, { "epoch": 2.6976842699802317, "grad_norm": 3.587516550529802, "learning_rate": 5.279254623403773e-07, "loss": 0.1581, "step": 19105 }, { "epoch": 2.6978254730302176, "grad_norm": 3.2589486563636005, "learning_rate": 5.274368002657482e-07, "loss": 0.1549, "step": 19106 }, { "epoch": 2.6979666760802035, "grad_norm": 3.366952863050015, "learning_rate": 5.269483583293966e-07, "loss": 0.1394, "step": 19107 }, { "epoch": 2.6981078791301893, "grad_norm": 3.1120548424397727, "learning_rate": 5.264601365426736e-07, "loss": 0.1522, "step": 19108 }, { "epoch": 2.6982490821801752, "grad_norm": 3.3326655369205977, "learning_rate": 5.259721349169256e-07, "loss": 0.1447, "step": 19109 }, { "epoch": 2.698390285230161, "grad_norm": 2.7084479490214677, "learning_rate": 5.254843534634934e-07, "loss": 0.1074, "step": 19110 }, { "epoch": 2.698531488280147, "grad_norm": 2.6876121202472034, "learning_rate": 5.249967921937137e-07, "loss": 0.1235, "step": 19111 }, { "epoch": 2.698672691330133, "grad_norm": 2.846348170412563, "learning_rate": 5.245094511189163e-07, "loss": 0.1568, "step": 19112 }, { "epoch": 2.6988138943801188, "grad_norm": 2.4340867000945052, "learning_rate": 5.240223302504277e-07, "loss": 0.1026, "step": 19113 }, { "epoch": 2.6989550974301046, "grad_norm": 3.119337968242527, "learning_rate": 5.235354295995665e-07, "loss": 0.1306, "step": 19114 }, { "epoch": 2.6990963004800905, "grad_norm": 3.132767127765841, "learning_rate": 5.230487491776514e-07, "loss": 0.1375, "step": 19115 }, { "epoch": 2.6992375035300764, "grad_norm": 3.27569020704532, "learning_rate": 5.225622889959892e-07, "loss": 0.181, "step": 19116 }, { "epoch": 2.6993787065800623, "grad_norm": 2.9725271297245577, "learning_rate": 5.220760490658872e-07, "loss": 0.1238, "step": 19117 }, { "epoch": 2.699519909630048, "grad_norm": 3.152266444593001, "learning_rate": 5.215900293986431e-07, "loss": 0.1408, "step": 19118 }, { "epoch": 2.699661112680034, "grad_norm": 3.404397665100183, "learning_rate": 5.211042300055535e-07, "loss": 0.1667, "step": 19119 }, { "epoch": 2.69980231573002, "grad_norm": 2.8045646448817525, "learning_rate": 5.206186508979083e-07, "loss": 0.1217, "step": 19120 }, { "epoch": 2.699943518780006, "grad_norm": 3.613183855529652, "learning_rate": 5.201332920869928e-07, "loss": 0.1357, "step": 19121 }, { "epoch": 2.7000847218299917, "grad_norm": 3.8244422388171935, "learning_rate": 5.196481535840847e-07, "loss": 0.1547, "step": 19122 }, { "epoch": 2.7002259248799776, "grad_norm": 2.930325720451192, "learning_rate": 5.191632354004595e-07, "loss": 0.1159, "step": 19123 }, { "epoch": 2.7003671279299635, "grad_norm": 2.9024432548467676, "learning_rate": 5.186785375473869e-07, "loss": 0.1549, "step": 19124 }, { "epoch": 2.7005083309799494, "grad_norm": 2.9512865250024176, "learning_rate": 5.18194060036129e-07, "loss": 0.1372, "step": 19125 }, { "epoch": 2.7006495340299352, "grad_norm": 4.075332340107143, "learning_rate": 5.17709802877947e-07, "loss": 0.1957, "step": 19126 }, { "epoch": 2.700790737079921, "grad_norm": 3.6335445279620977, "learning_rate": 5.172257660840951e-07, "loss": 0.151, "step": 19127 }, { "epoch": 2.700931940129907, "grad_norm": 3.143482016645844, "learning_rate": 5.1674194966582e-07, "loss": 0.1148, "step": 19128 }, { "epoch": 2.701073143179893, "grad_norm": 3.4514544439328985, "learning_rate": 5.162583536343668e-07, "loss": 0.1445, "step": 19129 }, { "epoch": 2.7012143462298788, "grad_norm": 3.7648446873436696, "learning_rate": 5.157749780009735e-07, "loss": 0.1514, "step": 19130 }, { "epoch": 2.7013555492798647, "grad_norm": 3.425156825667327, "learning_rate": 5.152918227768722e-07, "loss": 0.1663, "step": 19131 }, { "epoch": 2.7014967523298505, "grad_norm": 3.5964489354492652, "learning_rate": 5.14808887973296e-07, "loss": 0.186, "step": 19132 }, { "epoch": 2.7016379553798364, "grad_norm": 3.105044089785203, "learning_rate": 5.143261736014638e-07, "loss": 0.1392, "step": 19133 }, { "epoch": 2.7017791584298223, "grad_norm": 3.0233078225139796, "learning_rate": 5.138436796725942e-07, "loss": 0.1507, "step": 19134 }, { "epoch": 2.701920361479808, "grad_norm": 3.2831338007440523, "learning_rate": 5.133614061979009e-07, "loss": 0.1516, "step": 19135 }, { "epoch": 2.702061564529794, "grad_norm": 3.076370467422102, "learning_rate": 5.1287935318859e-07, "loss": 0.1446, "step": 19136 }, { "epoch": 2.70220276757978, "grad_norm": 3.022253648097624, "learning_rate": 5.123975206558673e-07, "loss": 0.1307, "step": 19137 }, { "epoch": 2.702343970629766, "grad_norm": 3.235807656540043, "learning_rate": 5.119159086109293e-07, "loss": 0.1475, "step": 19138 }, { "epoch": 2.7024851736797517, "grad_norm": 3.650282487818748, "learning_rate": 5.11434517064967e-07, "loss": 0.1471, "step": 19139 }, { "epoch": 2.7026263767297376, "grad_norm": 3.5715892629243218, "learning_rate": 5.109533460291694e-07, "loss": 0.134, "step": 19140 }, { "epoch": 2.7027675797797235, "grad_norm": 3.0856119858027458, "learning_rate": 5.104723955147184e-07, "loss": 0.1083, "step": 19141 }, { "epoch": 2.7029087828297094, "grad_norm": 3.499097674482075, "learning_rate": 5.099916655327907e-07, "loss": 0.1455, "step": 19142 }, { "epoch": 2.7030499858796952, "grad_norm": 2.9478391757589075, "learning_rate": 5.095111560945575e-07, "loss": 0.1383, "step": 19143 }, { "epoch": 2.703191188929681, "grad_norm": 3.4206204930941544, "learning_rate": 5.090308672111866e-07, "loss": 0.1455, "step": 19144 }, { "epoch": 2.703332391979667, "grad_norm": 2.370123077117807, "learning_rate": 5.085507988938398e-07, "loss": 0.1095, "step": 19145 }, { "epoch": 2.703473595029653, "grad_norm": 3.8179380296983436, "learning_rate": 5.08070951153673e-07, "loss": 0.2021, "step": 19146 }, { "epoch": 2.703614798079639, "grad_norm": 3.7506062555578974, "learning_rate": 5.075913240018382e-07, "loss": 0.225, "step": 19147 }, { "epoch": 2.7037560011296247, "grad_norm": 3.1041313820730423, "learning_rate": 5.07111917449481e-07, "loss": 0.1353, "step": 19148 }, { "epoch": 2.7038972041796105, "grad_norm": 2.62418338088162, "learning_rate": 5.066327315077446e-07, "loss": 0.113, "step": 19149 }, { "epoch": 2.7040384072295964, "grad_norm": 3.5242467536730233, "learning_rate": 5.061537661877636e-07, "loss": 0.1635, "step": 19150 }, { "epoch": 2.7041796102795823, "grad_norm": 3.3973290248577572, "learning_rate": 5.056750215006678e-07, "loss": 0.1486, "step": 19151 }, { "epoch": 2.704320813329568, "grad_norm": 2.8452539593671244, "learning_rate": 5.051964974575851e-07, "loss": 0.1573, "step": 19152 }, { "epoch": 2.7044620163795536, "grad_norm": 3.3145380476691897, "learning_rate": 5.047181940696333e-07, "loss": 0.1309, "step": 19153 }, { "epoch": 2.7046032194295395, "grad_norm": 2.8988189842057275, "learning_rate": 5.042401113479312e-07, "loss": 0.1399, "step": 19154 }, { "epoch": 2.7047444224795254, "grad_norm": 3.308795906612785, "learning_rate": 5.037622493035888e-07, "loss": 0.1594, "step": 19155 }, { "epoch": 2.7048856255295113, "grad_norm": 2.9664548304323515, "learning_rate": 5.032846079477105e-07, "loss": 0.1215, "step": 19156 }, { "epoch": 2.705026828579497, "grad_norm": 2.974756048972944, "learning_rate": 5.028071872913953e-07, "loss": 0.1521, "step": 19157 }, { "epoch": 2.705168031629483, "grad_norm": 2.569499971666096, "learning_rate": 5.02329987345741e-07, "loss": 0.1159, "step": 19158 }, { "epoch": 2.705309234679469, "grad_norm": 3.4365466061319965, "learning_rate": 5.018530081218353e-07, "loss": 0.1521, "step": 19159 }, { "epoch": 2.705450437729455, "grad_norm": 2.430354210954474, "learning_rate": 5.01376249630764e-07, "loss": 0.1069, "step": 19160 }, { "epoch": 2.7055916407794407, "grad_norm": 3.2108701681068226, "learning_rate": 5.008997118836067e-07, "loss": 0.1319, "step": 19161 }, { "epoch": 2.7057328438294266, "grad_norm": 3.177527287010105, "learning_rate": 5.004233948914383e-07, "loss": 0.1591, "step": 19162 }, { "epoch": 2.7058740468794125, "grad_norm": 3.185614153356821, "learning_rate": 4.999472986653264e-07, "loss": 0.1235, "step": 19163 }, { "epoch": 2.7060152499293983, "grad_norm": 3.32487888739914, "learning_rate": 4.994714232163378e-07, "loss": 0.1291, "step": 19164 }, { "epoch": 2.7061564529793842, "grad_norm": 3.0178970591947722, "learning_rate": 4.98995768555528e-07, "loss": 0.1111, "step": 19165 }, { "epoch": 2.70629765602937, "grad_norm": 2.9313414052942095, "learning_rate": 4.98520334693956e-07, "loss": 0.1288, "step": 19166 }, { "epoch": 2.706438859079356, "grad_norm": 2.8835610879835945, "learning_rate": 4.980451216426674e-07, "loss": 0.1203, "step": 19167 }, { "epoch": 2.706580062129342, "grad_norm": 3.1313529663020634, "learning_rate": 4.975701294127067e-07, "loss": 0.1806, "step": 19168 }, { "epoch": 2.7067212651793278, "grad_norm": 3.734462058301065, "learning_rate": 4.970953580151117e-07, "loss": 0.1645, "step": 19169 }, { "epoch": 2.7068624682293136, "grad_norm": 3.4066574993368524, "learning_rate": 4.966208074609158e-07, "loss": 0.157, "step": 19170 }, { "epoch": 2.7070036712792995, "grad_norm": 3.641324660665661, "learning_rate": 4.961464777611491e-07, "loss": 0.1643, "step": 19171 }, { "epoch": 2.7071448743292854, "grad_norm": 3.2046531039605837, "learning_rate": 4.956723689268339e-07, "loss": 0.1337, "step": 19172 }, { "epoch": 2.7072860773792713, "grad_norm": 2.3522578498000537, "learning_rate": 4.95198480968988e-07, "loss": 0.077, "step": 19173 }, { "epoch": 2.707427280429257, "grad_norm": 3.254443853910979, "learning_rate": 4.947248138986249e-07, "loss": 0.1765, "step": 19174 }, { "epoch": 2.707568483479243, "grad_norm": 2.189146991900782, "learning_rate": 4.942513677267524e-07, "loss": 0.103, "step": 19175 }, { "epoch": 2.707709686529229, "grad_norm": 3.2603533055469507, "learning_rate": 4.937781424643728e-07, "loss": 0.1717, "step": 19176 }, { "epoch": 2.707850889579215, "grad_norm": 2.8274225869714793, "learning_rate": 4.933051381224829e-07, "loss": 0.1429, "step": 19177 }, { "epoch": 2.7079920926292007, "grad_norm": 3.6245000969569916, "learning_rate": 4.928323547120772e-07, "loss": 0.202, "step": 19178 }, { "epoch": 2.7081332956791866, "grad_norm": 3.9004067288020448, "learning_rate": 4.923597922441415e-07, "loss": 0.1647, "step": 19179 }, { "epoch": 2.7082744987291725, "grad_norm": 3.0567238458083, "learning_rate": 4.918874507296578e-07, "loss": 0.1293, "step": 19180 }, { "epoch": 2.7084157017791584, "grad_norm": 3.4772722979544772, "learning_rate": 4.914153301796032e-07, "loss": 0.1626, "step": 19181 }, { "epoch": 2.7085569048291442, "grad_norm": 3.069978103363989, "learning_rate": 4.909434306049487e-07, "loss": 0.1485, "step": 19182 }, { "epoch": 2.70869810787913, "grad_norm": 2.6180322063661925, "learning_rate": 4.904717520166657e-07, "loss": 0.1359, "step": 19183 }, { "epoch": 2.708839310929116, "grad_norm": 3.0996338138837674, "learning_rate": 4.900002944257098e-07, "loss": 0.1208, "step": 19184 }, { "epoch": 2.708980513979102, "grad_norm": 3.02052736736637, "learning_rate": 4.895290578430412e-07, "loss": 0.1335, "step": 19185 }, { "epoch": 2.7091217170290878, "grad_norm": 2.739568707729761, "learning_rate": 4.890580422796087e-07, "loss": 0.1286, "step": 19186 }, { "epoch": 2.7092629200790737, "grad_norm": 3.5795781801879762, "learning_rate": 4.885872477463594e-07, "loss": 0.1516, "step": 19187 }, { "epoch": 2.7094041231290595, "grad_norm": 2.5464340554107383, "learning_rate": 4.881166742542365e-07, "loss": 0.1107, "step": 19188 }, { "epoch": 2.7095453261790454, "grad_norm": 3.220747071286372, "learning_rate": 4.876463218141736e-07, "loss": 0.1389, "step": 19189 }, { "epoch": 2.7096865292290313, "grad_norm": 3.296652425655922, "learning_rate": 4.871761904371019e-07, "loss": 0.1708, "step": 19190 }, { "epoch": 2.709827732279017, "grad_norm": 2.9485872184523787, "learning_rate": 4.867062801339484e-07, "loss": 0.1483, "step": 19191 }, { "epoch": 2.709968935329003, "grad_norm": 3.0661346906058955, "learning_rate": 4.86236590915633e-07, "loss": 0.1344, "step": 19192 }, { "epoch": 2.710110138378989, "grad_norm": 3.70543734035251, "learning_rate": 4.857671227930671e-07, "loss": 0.1414, "step": 19193 }, { "epoch": 2.710251341428975, "grad_norm": 2.914800950411566, "learning_rate": 4.852978757771664e-07, "loss": 0.1192, "step": 19194 }, { "epoch": 2.7103925444789607, "grad_norm": 3.4023212314245797, "learning_rate": 4.848288498788345e-07, "loss": 0.1848, "step": 19195 }, { "epoch": 2.7105337475289466, "grad_norm": 3.225212466985664, "learning_rate": 4.843600451089702e-07, "loss": 0.1566, "step": 19196 }, { "epoch": 2.7106749505789325, "grad_norm": 3.6365806184782428, "learning_rate": 4.838914614784695e-07, "loss": 0.1369, "step": 19197 }, { "epoch": 2.7108161536289184, "grad_norm": 2.90698030058396, "learning_rate": 4.834230989982214e-07, "loss": 0.1015, "step": 19198 }, { "epoch": 2.7109573566789043, "grad_norm": 1.8324210277064046, "learning_rate": 4.829549576791092e-07, "loss": 0.0875, "step": 19199 }, { "epoch": 2.71109855972889, "grad_norm": 3.0410464218075393, "learning_rate": 4.824870375320156e-07, "loss": 0.1476, "step": 19200 }, { "epoch": 2.711239762778876, "grad_norm": 3.089854737965832, "learning_rate": 4.820193385678129e-07, "loss": 0.1395, "step": 19201 }, { "epoch": 2.711380965828862, "grad_norm": 2.7063104452208417, "learning_rate": 4.81551860797369e-07, "loss": 0.1306, "step": 19202 }, { "epoch": 2.711522168878848, "grad_norm": 2.915578363650722, "learning_rate": 4.810846042315498e-07, "loss": 0.1487, "step": 19203 }, { "epoch": 2.7116633719288337, "grad_norm": 2.476024734621655, "learning_rate": 4.806175688812142e-07, "loss": 0.0886, "step": 19204 }, { "epoch": 2.7118045749788195, "grad_norm": 3.7479816491117934, "learning_rate": 4.801507547572126e-07, "loss": 0.1706, "step": 19205 }, { "epoch": 2.7119457780288054, "grad_norm": 3.6498926957204505, "learning_rate": 4.796841618703984e-07, "loss": 0.1569, "step": 19206 }, { "epoch": 2.7120869810787913, "grad_norm": 3.220599714716518, "learning_rate": 4.79217790231612e-07, "loss": 0.1497, "step": 19207 }, { "epoch": 2.712228184128777, "grad_norm": 2.92189804423502, "learning_rate": 4.787516398516934e-07, "loss": 0.1108, "step": 19208 }, { "epoch": 2.712369387178763, "grad_norm": 3.2946555351901767, "learning_rate": 4.782857107414752e-07, "loss": 0.1695, "step": 19209 }, { "epoch": 2.712510590228749, "grad_norm": 2.6499037061812802, "learning_rate": 4.77820002911783e-07, "loss": 0.1319, "step": 19210 }, { "epoch": 2.712651793278735, "grad_norm": 3.2849126754899234, "learning_rate": 4.773545163734416e-07, "loss": 0.1072, "step": 19211 }, { "epoch": 2.7127929963287207, "grad_norm": 4.143036197127106, "learning_rate": 4.768892511372703e-07, "loss": 0.1962, "step": 19212 }, { "epoch": 2.7129341993787066, "grad_norm": 3.098026916731239, "learning_rate": 4.76424207214079e-07, "loss": 0.1289, "step": 19213 }, { "epoch": 2.7130754024286925, "grad_norm": 3.7238058060850223, "learning_rate": 4.7595938461467706e-07, "loss": 0.1518, "step": 19214 }, { "epoch": 2.7132166054786784, "grad_norm": 3.1278462193718553, "learning_rate": 4.7549478334986576e-07, "loss": 0.1209, "step": 19215 }, { "epoch": 2.7133578085286643, "grad_norm": 3.713035725374499, "learning_rate": 4.7503040343044205e-07, "loss": 0.1716, "step": 19216 }, { "epoch": 2.71349901157865, "grad_norm": 3.8532281829076593, "learning_rate": 4.745662448671984e-07, "loss": 0.2078, "step": 19217 }, { "epoch": 2.713640214628636, "grad_norm": 2.91283348184974, "learning_rate": 4.741023076709217e-07, "loss": 0.1093, "step": 19218 }, { "epoch": 2.713781417678622, "grad_norm": 3.0852114428496953, "learning_rate": 4.7363859185239336e-07, "loss": 0.1432, "step": 19219 }, { "epoch": 2.713922620728608, "grad_norm": 4.121960971125105, "learning_rate": 4.731750974223892e-07, "loss": 0.1724, "step": 19220 }, { "epoch": 2.7140638237785937, "grad_norm": 4.268687106148387, "learning_rate": 4.7271182439168286e-07, "loss": 0.1726, "step": 19221 }, { "epoch": 2.7142050268285796, "grad_norm": 2.7477110199302484, "learning_rate": 4.7224877277103673e-07, "loss": 0.1299, "step": 19222 }, { "epoch": 2.7143462298785654, "grad_norm": 2.634416807993679, "learning_rate": 4.717859425712168e-07, "loss": 0.1294, "step": 19223 }, { "epoch": 2.7144874329285513, "grad_norm": 2.8739496667680546, "learning_rate": 4.7132333380297546e-07, "loss": 0.1282, "step": 19224 }, { "epoch": 2.714628635978537, "grad_norm": 2.8780627686775677, "learning_rate": 4.708609464770653e-07, "loss": 0.1413, "step": 19225 }, { "epoch": 2.714769839028523, "grad_norm": 2.7906808707204336, "learning_rate": 4.703987806042332e-07, "loss": 0.1281, "step": 19226 }, { "epoch": 2.714911042078509, "grad_norm": 4.39402470441956, "learning_rate": 4.6993683619521393e-07, "loss": 0.2436, "step": 19227 }, { "epoch": 2.715052245128495, "grad_norm": 3.4233360878829093, "learning_rate": 4.6947511326074893e-07, "loss": 0.1382, "step": 19228 }, { "epoch": 2.7151934481784807, "grad_norm": 2.4282727195683576, "learning_rate": 4.6901361181156737e-07, "loss": 0.0809, "step": 19229 }, { "epoch": 2.7153346512284666, "grad_norm": 3.2492857985860732, "learning_rate": 4.6855233185839175e-07, "loss": 0.1347, "step": 19230 }, { "epoch": 2.7154758542784525, "grad_norm": 3.0697059400859006, "learning_rate": 4.680912734119447e-07, "loss": 0.1522, "step": 19231 }, { "epoch": 2.7156170573284384, "grad_norm": 3.863461679627103, "learning_rate": 4.676304364829398e-07, "loss": 0.138, "step": 19232 }, { "epoch": 2.7157582603784243, "grad_norm": 2.5785026017418318, "learning_rate": 4.671698210820863e-07, "loss": 0.1242, "step": 19233 }, { "epoch": 2.71589946342841, "grad_norm": 2.9054022144311618, "learning_rate": 4.6670942722009004e-07, "loss": 0.1399, "step": 19234 }, { "epoch": 2.716040666478396, "grad_norm": 3.002043387922445, "learning_rate": 4.6624925490764914e-07, "loss": 0.1459, "step": 19235 }, { "epoch": 2.716181869528382, "grad_norm": 2.624766126396182, "learning_rate": 4.657893041554584e-07, "loss": 0.0913, "step": 19236 }, { "epoch": 2.716323072578368, "grad_norm": 3.214429830232471, "learning_rate": 4.6532957497420593e-07, "loss": 0.1333, "step": 19237 }, { "epoch": 2.7164642756283537, "grad_norm": 3.1289717506881636, "learning_rate": 4.6487006737457765e-07, "loss": 0.1338, "step": 19238 }, { "epoch": 2.7166054786783396, "grad_norm": 4.104688588330098, "learning_rate": 4.644107813672483e-07, "loss": 0.1686, "step": 19239 }, { "epoch": 2.7167466817283255, "grad_norm": 4.04194819287088, "learning_rate": 4.639517169628971e-07, "loss": 0.1883, "step": 19240 }, { "epoch": 2.7168878847783113, "grad_norm": 3.602147730580308, "learning_rate": 4.634928741721889e-07, "loss": 0.1691, "step": 19241 }, { "epoch": 2.717029087828297, "grad_norm": 4.482372626534816, "learning_rate": 4.6303425300578964e-07, "loss": 0.1498, "step": 19242 }, { "epoch": 2.717170290878283, "grad_norm": 2.998464151417447, "learning_rate": 4.6257585347435406e-07, "loss": 0.1153, "step": 19243 }, { "epoch": 2.717311493928269, "grad_norm": 3.480652850696283, "learning_rate": 4.6211767558853484e-07, "loss": 0.1409, "step": 19244 }, { "epoch": 2.717452696978255, "grad_norm": 2.77088736631154, "learning_rate": 4.6165971935898337e-07, "loss": 0.1124, "step": 19245 }, { "epoch": 2.7175939000282407, "grad_norm": 3.1586981849033995, "learning_rate": 4.6120198479634117e-07, "loss": 0.1406, "step": 19246 }, { "epoch": 2.7177351030782266, "grad_norm": 3.660098883821135, "learning_rate": 4.607444719112453e-07, "loss": 0.1186, "step": 19247 }, { "epoch": 2.7178763061282125, "grad_norm": 3.225571851719596, "learning_rate": 4.6028718071432834e-07, "loss": 0.1445, "step": 19248 }, { "epoch": 2.7180175091781984, "grad_norm": 3.010212924523239, "learning_rate": 4.598301112162162e-07, "loss": 0.1194, "step": 19249 }, { "epoch": 2.7181587122281843, "grad_norm": 3.4034043376127223, "learning_rate": 4.5937326342753384e-07, "loss": 0.1379, "step": 19250 }, { "epoch": 2.71829991527817, "grad_norm": 3.127890448934661, "learning_rate": 4.58916637358896e-07, "loss": 0.1523, "step": 19251 }, { "epoch": 2.718441118328156, "grad_norm": 3.5667772946219722, "learning_rate": 4.5846023302091424e-07, "loss": 0.1591, "step": 19252 }, { "epoch": 2.718582321378142, "grad_norm": 3.2972223958039133, "learning_rate": 4.580040504241967e-07, "loss": 0.1462, "step": 19253 }, { "epoch": 2.718723524428128, "grad_norm": 3.8714292468998965, "learning_rate": 4.575480895793438e-07, "loss": 0.1839, "step": 19254 }, { "epoch": 2.7188647274781133, "grad_norm": 3.483074740719657, "learning_rate": 4.5709235049695267e-07, "loss": 0.1598, "step": 19255 }, { "epoch": 2.719005930528099, "grad_norm": 3.464113272168803, "learning_rate": 4.5663683318761255e-07, "loss": 0.1786, "step": 19256 }, { "epoch": 2.719147133578085, "grad_norm": 2.9733374354105955, "learning_rate": 4.5618153766191275e-07, "loss": 0.1248, "step": 19257 }, { "epoch": 2.719288336628071, "grad_norm": 3.5228203704000043, "learning_rate": 4.557264639304315e-07, "loss": 0.1623, "step": 19258 }, { "epoch": 2.719429539678057, "grad_norm": 2.91906396992724, "learning_rate": 4.55271612003747e-07, "loss": 0.102, "step": 19259 }, { "epoch": 2.7195707427280427, "grad_norm": 2.4507272292406324, "learning_rate": 4.548169818924275e-07, "loss": 0.1087, "step": 19260 }, { "epoch": 2.7197119457780286, "grad_norm": 3.448222907812001, "learning_rate": 4.543625736070367e-07, "loss": 0.1585, "step": 19261 }, { "epoch": 2.7198531488280144, "grad_norm": 3.5149674755116744, "learning_rate": 4.5390838715813956e-07, "loss": 0.1707, "step": 19262 }, { "epoch": 2.7199943518780003, "grad_norm": 3.5544207112155273, "learning_rate": 4.534544225562876e-07, "loss": 0.1381, "step": 19263 }, { "epoch": 2.720135554927986, "grad_norm": 3.3522076215146823, "learning_rate": 4.5300067981203346e-07, "loss": 0.1393, "step": 19264 }, { "epoch": 2.720276757977972, "grad_norm": 3.380881757760855, "learning_rate": 4.525471589359198e-07, "loss": 0.1576, "step": 19265 }, { "epoch": 2.720417961027958, "grad_norm": 4.115792652030727, "learning_rate": 4.520938599384872e-07, "loss": 0.1875, "step": 19266 }, { "epoch": 2.720559164077944, "grad_norm": 3.3593118143094904, "learning_rate": 4.5164078283026934e-07, "loss": 0.171, "step": 19267 }, { "epoch": 2.7207003671279297, "grad_norm": 3.172074869477012, "learning_rate": 4.511879276217967e-07, "loss": 0.134, "step": 19268 }, { "epoch": 2.7208415701779156, "grad_norm": 4.066777026516414, "learning_rate": 4.507352943235921e-07, "loss": 0.1759, "step": 19269 }, { "epoch": 2.7209827732279015, "grad_norm": 2.871895412107255, "learning_rate": 4.5028288294617583e-07, "loss": 0.1277, "step": 19270 }, { "epoch": 2.7211239762778874, "grad_norm": 3.4806963090116976, "learning_rate": 4.498306935000607e-07, "loss": 0.1341, "step": 19271 }, { "epoch": 2.7212651793278733, "grad_norm": 3.0558044302368876, "learning_rate": 4.4937872599575605e-07, "loss": 0.15, "step": 19272 }, { "epoch": 2.721406382377859, "grad_norm": 3.098678587524678, "learning_rate": 4.4892698044376346e-07, "loss": 0.1285, "step": 19273 }, { "epoch": 2.721547585427845, "grad_norm": 2.6102018645896776, "learning_rate": 4.484754568545857e-07, "loss": 0.108, "step": 19274 }, { "epoch": 2.721688788477831, "grad_norm": 2.799940361902545, "learning_rate": 4.4802415523871214e-07, "loss": 0.0977, "step": 19275 }, { "epoch": 2.721829991527817, "grad_norm": 3.3602300957065205, "learning_rate": 4.475730756066332e-07, "loss": 0.1657, "step": 19276 }, { "epoch": 2.7219711945778027, "grad_norm": 3.3926056319262976, "learning_rate": 4.471222179688306e-07, "loss": 0.1387, "step": 19277 }, { "epoch": 2.7221123976277886, "grad_norm": 3.6564454765361782, "learning_rate": 4.4667158233577925e-07, "loss": 0.1393, "step": 19278 }, { "epoch": 2.7222536006777744, "grad_norm": 2.3322279498954415, "learning_rate": 4.462211687179574e-07, "loss": 0.0912, "step": 19279 }, { "epoch": 2.7223948037277603, "grad_norm": 3.3057067655498678, "learning_rate": 4.4577097712582897e-07, "loss": 0.1446, "step": 19280 }, { "epoch": 2.722536006777746, "grad_norm": 2.9585724052675757, "learning_rate": 4.4532100756985663e-07, "loss": 0.1262, "step": 19281 }, { "epoch": 2.722677209827732, "grad_norm": 3.168021595235617, "learning_rate": 4.4487126006049764e-07, "loss": 0.158, "step": 19282 }, { "epoch": 2.722818412877718, "grad_norm": 2.474409723978474, "learning_rate": 4.444217346082036e-07, "loss": 0.1123, "step": 19283 }, { "epoch": 2.722959615927704, "grad_norm": 2.5815016459588587, "learning_rate": 4.4397243122342284e-07, "loss": 0.1106, "step": 19284 }, { "epoch": 2.7231008189776897, "grad_norm": 3.396116449044327, "learning_rate": 4.4352334991659475e-07, "loss": 0.1824, "step": 19285 }, { "epoch": 2.7232420220276756, "grad_norm": 3.3599132158699705, "learning_rate": 4.430744906981577e-07, "loss": 0.1281, "step": 19286 }, { "epoch": 2.7233832250776615, "grad_norm": 4.247307123070073, "learning_rate": 4.4262585357854217e-07, "loss": 0.1675, "step": 19287 }, { "epoch": 2.7235244281276474, "grad_norm": 4.267281582293608, "learning_rate": 4.421774385681743e-07, "loss": 0.1749, "step": 19288 }, { "epoch": 2.7236656311776333, "grad_norm": 3.0014287337737167, "learning_rate": 4.4172924567747467e-07, "loss": 0.1436, "step": 19289 }, { "epoch": 2.723806834227619, "grad_norm": 3.2406929888348213, "learning_rate": 4.412812749168582e-07, "loss": 0.1443, "step": 19290 }, { "epoch": 2.723948037277605, "grad_norm": 2.710843115511303, "learning_rate": 4.408335262967378e-07, "loss": 0.1433, "step": 19291 }, { "epoch": 2.724089240327591, "grad_norm": 3.665094879720396, "learning_rate": 4.403859998275184e-07, "loss": 0.1598, "step": 19292 }, { "epoch": 2.724230443377577, "grad_norm": 3.771711170906613, "learning_rate": 4.3993869551960165e-07, "loss": 0.1741, "step": 19293 }, { "epoch": 2.7243716464275627, "grad_norm": 3.433241342825456, "learning_rate": 4.394916133833782e-07, "loss": 0.1506, "step": 19294 }, { "epoch": 2.7245128494775486, "grad_norm": 3.4737072051924796, "learning_rate": 4.390447534292419e-07, "loss": 0.1416, "step": 19295 }, { "epoch": 2.7246540525275345, "grad_norm": 2.840175954508181, "learning_rate": 4.385981156675756e-07, "loss": 0.1271, "step": 19296 }, { "epoch": 2.7247952555775203, "grad_norm": 3.7790893162369024, "learning_rate": 4.3815170010875984e-07, "loss": 0.1879, "step": 19297 }, { "epoch": 2.724936458627506, "grad_norm": 2.7096361549091283, "learning_rate": 4.377055067631697e-07, "loss": 0.1097, "step": 19298 }, { "epoch": 2.725077661677492, "grad_norm": 2.87399765431248, "learning_rate": 4.372595356411746e-07, "loss": 0.0994, "step": 19299 }, { "epoch": 2.725218864727478, "grad_norm": 3.226149349046855, "learning_rate": 4.3681378675313747e-07, "loss": 0.1314, "step": 19300 }, { "epoch": 2.725360067777464, "grad_norm": 3.175470841121101, "learning_rate": 4.363682601094177e-07, "loss": 0.1365, "step": 19301 }, { "epoch": 2.7255012708274498, "grad_norm": 2.944246816245402, "learning_rate": 4.3592295572037037e-07, "loss": 0.1201, "step": 19302 }, { "epoch": 2.7256424738774356, "grad_norm": 2.9145409899455497, "learning_rate": 4.3547787359634163e-07, "loss": 0.1297, "step": 19303 }, { "epoch": 2.7257836769274215, "grad_norm": 2.4233049570135043, "learning_rate": 4.350330137476777e-07, "loss": 0.0853, "step": 19304 }, { "epoch": 2.7259248799774074, "grad_norm": 2.9551454863413515, "learning_rate": 4.345883761847147e-07, "loss": 0.1202, "step": 19305 }, { "epoch": 2.7260660830273933, "grad_norm": 3.214487631843572, "learning_rate": 4.3414396091778774e-07, "loss": 0.121, "step": 19306 }, { "epoch": 2.726207286077379, "grad_norm": 2.9205039097159826, "learning_rate": 4.336997679572241e-07, "loss": 0.13, "step": 19307 }, { "epoch": 2.726348489127365, "grad_norm": 3.3872230361092215, "learning_rate": 4.3325579731334444e-07, "loss": 0.1646, "step": 19308 }, { "epoch": 2.726489692177351, "grad_norm": 4.332493821514747, "learning_rate": 4.3281204899647046e-07, "loss": 0.2077, "step": 19309 }, { "epoch": 2.726630895227337, "grad_norm": 3.6919561303104738, "learning_rate": 4.323685230169128e-07, "loss": 0.1819, "step": 19310 }, { "epoch": 2.7267720982773227, "grad_norm": 2.9150266514042107, "learning_rate": 4.319252193849788e-07, "loss": 0.1251, "step": 19311 }, { "epoch": 2.7269133013273086, "grad_norm": 3.4605387714812252, "learning_rate": 4.314821381109702e-07, "loss": 0.134, "step": 19312 }, { "epoch": 2.7270545043772945, "grad_norm": 3.389098905854682, "learning_rate": 4.310392792051832e-07, "loss": 0.1197, "step": 19313 }, { "epoch": 2.7271957074272803, "grad_norm": 3.603679730509811, "learning_rate": 4.305966426779118e-07, "loss": 0.1377, "step": 19314 }, { "epoch": 2.7273369104772662, "grad_norm": 3.1507424255957113, "learning_rate": 4.301542285394411e-07, "loss": 0.1261, "step": 19315 }, { "epoch": 2.727478113527252, "grad_norm": 3.0627372124840804, "learning_rate": 4.2971203680005404e-07, "loss": 0.1513, "step": 19316 }, { "epoch": 2.727619316577238, "grad_norm": 2.8015209895052613, "learning_rate": 4.2927006747002563e-07, "loss": 0.1401, "step": 19317 }, { "epoch": 2.727760519627224, "grad_norm": 2.747523376300121, "learning_rate": 4.2882832055962885e-07, "loss": 0.1105, "step": 19318 }, { "epoch": 2.7279017226772098, "grad_norm": 2.837397829415899, "learning_rate": 4.283867960791277e-07, "loss": 0.1159, "step": 19319 }, { "epoch": 2.7280429257271956, "grad_norm": 3.2545690514459653, "learning_rate": 4.279454940387828e-07, "loss": 0.1563, "step": 19320 }, { "epoch": 2.7281841287771815, "grad_norm": 3.102814449289982, "learning_rate": 4.275044144488516e-07, "loss": 0.1289, "step": 19321 }, { "epoch": 2.7283253318271674, "grad_norm": 3.5554220455615546, "learning_rate": 4.270635573195836e-07, "loss": 0.1719, "step": 19322 }, { "epoch": 2.7284665348771533, "grad_norm": 3.3861074933420414, "learning_rate": 4.2662292266122505e-07, "loss": 0.1649, "step": 19323 }, { "epoch": 2.728607737927139, "grad_norm": 3.8291550121846116, "learning_rate": 4.261825104840145e-07, "loss": 0.1459, "step": 19324 }, { "epoch": 2.728748940977125, "grad_norm": 3.009529640473334, "learning_rate": 4.25742320798187e-07, "loss": 0.1487, "step": 19325 }, { "epoch": 2.728890144027111, "grad_norm": 3.4944575019006185, "learning_rate": 4.253023536139733e-07, "loss": 0.1522, "step": 19326 }, { "epoch": 2.729031347077097, "grad_norm": 3.0920606236633743, "learning_rate": 4.2486260894160083e-07, "loss": 0.1201, "step": 19327 }, { "epoch": 2.7291725501270827, "grad_norm": 2.479350469386191, "learning_rate": 4.244230867912835e-07, "loss": 0.1209, "step": 19328 }, { "epoch": 2.7293137531770686, "grad_norm": 2.648647626174326, "learning_rate": 4.2398378717323887e-07, "loss": 0.11, "step": 19329 }, { "epoch": 2.7294549562270545, "grad_norm": 3.516232143322579, "learning_rate": 4.2354471009767415e-07, "loss": 0.1359, "step": 19330 }, { "epoch": 2.7295961592770404, "grad_norm": 2.9186052773527686, "learning_rate": 4.231058555747958e-07, "loss": 0.1257, "step": 19331 }, { "epoch": 2.7297373623270262, "grad_norm": 3.194427760395779, "learning_rate": 4.226672236148022e-07, "loss": 0.1342, "step": 19332 }, { "epoch": 2.729878565377012, "grad_norm": 3.777446516699935, "learning_rate": 4.222288142278852e-07, "loss": 0.1674, "step": 19333 }, { "epoch": 2.730019768426998, "grad_norm": 2.880262445015603, "learning_rate": 4.217906274242345e-07, "loss": 0.1024, "step": 19334 }, { "epoch": 2.730160971476984, "grad_norm": 2.5484760674390072, "learning_rate": 4.21352663214033e-07, "loss": 0.1225, "step": 19335 }, { "epoch": 2.7303021745269698, "grad_norm": 3.581657664675056, "learning_rate": 4.209149216074593e-07, "loss": 0.182, "step": 19336 }, { "epoch": 2.7304433775769557, "grad_norm": 3.1109538614465535, "learning_rate": 4.2047740261468516e-07, "loss": 0.1497, "step": 19337 }, { "epoch": 2.7305845806269415, "grad_norm": 4.585157840500381, "learning_rate": 4.2004010624588033e-07, "loss": 0.2139, "step": 19338 }, { "epoch": 2.7307257836769274, "grad_norm": 4.17259703919618, "learning_rate": 4.1960303251120547e-07, "loss": 0.187, "step": 19339 }, { "epoch": 2.7308669867269133, "grad_norm": 2.6313559528959805, "learning_rate": 4.191661814208181e-07, "loss": 0.1444, "step": 19340 }, { "epoch": 2.731008189776899, "grad_norm": 3.490652132438848, "learning_rate": 4.1872955298487227e-07, "loss": 0.1327, "step": 19341 }, { "epoch": 2.731149392826885, "grad_norm": 2.65701000641957, "learning_rate": 4.1829314721351213e-07, "loss": 0.1148, "step": 19342 }, { "epoch": 2.731290595876871, "grad_norm": 2.9978406139678286, "learning_rate": 4.178569641168817e-07, "loss": 0.1286, "step": 19343 }, { "epoch": 2.731431798926857, "grad_norm": 2.594870044023713, "learning_rate": 4.1742100370511853e-07, "loss": 0.1157, "step": 19344 }, { "epoch": 2.7315730019768427, "grad_norm": 3.1509020715851386, "learning_rate": 4.169852659883522e-07, "loss": 0.1658, "step": 19345 }, { "epoch": 2.7317142050268286, "grad_norm": 3.4093079628565404, "learning_rate": 4.1654975097671025e-07, "loss": 0.141, "step": 19346 }, { "epoch": 2.7318554080768145, "grad_norm": 3.5720588166000202, "learning_rate": 4.161144586803112e-07, "loss": 0.1221, "step": 19347 }, { "epoch": 2.7319966111268004, "grad_norm": 2.599719986247328, "learning_rate": 4.1567938910927475e-07, "loss": 0.114, "step": 19348 }, { "epoch": 2.7321378141767863, "grad_norm": 3.0804425133731588, "learning_rate": 4.1524454227370945e-07, "loss": 0.1482, "step": 19349 }, { "epoch": 2.732279017226772, "grad_norm": 2.8618081328075857, "learning_rate": 4.1480991818372284e-07, "loss": 0.1269, "step": 19350 }, { "epoch": 2.732420220276758, "grad_norm": 3.4214907498618903, "learning_rate": 4.1437551684941345e-07, "loss": 0.1462, "step": 19351 }, { "epoch": 2.732561423326744, "grad_norm": 3.068140104805384, "learning_rate": 4.1394133828087654e-07, "loss": 0.1681, "step": 19352 }, { "epoch": 2.73270262637673, "grad_norm": 2.758852363440679, "learning_rate": 4.135073824882041e-07, "loss": 0.1233, "step": 19353 }, { "epoch": 2.7328438294267157, "grad_norm": 2.3448765553633613, "learning_rate": 4.130736494814802e-07, "loss": 0.1229, "step": 19354 }, { "epoch": 2.7329850324767015, "grad_norm": 3.7593709314048853, "learning_rate": 4.126401392707835e-07, "loss": 0.1788, "step": 19355 }, { "epoch": 2.7331262355266874, "grad_norm": 2.6450518509268495, "learning_rate": 4.1220685186619037e-07, "loss": 0.0996, "step": 19356 }, { "epoch": 2.7332674385766733, "grad_norm": 2.6264390069004855, "learning_rate": 4.117737872777694e-07, "loss": 0.1024, "step": 19357 }, { "epoch": 2.733408641626659, "grad_norm": 2.924053295572232, "learning_rate": 4.113409455155837e-07, "loss": 0.1343, "step": 19358 }, { "epoch": 2.733549844676645, "grad_norm": 3.3634477029364294, "learning_rate": 4.1090832658969294e-07, "loss": 0.1781, "step": 19359 }, { "epoch": 2.733691047726631, "grad_norm": 2.7239417193093525, "learning_rate": 4.1047593051015245e-07, "loss": 0.1232, "step": 19360 }, { "epoch": 2.733832250776617, "grad_norm": 2.7267377539482496, "learning_rate": 4.1004375728701193e-07, "loss": 0.131, "step": 19361 }, { "epoch": 2.7339734538266027, "grad_norm": 2.883985074495877, "learning_rate": 4.0961180693031123e-07, "loss": 0.1093, "step": 19362 }, { "epoch": 2.7341146568765886, "grad_norm": 2.7811100587525375, "learning_rate": 4.0918007945009e-07, "loss": 0.1063, "step": 19363 }, { "epoch": 2.7342558599265745, "grad_norm": 3.6461206304841385, "learning_rate": 4.087485748563813e-07, "loss": 0.1469, "step": 19364 }, { "epoch": 2.7343970629765604, "grad_norm": 3.4665865630322137, "learning_rate": 4.083172931592139e-07, "loss": 0.1343, "step": 19365 }, { "epoch": 2.7345382660265463, "grad_norm": 3.1956189671207618, "learning_rate": 4.0788623436861077e-07, "loss": 0.092, "step": 19366 }, { "epoch": 2.734679469076532, "grad_norm": 2.742902940935254, "learning_rate": 4.0745539849458837e-07, "loss": 0.1365, "step": 19367 }, { "epoch": 2.734820672126518, "grad_norm": 3.1799613548796803, "learning_rate": 4.0702478554716094e-07, "loss": 0.1559, "step": 19368 }, { "epoch": 2.734961875176504, "grad_norm": 3.2288130230216154, "learning_rate": 4.0659439553633385e-07, "loss": 0.1803, "step": 19369 }, { "epoch": 2.73510307822649, "grad_norm": 2.8675436998819084, "learning_rate": 4.0616422847211013e-07, "loss": 0.1499, "step": 19370 }, { "epoch": 2.7352442812764757, "grad_norm": 3.9430708514995834, "learning_rate": 4.0573428436448627e-07, "loss": 0.1791, "step": 19371 }, { "epoch": 2.7353854843264616, "grad_norm": 2.4161780812475175, "learning_rate": 4.053045632234542e-07, "loss": 0.103, "step": 19372 }, { "epoch": 2.7355266873764474, "grad_norm": 3.5211675489720826, "learning_rate": 4.0487506505900056e-07, "loss": 0.1653, "step": 19373 }, { "epoch": 2.7356678904264333, "grad_norm": 2.911435394894441, "learning_rate": 4.0444578988110715e-07, "loss": 0.1337, "step": 19374 }, { "epoch": 2.735809093476419, "grad_norm": 2.73153175142303, "learning_rate": 4.040167376997484e-07, "loss": 0.126, "step": 19375 }, { "epoch": 2.735950296526405, "grad_norm": 2.491834674179084, "learning_rate": 4.0358790852489616e-07, "loss": 0.1039, "step": 19376 }, { "epoch": 2.736091499576391, "grad_norm": 2.5827246369661054, "learning_rate": 4.031593023665181e-07, "loss": 0.1135, "step": 19377 }, { "epoch": 2.736232702626377, "grad_norm": 2.79673933152445, "learning_rate": 4.0273091923457297e-07, "loss": 0.1125, "step": 19378 }, { "epoch": 2.7363739056763627, "grad_norm": 3.1183923679206127, "learning_rate": 4.0230275913901716e-07, "loss": 0.1412, "step": 19379 }, { "epoch": 2.7365151087263486, "grad_norm": 2.971780911422613, "learning_rate": 4.018748220897994e-07, "loss": 0.1274, "step": 19380 }, { "epoch": 2.7366563117763345, "grad_norm": 2.471778481445548, "learning_rate": 4.0144710809686407e-07, "loss": 0.1156, "step": 19381 }, { "epoch": 2.7367975148263204, "grad_norm": 2.9387206315714867, "learning_rate": 4.0101961717015416e-07, "loss": 0.1357, "step": 19382 }, { "epoch": 2.7369387178763063, "grad_norm": 3.149247195834162, "learning_rate": 4.005923493196029e-07, "loss": 0.1125, "step": 19383 }, { "epoch": 2.737079920926292, "grad_norm": 3.738936492749957, "learning_rate": 4.0016530455514013e-07, "loss": 0.1689, "step": 19384 }, { "epoch": 2.737221123976278, "grad_norm": 2.9328198058316626, "learning_rate": 3.9973848288669013e-07, "loss": 0.1073, "step": 19385 }, { "epoch": 2.737362327026264, "grad_norm": 3.9041212711779756, "learning_rate": 3.9931188432417057e-07, "loss": 0.1789, "step": 19386 }, { "epoch": 2.73750353007625, "grad_norm": 2.8744570015529183, "learning_rate": 3.9888550887749787e-07, "loss": 0.1294, "step": 19387 }, { "epoch": 2.7376447331262357, "grad_norm": 2.9034007361933285, "learning_rate": 3.9845935655657866e-07, "loss": 0.1382, "step": 19388 }, { "epoch": 2.7377859361762216, "grad_norm": 3.516622550036228, "learning_rate": 3.9803342737131713e-07, "loss": 0.1554, "step": 19389 }, { "epoch": 2.7379271392262075, "grad_norm": 3.250628724976959, "learning_rate": 3.976077213316132e-07, "loss": 0.1469, "step": 19390 }, { "epoch": 2.7380683422761933, "grad_norm": 3.6764501877211004, "learning_rate": 3.9718223844735784e-07, "loss": 0.1599, "step": 19391 }, { "epoch": 2.738209545326179, "grad_norm": 2.70512956443334, "learning_rate": 3.967569787284409e-07, "loss": 0.1206, "step": 19392 }, { "epoch": 2.738350748376165, "grad_norm": 2.9530516783214082, "learning_rate": 3.9633194218474223e-07, "loss": 0.1389, "step": 19393 }, { "epoch": 2.738491951426151, "grad_norm": 2.9914672192022103, "learning_rate": 3.95907128826144e-07, "loss": 0.129, "step": 19394 }, { "epoch": 2.738633154476137, "grad_norm": 3.5164886087025784, "learning_rate": 3.954825386625172e-07, "loss": 0.1412, "step": 19395 }, { "epoch": 2.7387743575261227, "grad_norm": 2.97364672961565, "learning_rate": 3.9505817170372606e-07, "loss": 0.128, "step": 19396 }, { "epoch": 2.7389155605761086, "grad_norm": 2.709447080355022, "learning_rate": 3.946340279596361e-07, "loss": 0.1376, "step": 19397 }, { "epoch": 2.7390567636260945, "grad_norm": 3.2302855594915827, "learning_rate": 3.942101074401028e-07, "loss": 0.1695, "step": 19398 }, { "epoch": 2.7391979666760804, "grad_norm": 2.744602159476807, "learning_rate": 3.937864101549771e-07, "loss": 0.1248, "step": 19399 }, { "epoch": 2.7393391697260663, "grad_norm": 3.670593089537176, "learning_rate": 3.933629361141078e-07, "loss": 0.164, "step": 19400 }, { "epoch": 2.739480372776052, "grad_norm": 3.212028759504199, "learning_rate": 3.9293968532733593e-07, "loss": 0.1495, "step": 19401 }, { "epoch": 2.739621575826038, "grad_norm": 3.242219052507598, "learning_rate": 3.9251665780449587e-07, "loss": 0.1158, "step": 19402 }, { "epoch": 2.739762778876024, "grad_norm": 4.0302874181253605, "learning_rate": 3.9209385355542085e-07, "loss": 0.1733, "step": 19403 }, { "epoch": 2.73990398192601, "grad_norm": 3.5100756351832185, "learning_rate": 3.91671272589933e-07, "loss": 0.1671, "step": 19404 }, { "epoch": 2.7400451849759957, "grad_norm": 2.9424599405535585, "learning_rate": 3.9124891491785553e-07, "loss": 0.1207, "step": 19405 }, { "epoch": 2.7401863880259816, "grad_norm": 3.3752386007416653, "learning_rate": 3.908267805490051e-07, "loss": 0.1578, "step": 19406 }, { "epoch": 2.7403275910759675, "grad_norm": 2.7017307623402584, "learning_rate": 3.9040486949318947e-07, "loss": 0.1297, "step": 19407 }, { "epoch": 2.7404687941259533, "grad_norm": 4.263710886839664, "learning_rate": 3.899831817602151e-07, "loss": 0.1759, "step": 19408 }, { "epoch": 2.7406099971759392, "grad_norm": 3.1975080532405484, "learning_rate": 3.895617173598809e-07, "loss": 0.14, "step": 19409 }, { "epoch": 2.740751200225925, "grad_norm": 2.8384344711283482, "learning_rate": 3.8914047630198237e-07, "loss": 0.1133, "step": 19410 }, { "epoch": 2.740892403275911, "grad_norm": 2.9840727520497117, "learning_rate": 3.887194585963072e-07, "loss": 0.1398, "step": 19411 }, { "epoch": 2.741033606325897, "grad_norm": 3.337745041856669, "learning_rate": 3.8829866425264317e-07, "loss": 0.1479, "step": 19412 }, { "epoch": 2.7411748093758828, "grad_norm": 2.662397719755059, "learning_rate": 3.8787809328076577e-07, "loss": 0.1166, "step": 19413 }, { "epoch": 2.7413160124258686, "grad_norm": 2.6877431853993152, "learning_rate": 3.874577456904516e-07, "loss": 0.1339, "step": 19414 }, { "epoch": 2.7414572154758545, "grad_norm": 2.9378422465770364, "learning_rate": 3.8703762149146726e-07, "loss": 0.1536, "step": 19415 }, { "epoch": 2.7415984185258404, "grad_norm": 2.2068814172022773, "learning_rate": 3.866177206935751e-07, "loss": 0.1176, "step": 19416 }, { "epoch": 2.7417396215758263, "grad_norm": 2.488206317054543, "learning_rate": 3.861980433065382e-07, "loss": 0.1173, "step": 19417 }, { "epoch": 2.741880824625812, "grad_norm": 3.2191962785578387, "learning_rate": 3.857785893401056e-07, "loss": 0.1712, "step": 19418 }, { "epoch": 2.742022027675798, "grad_norm": 2.598995996628854, "learning_rate": 3.853593588040272e-07, "loss": 0.1227, "step": 19419 }, { "epoch": 2.742163230725784, "grad_norm": 2.886007676417886, "learning_rate": 3.849403517080452e-07, "loss": 0.1192, "step": 19420 }, { "epoch": 2.74230443377577, "grad_norm": 2.924489853099131, "learning_rate": 3.845215680618963e-07, "loss": 0.1392, "step": 19421 }, { "epoch": 2.7424456368257557, "grad_norm": 3.42616815884423, "learning_rate": 3.8410300787531385e-07, "loss": 0.143, "step": 19422 }, { "epoch": 2.7425868398757416, "grad_norm": 3.0062191823661597, "learning_rate": 3.8368467115802443e-07, "loss": 0.1511, "step": 19423 }, { "epoch": 2.7427280429257275, "grad_norm": 2.8390465990287512, "learning_rate": 3.832665579197503e-07, "loss": 0.1293, "step": 19424 }, { "epoch": 2.742869245975713, "grad_norm": 4.045965126548105, "learning_rate": 3.8284866817020926e-07, "loss": 0.1978, "step": 19425 }, { "epoch": 2.743010449025699, "grad_norm": 3.120000995160894, "learning_rate": 3.824310019191102e-07, "loss": 0.1265, "step": 19426 }, { "epoch": 2.7431516520756847, "grad_norm": 2.4897109124418124, "learning_rate": 3.820135591761631e-07, "loss": 0.1252, "step": 19427 }, { "epoch": 2.7432928551256706, "grad_norm": 3.6964666806220587, "learning_rate": 3.815963399510647e-07, "loss": 0.1425, "step": 19428 }, { "epoch": 2.7434340581756564, "grad_norm": 3.6532784574715698, "learning_rate": 3.811793442535161e-07, "loss": 0.1362, "step": 19429 }, { "epoch": 2.7435752612256423, "grad_norm": 3.6704568180668344, "learning_rate": 3.80762572093204e-07, "loss": 0.1586, "step": 19430 }, { "epoch": 2.743716464275628, "grad_norm": 3.8388677072589186, "learning_rate": 3.8034602347981617e-07, "loss": 0.1854, "step": 19431 }, { "epoch": 2.743857667325614, "grad_norm": 3.917507084938562, "learning_rate": 3.799296984230316e-07, "loss": 0.2051, "step": 19432 }, { "epoch": 2.7439988703756, "grad_norm": 2.9154865325347723, "learning_rate": 3.795135969325259e-07, "loss": 0.1302, "step": 19433 }, { "epoch": 2.744140073425586, "grad_norm": 3.7584214303499097, "learning_rate": 3.790977190179701e-07, "loss": 0.1766, "step": 19434 }, { "epoch": 2.7442812764755717, "grad_norm": 3.4275963188872582, "learning_rate": 3.786820646890277e-07, "loss": 0.151, "step": 19435 }, { "epoch": 2.7444224795255576, "grad_norm": 3.0981239983200117, "learning_rate": 3.782666339553598e-07, "loss": 0.1372, "step": 19436 }, { "epoch": 2.7445636825755435, "grad_norm": 3.857578307231529, "learning_rate": 3.77851426826622e-07, "loss": 0.1863, "step": 19437 }, { "epoch": 2.7447048856255294, "grad_norm": 3.009554324179775, "learning_rate": 3.774364433124578e-07, "loss": 0.1221, "step": 19438 }, { "epoch": 2.7448460886755153, "grad_norm": 3.106882242432552, "learning_rate": 3.770216834225171e-07, "loss": 0.1751, "step": 19439 }, { "epoch": 2.744987291725501, "grad_norm": 3.477318813807565, "learning_rate": 3.7660714716643563e-07, "loss": 0.1908, "step": 19440 }, { "epoch": 2.745128494775487, "grad_norm": 3.072981058971069, "learning_rate": 3.7619283455384906e-07, "loss": 0.1047, "step": 19441 }, { "epoch": 2.745269697825473, "grad_norm": 2.485825386775933, "learning_rate": 3.75778745594384e-07, "loss": 0.1001, "step": 19442 }, { "epoch": 2.745410900875459, "grad_norm": 2.956754761247289, "learning_rate": 3.75364880297665e-07, "loss": 0.171, "step": 19443 }, { "epoch": 2.7455521039254447, "grad_norm": 2.9721854454369407, "learning_rate": 3.749512386733101e-07, "loss": 0.1226, "step": 19444 }, { "epoch": 2.7456933069754306, "grad_norm": 3.5239793879798658, "learning_rate": 3.7453782073092913e-07, "loss": 0.1917, "step": 19445 }, { "epoch": 2.7458345100254165, "grad_norm": 3.501797700249198, "learning_rate": 3.741246264801357e-07, "loss": 0.165, "step": 19446 }, { "epoch": 2.7459757130754023, "grad_norm": 2.3099870501951876, "learning_rate": 3.7371165593052763e-07, "loss": 0.1154, "step": 19447 }, { "epoch": 2.746116916125388, "grad_norm": 3.062139647553599, "learning_rate": 3.7329890909170275e-07, "loss": 0.1511, "step": 19448 }, { "epoch": 2.746258119175374, "grad_norm": 3.1010938582881065, "learning_rate": 3.7288638597325453e-07, "loss": 0.1126, "step": 19449 }, { "epoch": 2.74639932222536, "grad_norm": 3.0118743760394624, "learning_rate": 3.7247408658476756e-07, "loss": 0.1328, "step": 19450 }, { "epoch": 2.746540525275346, "grad_norm": 3.39892900080272, "learning_rate": 3.720620109358264e-07, "loss": 0.1831, "step": 19451 }, { "epoch": 2.7466817283253318, "grad_norm": 3.0066353683450004, "learning_rate": 3.7165015903600553e-07, "loss": 0.1489, "step": 19452 }, { "epoch": 2.7468229313753176, "grad_norm": 2.9577942251706615, "learning_rate": 3.712385308948774e-07, "loss": 0.1573, "step": 19453 }, { "epoch": 2.7469641344253035, "grad_norm": 3.6816915819315637, "learning_rate": 3.708271265220087e-07, "loss": 0.1559, "step": 19454 }, { "epoch": 2.7471053374752894, "grad_norm": 3.1628729620234908, "learning_rate": 3.704159459269563e-07, "loss": 0.1234, "step": 19455 }, { "epoch": 2.7472465405252753, "grad_norm": 2.6313836780905566, "learning_rate": 3.700049891192792e-07, "loss": 0.1174, "step": 19456 }, { "epoch": 2.747387743575261, "grad_norm": 2.708731714717972, "learning_rate": 3.6959425610852863e-07, "loss": 0.1281, "step": 19457 }, { "epoch": 2.747528946625247, "grad_norm": 3.0318736090976577, "learning_rate": 3.691837469042481e-07, "loss": 0.1686, "step": 19458 }, { "epoch": 2.747670149675233, "grad_norm": 3.324056212194871, "learning_rate": 3.687734615159777e-07, "loss": 0.1455, "step": 19459 }, { "epoch": 2.747811352725219, "grad_norm": 3.5998944915051965, "learning_rate": 3.683633999532521e-07, "loss": 0.1762, "step": 19460 }, { "epoch": 2.7479525557752047, "grad_norm": 2.7722290465139916, "learning_rate": 3.6795356222560253e-07, "loss": 0.1364, "step": 19461 }, { "epoch": 2.7480937588251906, "grad_norm": 3.240409929325466, "learning_rate": 3.6754394834255023e-07, "loss": 0.1445, "step": 19462 }, { "epoch": 2.7482349618751765, "grad_norm": 4.319019390235662, "learning_rate": 3.671345583136199e-07, "loss": 0.148, "step": 19463 }, { "epoch": 2.7483761649251623, "grad_norm": 4.184632006510233, "learning_rate": 3.6672539214832157e-07, "loss": 0.1487, "step": 19464 }, { "epoch": 2.7485173679751482, "grad_norm": 3.9387228059929296, "learning_rate": 3.663164498561633e-07, "loss": 0.192, "step": 19465 }, { "epoch": 2.748658571025134, "grad_norm": 2.6941572825967355, "learning_rate": 3.659077314466519e-07, "loss": 0.1293, "step": 19466 }, { "epoch": 2.74879977407512, "grad_norm": 2.9813998918161486, "learning_rate": 3.6549923692928204e-07, "loss": 0.1482, "step": 19467 }, { "epoch": 2.748940977125106, "grad_norm": 3.9736551423057156, "learning_rate": 3.650909663135505e-07, "loss": 0.1798, "step": 19468 }, { "epoch": 2.7490821801750918, "grad_norm": 2.899678491572323, "learning_rate": 3.6468291960894406e-07, "loss": 0.1428, "step": 19469 }, { "epoch": 2.7492233832250776, "grad_norm": 4.262922421669515, "learning_rate": 3.642750968249442e-07, "loss": 0.1526, "step": 19470 }, { "epoch": 2.7493645862750635, "grad_norm": 3.0888381922771626, "learning_rate": 3.638674979710322e-07, "loss": 0.1433, "step": 19471 }, { "epoch": 2.7495057893250494, "grad_norm": 2.7460686722026804, "learning_rate": 3.63460123056677e-07, "loss": 0.1074, "step": 19472 }, { "epoch": 2.7496469923750353, "grad_norm": 2.8510387615597903, "learning_rate": 3.630529720913445e-07, "loss": 0.1317, "step": 19473 }, { "epoch": 2.749788195425021, "grad_norm": 3.223684142200759, "learning_rate": 3.626460450845015e-07, "loss": 0.1636, "step": 19474 }, { "epoch": 2.749929398475007, "grad_norm": 3.9422982848498807, "learning_rate": 3.6223934204560165e-07, "loss": 0.1754, "step": 19475 }, { "epoch": 2.750070601524993, "grad_norm": 2.5771652929794846, "learning_rate": 3.6183286298409724e-07, "loss": 0.1047, "step": 19476 }, { "epoch": 2.750211804574979, "grad_norm": 4.189765825573365, "learning_rate": 3.614266079094353e-07, "loss": 0.1667, "step": 19477 }, { "epoch": 2.7503530076249647, "grad_norm": 2.65611009018958, "learning_rate": 3.6102057683105596e-07, "loss": 0.0876, "step": 19478 }, { "epoch": 2.7504942106749506, "grad_norm": 3.1906724612257604, "learning_rate": 3.6061476975839395e-07, "loss": 0.1329, "step": 19479 }, { "epoch": 2.7506354137249365, "grad_norm": 2.576238870813438, "learning_rate": 3.60209186700885e-07, "loss": 0.1245, "step": 19480 }, { "epoch": 2.7507766167749224, "grad_norm": 2.575195536864581, "learning_rate": 3.598038276679494e-07, "loss": 0.0842, "step": 19481 }, { "epoch": 2.7509178198249082, "grad_norm": 3.3017865889359923, "learning_rate": 3.5939869266901073e-07, "loss": 0.1371, "step": 19482 }, { "epoch": 2.751059022874894, "grad_norm": 3.14603263869236, "learning_rate": 3.5899378171348144e-07, "loss": 0.1536, "step": 19483 }, { "epoch": 2.75120022592488, "grad_norm": 3.8305165186411285, "learning_rate": 3.58589094810774e-07, "loss": 0.1898, "step": 19484 }, { "epoch": 2.751341428974866, "grad_norm": 3.311326743292963, "learning_rate": 3.5818463197029086e-07, "loss": 0.1606, "step": 19485 }, { "epoch": 2.7514826320248518, "grad_norm": 2.907670810133994, "learning_rate": 3.5778039320143456e-07, "loss": 0.1231, "step": 19486 }, { "epoch": 2.7516238350748377, "grad_norm": 3.306112875085602, "learning_rate": 3.573763785135975e-07, "loss": 0.1569, "step": 19487 }, { "epoch": 2.7517650381248235, "grad_norm": 3.327947791239029, "learning_rate": 3.5697258791617007e-07, "loss": 0.1425, "step": 19488 }, { "epoch": 2.7519062411748094, "grad_norm": 3.0887860998801053, "learning_rate": 3.5656902141853356e-07, "loss": 0.1387, "step": 19489 }, { "epoch": 2.7520474442247953, "grad_norm": 3.264248806244703, "learning_rate": 3.561656790300683e-07, "loss": 0.1473, "step": 19490 }, { "epoch": 2.752188647274781, "grad_norm": 4.303967715273767, "learning_rate": 3.5576256076014783e-07, "loss": 0.2076, "step": 19491 }, { "epoch": 2.752329850324767, "grad_norm": 2.670530796231145, "learning_rate": 3.553596666181414e-07, "loss": 0.1207, "step": 19492 }, { "epoch": 2.752471053374753, "grad_norm": 3.3508983494305284, "learning_rate": 3.549569966134103e-07, "loss": 0.1139, "step": 19493 }, { "epoch": 2.752612256424739, "grad_norm": 3.1361106059039243, "learning_rate": 3.545545507553139e-07, "loss": 0.0954, "step": 19494 }, { "epoch": 2.7527534594747247, "grad_norm": 3.8432665696055963, "learning_rate": 3.541523290532034e-07, "loss": 0.2032, "step": 19495 }, { "epoch": 2.7528946625247106, "grad_norm": 2.809965976461838, "learning_rate": 3.537503315164259e-07, "loss": 0.153, "step": 19496 }, { "epoch": 2.7530358655746965, "grad_norm": 3.9026772609466063, "learning_rate": 3.533485581543283e-07, "loss": 0.2139, "step": 19497 }, { "epoch": 2.7531770686246824, "grad_norm": 3.348088736192786, "learning_rate": 3.529470089762421e-07, "loss": 0.1429, "step": 19498 }, { "epoch": 2.7533182716746682, "grad_norm": 2.8747479198745816, "learning_rate": 3.525456839915009e-07, "loss": 0.1387, "step": 19499 }, { "epoch": 2.753459474724654, "grad_norm": 2.933515081201963, "learning_rate": 3.521445832094328e-07, "loss": 0.1522, "step": 19500 }, { "epoch": 2.75360067777464, "grad_norm": 4.186558287328867, "learning_rate": 3.51743706639357e-07, "loss": 0.1779, "step": 19501 }, { "epoch": 2.753741880824626, "grad_norm": 2.940150384115856, "learning_rate": 3.5134305429058935e-07, "loss": 0.1283, "step": 19502 }, { "epoch": 2.753883083874612, "grad_norm": 3.7203508032238592, "learning_rate": 3.5094262617244356e-07, "loss": 0.1565, "step": 19503 }, { "epoch": 2.7540242869245977, "grad_norm": 3.259409839230129, "learning_rate": 3.505424222942244e-07, "loss": 0.1313, "step": 19504 }, { "epoch": 2.7541654899745835, "grad_norm": 3.539143838291693, "learning_rate": 3.501424426652333e-07, "loss": 0.1485, "step": 19505 }, { "epoch": 2.7543066930245694, "grad_norm": 3.6697689727089875, "learning_rate": 3.497426872947629e-07, "loss": 0.1533, "step": 19506 }, { "epoch": 2.7544478960745553, "grad_norm": 2.9948094006671226, "learning_rate": 3.4934315619210346e-07, "loss": 0.1405, "step": 19507 }, { "epoch": 2.754589099124541, "grad_norm": 4.472160301585865, "learning_rate": 3.48943849366542e-07, "loss": 0.218, "step": 19508 }, { "epoch": 2.754730302174527, "grad_norm": 3.375127186027285, "learning_rate": 3.485447668273589e-07, "loss": 0.1622, "step": 19509 }, { "epoch": 2.754871505224513, "grad_norm": 2.655549839464905, "learning_rate": 3.481459085838268e-07, "loss": 0.1431, "step": 19510 }, { "epoch": 2.755012708274499, "grad_norm": 2.8110369352142404, "learning_rate": 3.4774727464521484e-07, "loss": 0.1403, "step": 19511 }, { "epoch": 2.7551539113244847, "grad_norm": 3.7258549680762454, "learning_rate": 3.473488650207879e-07, "loss": 0.198, "step": 19512 }, { "epoch": 2.7552951143744706, "grad_norm": 3.90779830717312, "learning_rate": 3.469506797198052e-07, "loss": 0.1663, "step": 19513 }, { "epoch": 2.7554363174244565, "grad_norm": 3.928661652220164, "learning_rate": 3.465527187515194e-07, "loss": 0.1475, "step": 19514 }, { "epoch": 2.7555775204744424, "grad_norm": 3.0050910053602573, "learning_rate": 3.4615498212517975e-07, "loss": 0.1112, "step": 19515 }, { "epoch": 2.7557187235244283, "grad_norm": 3.228661231883579, "learning_rate": 3.4575746985002877e-07, "loss": 0.1594, "step": 19516 }, { "epoch": 2.755859926574414, "grad_norm": 3.555076861281196, "learning_rate": 3.453601819353047e-07, "loss": 0.1557, "step": 19517 }, { "epoch": 2.7560011296244, "grad_norm": 3.106637786355952, "learning_rate": 3.4496311839024133e-07, "loss": 0.1493, "step": 19518 }, { "epoch": 2.756142332674386, "grad_norm": 2.6237853129476596, "learning_rate": 3.4456627922406337e-07, "loss": 0.1115, "step": 19519 }, { "epoch": 2.756283535724372, "grad_norm": 2.370021694969083, "learning_rate": 3.441696644459969e-07, "loss": 0.1165, "step": 19520 }, { "epoch": 2.7564247387743577, "grad_norm": 3.2333158403856275, "learning_rate": 3.437732740652566e-07, "loss": 0.1491, "step": 19521 }, { "epoch": 2.7565659418243436, "grad_norm": 3.76228042493995, "learning_rate": 3.433771080910575e-07, "loss": 0.1801, "step": 19522 }, { "epoch": 2.7567071448743294, "grad_norm": 2.9058942687655, "learning_rate": 3.4298116653260215e-07, "loss": 0.1224, "step": 19523 }, { "epoch": 2.7568483479243153, "grad_norm": 2.3910949105773995, "learning_rate": 3.4258544939909324e-07, "loss": 0.0938, "step": 19524 }, { "epoch": 2.756989550974301, "grad_norm": 2.891962938024656, "learning_rate": 3.4218995669972886e-07, "loss": 0.1679, "step": 19525 }, { "epoch": 2.757130754024287, "grad_norm": 3.7291997678260875, "learning_rate": 3.4179468844369847e-07, "loss": 0.1536, "step": 19526 }, { "epoch": 2.7572719570742725, "grad_norm": 3.360224870454722, "learning_rate": 3.4139964464018904e-07, "loss": 0.1692, "step": 19527 }, { "epoch": 2.7574131601242584, "grad_norm": 3.5607398288577925, "learning_rate": 3.4100482529838e-07, "loss": 0.1548, "step": 19528 }, { "epoch": 2.7575543631742443, "grad_norm": 3.0367291731034802, "learning_rate": 3.4061023042744837e-07, "loss": 0.115, "step": 19529 }, { "epoch": 2.75769556622423, "grad_norm": 2.8816947543093288, "learning_rate": 3.4021586003656236e-07, "loss": 0.1273, "step": 19530 }, { "epoch": 2.757836769274216, "grad_norm": 2.7409791842642455, "learning_rate": 3.3982171413488916e-07, "loss": 0.1378, "step": 19531 }, { "epoch": 2.757977972324202, "grad_norm": 3.2842204605367624, "learning_rate": 3.394277927315859e-07, "loss": 0.1263, "step": 19532 }, { "epoch": 2.758119175374188, "grad_norm": 3.838314309060839, "learning_rate": 3.390340958358096e-07, "loss": 0.1733, "step": 19533 }, { "epoch": 2.7582603784241737, "grad_norm": 2.9166689232032654, "learning_rate": 3.386406234567086e-07, "loss": 0.1274, "step": 19534 }, { "epoch": 2.7584015814741596, "grad_norm": 2.8670261690280983, "learning_rate": 3.382473756034277e-07, "loss": 0.1403, "step": 19535 }, { "epoch": 2.7585427845241455, "grad_norm": 2.87901061234327, "learning_rate": 3.37854352285103e-07, "loss": 0.1415, "step": 19536 }, { "epoch": 2.7586839875741314, "grad_norm": 3.2004710504424896, "learning_rate": 3.3746155351087276e-07, "loss": 0.1359, "step": 19537 }, { "epoch": 2.7588251906241172, "grad_norm": 3.3008086624260926, "learning_rate": 3.370689792898618e-07, "loss": 0.1591, "step": 19538 }, { "epoch": 2.758966393674103, "grad_norm": 3.942132329644193, "learning_rate": 3.3667662963119627e-07, "loss": 0.1645, "step": 19539 }, { "epoch": 2.759107596724089, "grad_norm": 2.4502926500201543, "learning_rate": 3.362845045439911e-07, "loss": 0.1305, "step": 19540 }, { "epoch": 2.759248799774075, "grad_norm": 2.799034668617568, "learning_rate": 3.3589260403736e-07, "loss": 0.1392, "step": 19541 }, { "epoch": 2.7593900028240608, "grad_norm": 2.83381248452317, "learning_rate": 3.3550092812041244e-07, "loss": 0.1317, "step": 19542 }, { "epoch": 2.7595312058740467, "grad_norm": 3.5074770547662175, "learning_rate": 3.3510947680224893e-07, "loss": 0.1244, "step": 19543 }, { "epoch": 2.7596724089240325, "grad_norm": 4.555084382809644, "learning_rate": 3.347182500919677e-07, "loss": 0.1955, "step": 19544 }, { "epoch": 2.7598136119740184, "grad_norm": 3.1773148697972315, "learning_rate": 3.3432724799866034e-07, "loss": 0.1621, "step": 19545 }, { "epoch": 2.7599548150240043, "grad_norm": 3.6582558558216065, "learning_rate": 3.33936470531413e-07, "loss": 0.1243, "step": 19546 }, { "epoch": 2.76009601807399, "grad_norm": 3.6404124238059006, "learning_rate": 3.335459176993083e-07, "loss": 0.1359, "step": 19547 }, { "epoch": 2.760237221123976, "grad_norm": 3.23281727214003, "learning_rate": 3.3315558951142133e-07, "loss": 0.1575, "step": 19548 }, { "epoch": 2.760378424173962, "grad_norm": 3.3773389405855454, "learning_rate": 3.3276548597682366e-07, "loss": 0.1693, "step": 19549 }, { "epoch": 2.760519627223948, "grad_norm": 3.606345556252987, "learning_rate": 3.3237560710458137e-07, "loss": 0.142, "step": 19550 }, { "epoch": 2.7606608302739337, "grad_norm": 2.55250771795876, "learning_rate": 3.31985952903755e-07, "loss": 0.1031, "step": 19551 }, { "epoch": 2.7608020333239196, "grad_norm": 3.3706025219757696, "learning_rate": 3.3159652338339953e-07, "loss": 0.1725, "step": 19552 }, { "epoch": 2.7609432363739055, "grad_norm": 2.9236518818066615, "learning_rate": 3.312073185525633e-07, "loss": 0.1094, "step": 19553 }, { "epoch": 2.7610844394238914, "grad_norm": 2.785924072233544, "learning_rate": 3.3081833842029563e-07, "loss": 0.1499, "step": 19554 }, { "epoch": 2.7612256424738773, "grad_norm": 3.2092281278603205, "learning_rate": 3.3042958299563386e-07, "loss": 0.1577, "step": 19555 }, { "epoch": 2.761366845523863, "grad_norm": 3.32422636749609, "learning_rate": 3.300410522876141e-07, "loss": 0.1721, "step": 19556 }, { "epoch": 2.761508048573849, "grad_norm": 3.971616868191806, "learning_rate": 3.2965274630526236e-07, "loss": 0.1653, "step": 19557 }, { "epoch": 2.761649251623835, "grad_norm": 2.926649857754778, "learning_rate": 3.292646650576037e-07, "loss": 0.1133, "step": 19558 }, { "epoch": 2.761790454673821, "grad_norm": 3.3607366060362582, "learning_rate": 3.2887680855365867e-07, "loss": 0.1251, "step": 19559 }, { "epoch": 2.7619316577238067, "grad_norm": 3.0315689725980435, "learning_rate": 3.284891768024401e-07, "loss": 0.149, "step": 19560 }, { "epoch": 2.7620728607737925, "grad_norm": 3.6866328655452283, "learning_rate": 3.281017698129563e-07, "loss": 0.2047, "step": 19561 }, { "epoch": 2.7622140638237784, "grad_norm": 3.4447937218122546, "learning_rate": 3.2771458759421005e-07, "loss": 0.1172, "step": 19562 }, { "epoch": 2.7623552668737643, "grad_norm": 3.7076393911278425, "learning_rate": 3.2732763015519977e-07, "loss": 0.1551, "step": 19563 }, { "epoch": 2.76249646992375, "grad_norm": 3.615581774096736, "learning_rate": 3.269408975049182e-07, "loss": 0.1255, "step": 19564 }, { "epoch": 2.762637672973736, "grad_norm": 2.7598868782790884, "learning_rate": 3.2655438965235265e-07, "loss": 0.1464, "step": 19565 }, { "epoch": 2.762778876023722, "grad_norm": 3.7200465880969062, "learning_rate": 3.261681066064859e-07, "loss": 0.1995, "step": 19566 }, { "epoch": 2.762920079073708, "grad_norm": 2.629106668080258, "learning_rate": 3.2578204837629414e-07, "loss": 0.1159, "step": 19567 }, { "epoch": 2.7630612821236937, "grad_norm": 3.30962640556775, "learning_rate": 3.25396214970749e-07, "loss": 0.143, "step": 19568 }, { "epoch": 2.7632024851736796, "grad_norm": 3.0980524477300904, "learning_rate": 3.250106063988179e-07, "loss": 0.1507, "step": 19569 }, { "epoch": 2.7633436882236655, "grad_norm": 3.215960454449379, "learning_rate": 3.2462522266946127e-07, "loss": 0.1378, "step": 19570 }, { "epoch": 2.7634848912736514, "grad_norm": 3.105049481043069, "learning_rate": 3.2424006379163764e-07, "loss": 0.1639, "step": 19571 }, { "epoch": 2.7636260943236373, "grad_norm": 3.2525499456536804, "learning_rate": 3.238551297742953e-07, "loss": 0.177, "step": 19572 }, { "epoch": 2.763767297373623, "grad_norm": 3.4288453143469755, "learning_rate": 3.234704206263828e-07, "loss": 0.095, "step": 19573 }, { "epoch": 2.763908500423609, "grad_norm": 4.549117535531631, "learning_rate": 3.230859363568373e-07, "loss": 0.1561, "step": 19574 }, { "epoch": 2.764049703473595, "grad_norm": 3.7502780011628896, "learning_rate": 3.22701676974595e-07, "loss": 0.1846, "step": 19575 }, { "epoch": 2.764190906523581, "grad_norm": 2.830901368806215, "learning_rate": 3.2231764248858656e-07, "loss": 0.1426, "step": 19576 }, { "epoch": 2.7643321095735667, "grad_norm": 2.6768742034239574, "learning_rate": 3.2193383290773705e-07, "loss": 0.1333, "step": 19577 }, { "epoch": 2.7644733126235526, "grad_norm": 3.184745903317428, "learning_rate": 3.215502482409649e-07, "loss": 0.1489, "step": 19578 }, { "epoch": 2.7646145156735384, "grad_norm": 4.4498840411925755, "learning_rate": 3.2116688849718637e-07, "loss": 0.1963, "step": 19579 }, { "epoch": 2.7647557187235243, "grad_norm": 2.255676671830971, "learning_rate": 3.207837536853087e-07, "loss": 0.1155, "step": 19580 }, { "epoch": 2.76489692177351, "grad_norm": 3.5540901215847125, "learning_rate": 3.2040084381423697e-07, "loss": 0.1474, "step": 19581 }, { "epoch": 2.765038124823496, "grad_norm": 2.683536738636351, "learning_rate": 3.2001815889286856e-07, "loss": 0.121, "step": 19582 }, { "epoch": 2.765179327873482, "grad_norm": 2.8248423243288183, "learning_rate": 3.196356989300986e-07, "loss": 0.1269, "step": 19583 }, { "epoch": 2.765320530923468, "grad_norm": 3.88574847785713, "learning_rate": 3.1925346393481327e-07, "loss": 0.1983, "step": 19584 }, { "epoch": 2.7654617339734537, "grad_norm": 2.819869384131492, "learning_rate": 3.188714539158977e-07, "loss": 0.1035, "step": 19585 }, { "epoch": 2.7656029370234396, "grad_norm": 2.8890683464789593, "learning_rate": 3.184896688822281e-07, "loss": 0.1242, "step": 19586 }, { "epoch": 2.7657441400734255, "grad_norm": 2.2804280871246907, "learning_rate": 3.181081088426774e-07, "loss": 0.0857, "step": 19587 }, { "epoch": 2.7658853431234114, "grad_norm": 3.3257308123049003, "learning_rate": 3.1772677380611185e-07, "loss": 0.1337, "step": 19588 }, { "epoch": 2.7660265461733973, "grad_norm": 3.048719385196497, "learning_rate": 3.1734566378139653e-07, "loss": 0.1325, "step": 19589 }, { "epoch": 2.766167749223383, "grad_norm": 3.628264071309252, "learning_rate": 3.1696477877738664e-07, "loss": 0.1498, "step": 19590 }, { "epoch": 2.766308952273369, "grad_norm": 3.452834365576766, "learning_rate": 3.165841188029328e-07, "loss": 0.1636, "step": 19591 }, { "epoch": 2.766450155323355, "grad_norm": 2.892655399266402, "learning_rate": 3.1620368386688137e-07, "loss": 0.1209, "step": 19592 }, { "epoch": 2.766591358373341, "grad_norm": 3.1115898387076046, "learning_rate": 3.158234739780741e-07, "loss": 0.121, "step": 19593 }, { "epoch": 2.7667325614233267, "grad_norm": 3.506284194986558, "learning_rate": 3.154434891453473e-07, "loss": 0.2141, "step": 19594 }, { "epoch": 2.7668737644733126, "grad_norm": 3.3580486162799734, "learning_rate": 3.1506372937753163e-07, "loss": 0.1519, "step": 19595 }, { "epoch": 2.7670149675232985, "grad_norm": 2.9799807487615966, "learning_rate": 3.1468419468345223e-07, "loss": 0.1425, "step": 19596 }, { "epoch": 2.7671561705732843, "grad_norm": 3.0452341473004583, "learning_rate": 3.143048850719299e-07, "loss": 0.1485, "step": 19597 }, { "epoch": 2.76729737362327, "grad_norm": 3.1282722469258126, "learning_rate": 3.1392580055177867e-07, "loss": 0.1544, "step": 19598 }, { "epoch": 2.767438576673256, "grad_norm": 3.338097040866428, "learning_rate": 3.135469411318082e-07, "loss": 0.1523, "step": 19599 }, { "epoch": 2.767579779723242, "grad_norm": 2.4413959824125393, "learning_rate": 3.131683068208247e-07, "loss": 0.1139, "step": 19600 }, { "epoch": 2.767720982773228, "grad_norm": 2.78138289531275, "learning_rate": 3.1278989762762556e-07, "loss": 0.1369, "step": 19601 }, { "epoch": 2.7678621858232137, "grad_norm": 3.5091370595881424, "learning_rate": 3.1241171356100606e-07, "loss": 0.1819, "step": 19602 }, { "epoch": 2.7680033888731996, "grad_norm": 3.2027577153979085, "learning_rate": 3.1203375462975474e-07, "loss": 0.1435, "step": 19603 }, { "epoch": 2.7681445919231855, "grad_norm": 3.6853600371966664, "learning_rate": 3.1165602084265446e-07, "loss": 0.1972, "step": 19604 }, { "epoch": 2.7682857949731714, "grad_norm": 2.813220419690726, "learning_rate": 3.1127851220848273e-07, "loss": 0.126, "step": 19605 }, { "epoch": 2.7684269980231573, "grad_norm": 3.2338330299545395, "learning_rate": 3.109012287360158e-07, "loss": 0.1534, "step": 19606 }, { "epoch": 2.768568201073143, "grad_norm": 3.619583014897995, "learning_rate": 3.1052417043402115e-07, "loss": 0.1537, "step": 19607 }, { "epoch": 2.768709404123129, "grad_norm": 3.1880480700565874, "learning_rate": 3.1014733731125955e-07, "loss": 0.1232, "step": 19608 }, { "epoch": 2.768850607173115, "grad_norm": 3.4992004678162063, "learning_rate": 3.0977072937648846e-07, "loss": 0.1805, "step": 19609 }, { "epoch": 2.768991810223101, "grad_norm": 3.635470892838778, "learning_rate": 3.093943466384597e-07, "loss": 0.1502, "step": 19610 }, { "epoch": 2.7691330132730867, "grad_norm": 2.708651875386631, "learning_rate": 3.0901818910592183e-07, "loss": 0.1126, "step": 19611 }, { "epoch": 2.7692742163230726, "grad_norm": 3.078773839199322, "learning_rate": 3.0864225678761684e-07, "loss": 0.1155, "step": 19612 }, { "epoch": 2.7694154193730585, "grad_norm": 2.909802303822352, "learning_rate": 3.082665496922799e-07, "loss": 0.1473, "step": 19613 }, { "epoch": 2.7695566224230443, "grad_norm": 2.883051784549847, "learning_rate": 3.0789106782864285e-07, "loss": 0.1378, "step": 19614 }, { "epoch": 2.7696978254730302, "grad_norm": 3.6010201404992817, "learning_rate": 3.0751581120543216e-07, "loss": 0.1948, "step": 19615 }, { "epoch": 2.769839028523016, "grad_norm": 4.147006348151358, "learning_rate": 3.071407798313686e-07, "loss": 0.1529, "step": 19616 }, { "epoch": 2.769980231573002, "grad_norm": 2.988242827480415, "learning_rate": 3.0676597371516627e-07, "loss": 0.1353, "step": 19617 }, { "epoch": 2.770121434622988, "grad_norm": 3.043948819837961, "learning_rate": 3.0639139286553707e-07, "loss": 0.1239, "step": 19618 }, { "epoch": 2.7702626376729738, "grad_norm": 2.8728634787079046, "learning_rate": 3.0601703729118524e-07, "loss": 0.1442, "step": 19619 }, { "epoch": 2.7704038407229596, "grad_norm": 3.7708298877762263, "learning_rate": 3.0564290700081044e-07, "loss": 0.1449, "step": 19620 }, { "epoch": 2.7705450437729455, "grad_norm": 3.207569431158485, "learning_rate": 3.0526900200310905e-07, "loss": 0.1156, "step": 19621 }, { "epoch": 2.7706862468229314, "grad_norm": 2.591919975616547, "learning_rate": 3.0489532230676744e-07, "loss": 0.1297, "step": 19622 }, { "epoch": 2.7708274498729173, "grad_norm": 3.473771002822263, "learning_rate": 3.04521867920472e-07, "loss": 0.1814, "step": 19623 }, { "epoch": 2.770968652922903, "grad_norm": 4.001109288340226, "learning_rate": 3.041486388529036e-07, "loss": 0.1855, "step": 19624 }, { "epoch": 2.771109855972889, "grad_norm": 2.6159585989590726, "learning_rate": 3.037756351127319e-07, "loss": 0.1049, "step": 19625 }, { "epoch": 2.771251059022875, "grad_norm": 3.0585527674668826, "learning_rate": 3.0340285670862667e-07, "loss": 0.162, "step": 19626 }, { "epoch": 2.771392262072861, "grad_norm": 3.0136793795050982, "learning_rate": 3.030303036492499e-07, "loss": 0.1187, "step": 19627 }, { "epoch": 2.7715334651228467, "grad_norm": 3.071604381672808, "learning_rate": 3.026579759432635e-07, "loss": 0.1428, "step": 19628 }, { "epoch": 2.7716746681728326, "grad_norm": 2.631543688552586, "learning_rate": 3.0228587359931726e-07, "loss": 0.1093, "step": 19629 }, { "epoch": 2.7718158712228185, "grad_norm": 3.4726543561840963, "learning_rate": 3.019139966260587e-07, "loss": 0.1354, "step": 19630 }, { "epoch": 2.7719570742728044, "grad_norm": 3.594581741869066, "learning_rate": 3.015423450321309e-07, "loss": 0.1615, "step": 19631 }, { "epoch": 2.7720982773227902, "grad_norm": 3.268048914941403, "learning_rate": 3.0117091882617025e-07, "loss": 0.19, "step": 19632 }, { "epoch": 2.772239480372776, "grad_norm": 3.225866639173116, "learning_rate": 3.0079971801680876e-07, "loss": 0.1416, "step": 19633 }, { "epoch": 2.772380683422762, "grad_norm": 2.7167347944405287, "learning_rate": 3.0042874261267395e-07, "loss": 0.1251, "step": 19634 }, { "epoch": 2.772521886472748, "grad_norm": 3.4548878737418685, "learning_rate": 3.0005799262238565e-07, "loss": 0.2028, "step": 19635 }, { "epoch": 2.7726630895227338, "grad_norm": 3.267261089051461, "learning_rate": 2.996874680545603e-07, "loss": 0.1485, "step": 19636 }, { "epoch": 2.7728042925727197, "grad_norm": 3.0673821495415288, "learning_rate": 2.993171689178098e-07, "loss": 0.1481, "step": 19637 }, { "epoch": 2.7729454956227055, "grad_norm": 3.1198652904547965, "learning_rate": 2.989470952207385e-07, "loss": 0.1562, "step": 19638 }, { "epoch": 2.7730866986726914, "grad_norm": 3.434027546801516, "learning_rate": 2.9857724697194503e-07, "loss": 0.1358, "step": 19639 }, { "epoch": 2.7732279017226773, "grad_norm": 3.009952851218732, "learning_rate": 2.9820762418002916e-07, "loss": 0.1567, "step": 19640 }, { "epoch": 2.773369104772663, "grad_norm": 2.986340432938195, "learning_rate": 2.9783822685357844e-07, "loss": 0.1186, "step": 19641 }, { "epoch": 2.773510307822649, "grad_norm": 3.4142620313609298, "learning_rate": 2.9746905500117604e-07, "loss": 0.1741, "step": 19642 }, { "epoch": 2.773651510872635, "grad_norm": 2.9057243173812366, "learning_rate": 2.971001086314029e-07, "loss": 0.086, "step": 19643 }, { "epoch": 2.773792713922621, "grad_norm": 3.010197026934564, "learning_rate": 2.967313877528322e-07, "loss": 0.1447, "step": 19644 }, { "epoch": 2.7739339169726067, "grad_norm": 3.644161942264826, "learning_rate": 2.963628923740347e-07, "loss": 0.1673, "step": 19645 }, { "epoch": 2.7740751200225926, "grad_norm": 3.3068130870957018, "learning_rate": 2.959946225035726e-07, "loss": 0.1462, "step": 19646 }, { "epoch": 2.7742163230725785, "grad_norm": 3.60932267103286, "learning_rate": 2.956265781500045e-07, "loss": 0.1617, "step": 19647 }, { "epoch": 2.7743575261225644, "grad_norm": 4.932320028245283, "learning_rate": 2.9525875932188365e-07, "loss": 0.2174, "step": 19648 }, { "epoch": 2.7744987291725502, "grad_norm": 3.3948479607938826, "learning_rate": 2.948911660277587e-07, "loss": 0.1326, "step": 19649 }, { "epoch": 2.774639932222536, "grad_norm": 3.783702551417677, "learning_rate": 2.945237982761706e-07, "loss": 0.1626, "step": 19650 }, { "epoch": 2.774781135272522, "grad_norm": 4.169890797744005, "learning_rate": 2.9415665607565923e-07, "loss": 0.205, "step": 19651 }, { "epoch": 2.774922338322508, "grad_norm": 4.104137848682385, "learning_rate": 2.937897394347544e-07, "loss": 0.1396, "step": 19652 }, { "epoch": 2.775063541372494, "grad_norm": 3.132945152743542, "learning_rate": 2.9342304836198486e-07, "loss": 0.1195, "step": 19653 }, { "epoch": 2.7752047444224797, "grad_norm": 2.5290306760723222, "learning_rate": 2.930565828658716e-07, "loss": 0.1425, "step": 19654 }, { "epoch": 2.7753459474724655, "grad_norm": 3.539552893927693, "learning_rate": 2.9269034295493105e-07, "loss": 0.1444, "step": 19655 }, { "epoch": 2.7754871505224514, "grad_norm": 4.111920421623204, "learning_rate": 2.9232432863767424e-07, "loss": 0.1482, "step": 19656 }, { "epoch": 2.7756283535724373, "grad_norm": 2.7723911845093903, "learning_rate": 2.919585399226077e-07, "loss": 0.1076, "step": 19657 }, { "epoch": 2.775769556622423, "grad_norm": 3.419980122406715, "learning_rate": 2.915929768182335e-07, "loss": 0.1518, "step": 19658 }, { "epoch": 2.775910759672409, "grad_norm": 3.4917504937148616, "learning_rate": 2.912276393330449e-07, "loss": 0.1817, "step": 19659 }, { "epoch": 2.776051962722395, "grad_norm": 3.3454459788807642, "learning_rate": 2.908625274755339e-07, "loss": 0.1794, "step": 19660 }, { "epoch": 2.776193165772381, "grad_norm": 3.4914202941411046, "learning_rate": 2.9049764125418266e-07, "loss": 0.1404, "step": 19661 }, { "epoch": 2.7763343688223667, "grad_norm": 2.7471783258963773, "learning_rate": 2.901329806774744e-07, "loss": 0.1158, "step": 19662 }, { "epoch": 2.7764755718723526, "grad_norm": 2.51868242038154, "learning_rate": 2.8976854575388235e-07, "loss": 0.0927, "step": 19663 }, { "epoch": 2.7766167749223385, "grad_norm": 4.019283736065937, "learning_rate": 2.8940433649187525e-07, "loss": 0.1912, "step": 19664 }, { "epoch": 2.7767579779723244, "grad_norm": 3.1779389002310805, "learning_rate": 2.890403528999175e-07, "loss": 0.1722, "step": 19665 }, { "epoch": 2.7768991810223103, "grad_norm": 3.33049382278745, "learning_rate": 2.8867659498647e-07, "loss": 0.1575, "step": 19666 }, { "epoch": 2.777040384072296, "grad_norm": 2.9521074943509267, "learning_rate": 2.8831306275998174e-07, "loss": 0.1014, "step": 19667 }, { "epoch": 2.777181587122282, "grad_norm": 2.352915037544021, "learning_rate": 2.8794975622890573e-07, "loss": 0.0975, "step": 19668 }, { "epoch": 2.777322790172268, "grad_norm": 2.624389168238163, "learning_rate": 2.8758667540168203e-07, "loss": 0.074, "step": 19669 }, { "epoch": 2.777463993222254, "grad_norm": 3.537142570820614, "learning_rate": 2.8722382028675055e-07, "loss": 0.1429, "step": 19670 }, { "epoch": 2.7776051962722397, "grad_norm": 2.343178475189642, "learning_rate": 2.8686119089254227e-07, "loss": 0.102, "step": 19671 }, { "epoch": 2.7777463993222256, "grad_norm": 2.8945583062665063, "learning_rate": 2.864987872274849e-07, "loss": 0.1403, "step": 19672 }, { "epoch": 2.7778876023722114, "grad_norm": 3.124781648698604, "learning_rate": 2.861366092999995e-07, "loss": 0.1243, "step": 19673 }, { "epoch": 2.7780288054221973, "grad_norm": 3.3806360372503845, "learning_rate": 2.8577465711850605e-07, "loss": 0.157, "step": 19674 }, { "epoch": 2.778170008472183, "grad_norm": 3.716008540569517, "learning_rate": 2.854129306914144e-07, "loss": 0.1958, "step": 19675 }, { "epoch": 2.778311211522169, "grad_norm": 3.051211052528312, "learning_rate": 2.8505143002713007e-07, "loss": 0.1426, "step": 19676 }, { "epoch": 2.778452414572155, "grad_norm": 4.467449331854612, "learning_rate": 2.8469015513405527e-07, "loss": 0.1951, "step": 19677 }, { "epoch": 2.778593617622141, "grad_norm": 2.988701156351104, "learning_rate": 2.843291060205855e-07, "loss": 0.1484, "step": 19678 }, { "epoch": 2.7787348206721267, "grad_norm": 2.6660472853782116, "learning_rate": 2.839682826951107e-07, "loss": 0.0902, "step": 19679 }, { "epoch": 2.7788760237221126, "grad_norm": 3.1476281517659035, "learning_rate": 2.8360768516601745e-07, "loss": 0.164, "step": 19680 }, { "epoch": 2.7790172267720985, "grad_norm": 3.0024305256365222, "learning_rate": 2.8324731344168575e-07, "loss": 0.1586, "step": 19681 }, { "epoch": 2.7791584298220844, "grad_norm": 3.01605984156517, "learning_rate": 2.8288716753049007e-07, "loss": 0.1611, "step": 19682 }, { "epoch": 2.7792996328720703, "grad_norm": 3.3965850061955183, "learning_rate": 2.825272474408014e-07, "loss": 0.165, "step": 19683 }, { "epoch": 2.779440835922056, "grad_norm": 2.466355255491916, "learning_rate": 2.821675531809809e-07, "loss": 0.1272, "step": 19684 }, { "epoch": 2.779582038972042, "grad_norm": 2.818385815926966, "learning_rate": 2.818080847593896e-07, "loss": 0.091, "step": 19685 }, { "epoch": 2.779723242022028, "grad_norm": 3.0318500771567805, "learning_rate": 2.814488421843831e-07, "loss": 0.1345, "step": 19686 }, { "epoch": 2.779864445072014, "grad_norm": 3.20740407924879, "learning_rate": 2.8108982546430687e-07, "loss": 0.1501, "step": 19687 }, { "epoch": 2.7800056481219997, "grad_norm": 3.127838158761328, "learning_rate": 2.8073103460750653e-07, "loss": 0.172, "step": 19688 }, { "epoch": 2.7801468511719856, "grad_norm": 2.8054312851552163, "learning_rate": 2.803724696223198e-07, "loss": 0.1796, "step": 19689 }, { "epoch": 2.7802880542219714, "grad_norm": 3.7679348900621985, "learning_rate": 2.800141305170789e-07, "loss": 0.2012, "step": 19690 }, { "epoch": 2.7804292572719573, "grad_norm": 2.702551922135359, "learning_rate": 2.796560173001106e-07, "loss": 0.1328, "step": 19691 }, { "epoch": 2.780570460321943, "grad_norm": 3.148342556969039, "learning_rate": 2.7929812997974036e-07, "loss": 0.1585, "step": 19692 }, { "epoch": 2.780711663371929, "grad_norm": 4.168077721357103, "learning_rate": 2.789404685642827e-07, "loss": 0.1393, "step": 19693 }, { "epoch": 2.780852866421915, "grad_norm": 2.5595603181145132, "learning_rate": 2.785830330620509e-07, "loss": 0.1268, "step": 19694 }, { "epoch": 2.780994069471901, "grad_norm": 2.619623082654432, "learning_rate": 2.782258234813506e-07, "loss": 0.1098, "step": 19695 }, { "epoch": 2.7811352725218867, "grad_norm": 3.21743862806986, "learning_rate": 2.7786883983048294e-07, "loss": 0.1277, "step": 19696 }, { "epoch": 2.781276475571872, "grad_norm": 3.109280286698981, "learning_rate": 2.775120821177457e-07, "loss": 0.1634, "step": 19697 }, { "epoch": 2.781417678621858, "grad_norm": 3.9849288504100047, "learning_rate": 2.771555503514289e-07, "loss": 0.1688, "step": 19698 }, { "epoch": 2.781558881671844, "grad_norm": 3.246484610287638, "learning_rate": 2.7679924453981823e-07, "loss": 0.1692, "step": 19699 }, { "epoch": 2.78170008472183, "grad_norm": 2.7868735532814637, "learning_rate": 2.764431646911947e-07, "loss": 0.1352, "step": 19700 }, { "epoch": 2.7818412877718157, "grad_norm": 2.595928869104256, "learning_rate": 2.7608731081383065e-07, "loss": 0.1055, "step": 19701 }, { "epoch": 2.7819824908218016, "grad_norm": 2.177425698604991, "learning_rate": 2.757316829159995e-07, "loss": 0.0971, "step": 19702 }, { "epoch": 2.7821236938717875, "grad_norm": 3.110636862022969, "learning_rate": 2.7537628100596457e-07, "loss": 0.1289, "step": 19703 }, { "epoch": 2.7822648969217734, "grad_norm": 3.7928507531082705, "learning_rate": 2.750211050919849e-07, "loss": 0.1869, "step": 19704 }, { "epoch": 2.7824060999717593, "grad_norm": 4.183989018962651, "learning_rate": 2.746661551823149e-07, "loss": 0.1557, "step": 19705 }, { "epoch": 2.782547303021745, "grad_norm": 3.6501848601041575, "learning_rate": 2.7431143128520243e-07, "loss": 0.1414, "step": 19706 }, { "epoch": 2.782688506071731, "grad_norm": 3.3117378285897447, "learning_rate": 2.739569334088932e-07, "loss": 0.1587, "step": 19707 }, { "epoch": 2.782829709121717, "grad_norm": 3.2764709001460144, "learning_rate": 2.7360266156162274e-07, "loss": 0.1224, "step": 19708 }, { "epoch": 2.782970912171703, "grad_norm": 3.590277352324747, "learning_rate": 2.7324861575162897e-07, "loss": 0.1779, "step": 19709 }, { "epoch": 2.7831121152216887, "grad_norm": 3.433827682210781, "learning_rate": 2.728947959871353e-07, "loss": 0.1516, "step": 19710 }, { "epoch": 2.7832533182716745, "grad_norm": 3.0805470291293213, "learning_rate": 2.7254120227636514e-07, "loss": 0.136, "step": 19711 }, { "epoch": 2.7833945213216604, "grad_norm": 3.3961867613309766, "learning_rate": 2.721878346275364e-07, "loss": 0.1549, "step": 19712 }, { "epoch": 2.7835357243716463, "grad_norm": 2.94879791081446, "learning_rate": 2.7183469304886136e-07, "loss": 0.1225, "step": 19713 }, { "epoch": 2.783676927421632, "grad_norm": 3.0960782423771604, "learning_rate": 2.714817775485468e-07, "loss": 0.131, "step": 19714 }, { "epoch": 2.783818130471618, "grad_norm": 3.4196282586828515, "learning_rate": 2.71129088134795e-07, "loss": 0.1757, "step": 19715 }, { "epoch": 2.783959333521604, "grad_norm": 2.7597716761271087, "learning_rate": 2.707766248158006e-07, "loss": 0.1525, "step": 19716 }, { "epoch": 2.78410053657159, "grad_norm": 2.81462405283787, "learning_rate": 2.704243875997581e-07, "loss": 0.1214, "step": 19717 }, { "epoch": 2.7842417396215757, "grad_norm": 3.4395900142089437, "learning_rate": 2.7007237649484763e-07, "loss": 0.1562, "step": 19718 }, { "epoch": 2.7843829426715616, "grad_norm": 4.7799866702662275, "learning_rate": 2.697205915092549e-07, "loss": 0.1751, "step": 19719 }, { "epoch": 2.7845241457215475, "grad_norm": 2.285274510292052, "learning_rate": 2.693690326511533e-07, "loss": 0.0999, "step": 19720 }, { "epoch": 2.7846653487715334, "grad_norm": 2.339440104717664, "learning_rate": 2.6901769992871305e-07, "loss": 0.1109, "step": 19721 }, { "epoch": 2.7848065518215193, "grad_norm": 3.276646308719734, "learning_rate": 2.686665933500987e-07, "loss": 0.1353, "step": 19722 }, { "epoch": 2.784947754871505, "grad_norm": 3.0191103745595247, "learning_rate": 2.683157129234704e-07, "loss": 0.1402, "step": 19723 }, { "epoch": 2.785088957921491, "grad_norm": 3.590451349502683, "learning_rate": 2.6796505865698263e-07, "loss": 0.139, "step": 19724 }, { "epoch": 2.785230160971477, "grad_norm": 3.3609377698126974, "learning_rate": 2.6761463055878347e-07, "loss": 0.1708, "step": 19725 }, { "epoch": 2.785371364021463, "grad_norm": 3.595471138314666, "learning_rate": 2.672644286370163e-07, "loss": 0.1761, "step": 19726 }, { "epoch": 2.7855125670714487, "grad_norm": 3.1227629959426206, "learning_rate": 2.669144528998213e-07, "loss": 0.1436, "step": 19727 }, { "epoch": 2.7856537701214346, "grad_norm": 3.386613925536485, "learning_rate": 2.665647033553309e-07, "loss": 0.1642, "step": 19728 }, { "epoch": 2.7857949731714204, "grad_norm": 2.527652899813678, "learning_rate": 2.662151800116741e-07, "loss": 0.1288, "step": 19729 }, { "epoch": 2.7859361762214063, "grad_norm": 3.8872211458799413, "learning_rate": 2.6586588287697114e-07, "loss": 0.1523, "step": 19730 }, { "epoch": 2.786077379271392, "grad_norm": 2.9889262537859183, "learning_rate": 2.655168119593421e-07, "loss": 0.1405, "step": 19731 }, { "epoch": 2.786218582321378, "grad_norm": 2.5078425801655095, "learning_rate": 2.651679672668983e-07, "loss": 0.1147, "step": 19732 }, { "epoch": 2.786359785371364, "grad_norm": 2.5971809792033573, "learning_rate": 2.6481934880774663e-07, "loss": 0.115, "step": 19733 }, { "epoch": 2.78650098842135, "grad_norm": 2.985195835689216, "learning_rate": 2.6447095658999054e-07, "loss": 0.1462, "step": 19734 }, { "epoch": 2.7866421914713357, "grad_norm": 3.212421410801023, "learning_rate": 2.641227906217225e-07, "loss": 0.1547, "step": 19735 }, { "epoch": 2.7867833945213216, "grad_norm": 4.748936257900525, "learning_rate": 2.6377485091103825e-07, "loss": 0.1967, "step": 19736 }, { "epoch": 2.7869245975713075, "grad_norm": 3.8002290377331396, "learning_rate": 2.6342713746602023e-07, "loss": 0.1831, "step": 19737 }, { "epoch": 2.7870658006212934, "grad_norm": 2.360553086865986, "learning_rate": 2.630796502947519e-07, "loss": 0.0976, "step": 19738 }, { "epoch": 2.7872070036712793, "grad_norm": 3.6785745156118037, "learning_rate": 2.6273238940530686e-07, "loss": 0.1779, "step": 19739 }, { "epoch": 2.787348206721265, "grad_norm": 3.7272884658778564, "learning_rate": 2.6238535480575533e-07, "loss": 0.1956, "step": 19740 }, { "epoch": 2.787489409771251, "grad_norm": 3.4420236919899003, "learning_rate": 2.6203854650416307e-07, "loss": 0.1869, "step": 19741 }, { "epoch": 2.787630612821237, "grad_norm": 3.0041869355223816, "learning_rate": 2.616919645085902e-07, "loss": 0.144, "step": 19742 }, { "epoch": 2.787771815871223, "grad_norm": 3.2824009915361936, "learning_rate": 2.613456088270894e-07, "loss": 0.1551, "step": 19743 }, { "epoch": 2.7879130189212087, "grad_norm": 3.48185830588085, "learning_rate": 2.609994794677118e-07, "loss": 0.1795, "step": 19744 }, { "epoch": 2.7880542219711946, "grad_norm": 3.52228696731064, "learning_rate": 2.6065357643849985e-07, "loss": 0.1428, "step": 19745 }, { "epoch": 2.7881954250211805, "grad_norm": 4.370579440122452, "learning_rate": 2.6030789974749285e-07, "loss": 0.1796, "step": 19746 }, { "epoch": 2.7883366280711663, "grad_norm": 2.926163528297969, "learning_rate": 2.599624494027231e-07, "loss": 0.1345, "step": 19747 }, { "epoch": 2.788477831121152, "grad_norm": 3.5434999724346676, "learning_rate": 2.596172254122209e-07, "loss": 0.1573, "step": 19748 }, { "epoch": 2.788619034171138, "grad_norm": 2.9469587234222234, "learning_rate": 2.592722277840065e-07, "loss": 0.1059, "step": 19749 }, { "epoch": 2.788760237221124, "grad_norm": 2.884543768726291, "learning_rate": 2.589274565261002e-07, "loss": 0.1253, "step": 19750 }, { "epoch": 2.78890144027111, "grad_norm": 3.6432340478225904, "learning_rate": 2.585829116465133e-07, "loss": 0.1821, "step": 19751 }, { "epoch": 2.7890426433210957, "grad_norm": 3.905786455544547, "learning_rate": 2.582385931532505e-07, "loss": 0.1271, "step": 19752 }, { "epoch": 2.7891838463710816, "grad_norm": 3.3668998324564345, "learning_rate": 2.578945010543177e-07, "loss": 0.1415, "step": 19753 }, { "epoch": 2.7893250494210675, "grad_norm": 3.6171165129076623, "learning_rate": 2.575506353577084e-07, "loss": 0.1754, "step": 19754 }, { "epoch": 2.7894662524710534, "grad_norm": 3.558362181852943, "learning_rate": 2.5720699607141517e-07, "loss": 0.1525, "step": 19755 }, { "epoch": 2.7896074555210393, "grad_norm": 2.6752172064437993, "learning_rate": 2.5686358320342387e-07, "loss": 0.1303, "step": 19756 }, { "epoch": 2.789748658571025, "grad_norm": 3.221259491270782, "learning_rate": 2.565203967617147e-07, "loss": 0.1756, "step": 19757 }, { "epoch": 2.789889861621011, "grad_norm": 2.8220728264288333, "learning_rate": 2.5617743675426354e-07, "loss": 0.1119, "step": 19758 }, { "epoch": 2.790031064670997, "grad_norm": 3.078113985719411, "learning_rate": 2.558347031890418e-07, "loss": 0.1381, "step": 19759 }, { "epoch": 2.790172267720983, "grad_norm": 2.861955372763654, "learning_rate": 2.55492196074012e-07, "loss": 0.1415, "step": 19760 }, { "epoch": 2.7903134707709687, "grad_norm": 3.557964171112699, "learning_rate": 2.5514991541713664e-07, "loss": 0.1746, "step": 19761 }, { "epoch": 2.7904546738209546, "grad_norm": 3.5988090245121094, "learning_rate": 2.5480786122636713e-07, "loss": 0.1626, "step": 19762 }, { "epoch": 2.7905958768709405, "grad_norm": 3.096196558917275, "learning_rate": 2.5446603350965606e-07, "loss": 0.1145, "step": 19763 }, { "epoch": 2.7907370799209263, "grad_norm": 3.218223223297159, "learning_rate": 2.5412443227494365e-07, "loss": 0.1576, "step": 19764 }, { "epoch": 2.7908782829709122, "grad_norm": 3.195011441759945, "learning_rate": 2.537830575301714e-07, "loss": 0.1454, "step": 19765 }, { "epoch": 2.791019486020898, "grad_norm": 3.2710454837699676, "learning_rate": 2.534419092832718e-07, "loss": 0.1309, "step": 19766 }, { "epoch": 2.791160689070884, "grad_norm": 4.089959851962814, "learning_rate": 2.531009875421731e-07, "loss": 0.1627, "step": 19767 }, { "epoch": 2.79130189212087, "grad_norm": 2.48870334664276, "learning_rate": 2.527602923147998e-07, "loss": 0.1089, "step": 19768 }, { "epoch": 2.7914430951708558, "grad_norm": 3.868846461620955, "learning_rate": 2.524198236090658e-07, "loss": 0.1848, "step": 19769 }, { "epoch": 2.7915842982208416, "grad_norm": 2.941945756781059, "learning_rate": 2.520795814328847e-07, "loss": 0.1099, "step": 19770 }, { "epoch": 2.7917255012708275, "grad_norm": 3.0437126894051496, "learning_rate": 2.517395657941657e-07, "loss": 0.1423, "step": 19771 }, { "epoch": 2.7918667043208134, "grad_norm": 3.416545823309591, "learning_rate": 2.513997767008092e-07, "loss": 0.1611, "step": 19772 }, { "epoch": 2.7920079073707993, "grad_norm": 3.4982600948230176, "learning_rate": 2.51060214160711e-07, "loss": 0.1635, "step": 19773 }, { "epoch": 2.792149110420785, "grad_norm": 3.4204796345548076, "learning_rate": 2.507208781817638e-07, "loss": 0.158, "step": 19774 }, { "epoch": 2.792290313470771, "grad_norm": 2.7485241680568326, "learning_rate": 2.503817687718535e-07, "loss": 0.0979, "step": 19775 }, { "epoch": 2.792431516520757, "grad_norm": 3.274028409615973, "learning_rate": 2.500428859388593e-07, "loss": 0.1522, "step": 19776 }, { "epoch": 2.792572719570743, "grad_norm": 2.910238675797789, "learning_rate": 2.4970422969065823e-07, "loss": 0.1356, "step": 19777 }, { "epoch": 2.7927139226207287, "grad_norm": 3.7489227791941104, "learning_rate": 2.4936580003512066e-07, "loss": 0.1631, "step": 19778 }, { "epoch": 2.7928551256707146, "grad_norm": 2.624590953545149, "learning_rate": 2.4902759698011036e-07, "loss": 0.0993, "step": 19779 }, { "epoch": 2.7929963287207005, "grad_norm": 3.196320439324553, "learning_rate": 2.4868962053348764e-07, "loss": 0.133, "step": 19780 }, { "epoch": 2.7931375317706864, "grad_norm": 2.543056808495935, "learning_rate": 2.483518707031063e-07, "loss": 0.1097, "step": 19781 }, { "epoch": 2.7932787348206722, "grad_norm": 3.2965514813937213, "learning_rate": 2.4801434749681553e-07, "loss": 0.1644, "step": 19782 }, { "epoch": 2.793419937870658, "grad_norm": 2.6367521778865495, "learning_rate": 2.476770509224613e-07, "loss": 0.1171, "step": 19783 }, { "epoch": 2.793561140920644, "grad_norm": 2.8136171927157743, "learning_rate": 2.473399809878807e-07, "loss": 0.0926, "step": 19784 }, { "epoch": 2.79370234397063, "grad_norm": 2.587668848106781, "learning_rate": 2.4700313770090745e-07, "loss": 0.0886, "step": 19785 }, { "epoch": 2.7938435470206158, "grad_norm": 3.080842706029899, "learning_rate": 2.466665210693686e-07, "loss": 0.1358, "step": 19786 }, { "epoch": 2.7939847500706017, "grad_norm": 3.0970160914555698, "learning_rate": 2.463301311010857e-07, "loss": 0.1641, "step": 19787 }, { "epoch": 2.7941259531205875, "grad_norm": 4.084603500357253, "learning_rate": 2.459939678038803e-07, "loss": 0.1877, "step": 19788 }, { "epoch": 2.7942671561705734, "grad_norm": 3.0875846681330144, "learning_rate": 2.4565803118556273e-07, "loss": 0.1621, "step": 19789 }, { "epoch": 2.7944083592205593, "grad_norm": 2.904387360432018, "learning_rate": 2.453223212539391e-07, "loss": 0.137, "step": 19790 }, { "epoch": 2.794549562270545, "grad_norm": 2.890734556719952, "learning_rate": 2.44986838016813e-07, "loss": 0.1404, "step": 19791 }, { "epoch": 2.794690765320531, "grad_norm": 2.9278152837346214, "learning_rate": 2.446515814819794e-07, "loss": 0.144, "step": 19792 }, { "epoch": 2.794831968370517, "grad_norm": 2.575229335020528, "learning_rate": 2.443165516572299e-07, "loss": 0.1222, "step": 19793 }, { "epoch": 2.794973171420503, "grad_norm": 3.0127385635915798, "learning_rate": 2.4398174855035037e-07, "loss": 0.1459, "step": 19794 }, { "epoch": 2.7951143744704887, "grad_norm": 2.996121451302781, "learning_rate": 2.4364717216912246e-07, "loss": 0.1458, "step": 19795 }, { "epoch": 2.7952555775204746, "grad_norm": 3.6520492088059693, "learning_rate": 2.4331282252132103e-07, "loss": 0.1478, "step": 19796 }, { "epoch": 2.7953967805704605, "grad_norm": 2.779089676498598, "learning_rate": 2.4297869961471544e-07, "loss": 0.1359, "step": 19797 }, { "epoch": 2.7955379836204464, "grad_norm": 2.804376087681676, "learning_rate": 2.4264480345707053e-07, "loss": 0.132, "step": 19798 }, { "epoch": 2.795679186670432, "grad_norm": 2.8383130752549524, "learning_rate": 2.4231113405614684e-07, "loss": 0.112, "step": 19799 }, { "epoch": 2.7958203897204177, "grad_norm": 2.908452757002246, "learning_rate": 2.419776914196981e-07, "loss": 0.1513, "step": 19800 }, { "epoch": 2.7959615927704036, "grad_norm": 3.2273178864815106, "learning_rate": 2.4164447555547475e-07, "loss": 0.1378, "step": 19801 }, { "epoch": 2.7961027958203895, "grad_norm": 3.9839313726308414, "learning_rate": 2.413114864712196e-07, "loss": 0.1882, "step": 19802 }, { "epoch": 2.7962439988703753, "grad_norm": 3.291626911587527, "learning_rate": 2.4097872417467085e-07, "loss": 0.1711, "step": 19803 }, { "epoch": 2.796385201920361, "grad_norm": 2.578288245895344, "learning_rate": 2.4064618867356003e-07, "loss": 0.1164, "step": 19804 }, { "epoch": 2.796526404970347, "grad_norm": 3.3662448965881757, "learning_rate": 2.4031387997561885e-07, "loss": 0.176, "step": 19805 }, { "epoch": 2.796667608020333, "grad_norm": 3.515644057236048, "learning_rate": 2.399817980885677e-07, "loss": 0.1685, "step": 19806 }, { "epoch": 2.796808811070319, "grad_norm": 3.405302845096384, "learning_rate": 2.396499430201249e-07, "loss": 0.1598, "step": 19807 }, { "epoch": 2.7969500141203048, "grad_norm": 2.8651385750390284, "learning_rate": 2.3931831477800207e-07, "loss": 0.0948, "step": 19808 }, { "epoch": 2.7970912171702906, "grad_norm": 3.66639107502556, "learning_rate": 2.389869133699063e-07, "loss": 0.1405, "step": 19809 }, { "epoch": 2.7972324202202765, "grad_norm": 2.759372017256367, "learning_rate": 2.3865573880353933e-07, "loss": 0.1373, "step": 19810 }, { "epoch": 2.7973736232702624, "grad_norm": 4.238204390451638, "learning_rate": 2.3832479108659712e-07, "loss": 0.2025, "step": 19811 }, { "epoch": 2.7975148263202483, "grad_norm": 3.0515705610389485, "learning_rate": 2.3799407022677022e-07, "loss": 0.1355, "step": 19812 }, { "epoch": 2.797656029370234, "grad_norm": 3.253519447937637, "learning_rate": 2.3766357623174697e-07, "loss": 0.1332, "step": 19813 }, { "epoch": 2.79779723242022, "grad_norm": 2.7405457744602217, "learning_rate": 2.373333091092056e-07, "loss": 0.1387, "step": 19814 }, { "epoch": 2.797938435470206, "grad_norm": 2.4886015473770624, "learning_rate": 2.3700326886682113e-07, "loss": 0.0992, "step": 19815 }, { "epoch": 2.798079638520192, "grad_norm": 3.2068818422431193, "learning_rate": 2.3667345551226406e-07, "loss": 0.1491, "step": 19816 }, { "epoch": 2.7982208415701777, "grad_norm": 3.1164620004700785, "learning_rate": 2.3634386905320051e-07, "loss": 0.1107, "step": 19817 }, { "epoch": 2.7983620446201636, "grad_norm": 3.421131878222402, "learning_rate": 2.3601450949728876e-07, "loss": 0.1988, "step": 19818 }, { "epoch": 2.7985032476701495, "grad_norm": 3.0871150932237823, "learning_rate": 2.3568537685218386e-07, "loss": 0.1577, "step": 19819 }, { "epoch": 2.7986444507201353, "grad_norm": 2.9761272613025387, "learning_rate": 2.3535647112553295e-07, "loss": 0.1227, "step": 19820 }, { "epoch": 2.7987856537701212, "grad_norm": 4.20404782126191, "learning_rate": 2.3502779232497996e-07, "loss": 0.1745, "step": 19821 }, { "epoch": 2.798926856820107, "grad_norm": 2.863490475552275, "learning_rate": 2.3469934045816435e-07, "loss": 0.1136, "step": 19822 }, { "epoch": 2.799068059870093, "grad_norm": 3.8291560660389274, "learning_rate": 2.3437111553271884e-07, "loss": 0.1959, "step": 19823 }, { "epoch": 2.799209262920079, "grad_norm": 2.616143584175229, "learning_rate": 2.3404311755627184e-07, "loss": 0.1214, "step": 19824 }, { "epoch": 2.7993504659700648, "grad_norm": 3.599342567482938, "learning_rate": 2.33715346536445e-07, "loss": 0.1568, "step": 19825 }, { "epoch": 2.7994916690200506, "grad_norm": 2.3097396507714647, "learning_rate": 2.3338780248085557e-07, "loss": 0.086, "step": 19826 }, { "epoch": 2.7996328720700365, "grad_norm": 3.3857436165788224, "learning_rate": 2.3306048539711523e-07, "loss": 0.1563, "step": 19827 }, { "epoch": 2.7997740751200224, "grad_norm": 3.0497383476757154, "learning_rate": 2.3273339529283123e-07, "loss": 0.1321, "step": 19828 }, { "epoch": 2.7999152781700083, "grad_norm": 2.549700587600999, "learning_rate": 2.3240653217560528e-07, "loss": 0.1072, "step": 19829 }, { "epoch": 2.800056481219994, "grad_norm": 3.202581099256297, "learning_rate": 2.320798960530335e-07, "loss": 0.133, "step": 19830 }, { "epoch": 2.80019768426998, "grad_norm": 3.3826781173382128, "learning_rate": 2.317534869327065e-07, "loss": 0.1503, "step": 19831 }, { "epoch": 2.800338887319966, "grad_norm": 2.8956930173423774, "learning_rate": 2.314273048222093e-07, "loss": 0.104, "step": 19832 }, { "epoch": 2.800480090369952, "grad_norm": 2.9903381989421773, "learning_rate": 2.3110134972912257e-07, "loss": 0.1129, "step": 19833 }, { "epoch": 2.8006212934199377, "grad_norm": 2.882903487631556, "learning_rate": 2.307756216610224e-07, "loss": 0.1179, "step": 19834 }, { "epoch": 2.8007624964699236, "grad_norm": 2.8401168358658, "learning_rate": 2.3045012062547723e-07, "loss": 0.1051, "step": 19835 }, { "epoch": 2.8009036995199095, "grad_norm": 3.371857572473701, "learning_rate": 2.301248466300543e-07, "loss": 0.156, "step": 19836 }, { "epoch": 2.8010449025698954, "grad_norm": 3.096511181361602, "learning_rate": 2.297997996823087e-07, "loss": 0.1321, "step": 19837 }, { "epoch": 2.8011861056198812, "grad_norm": 2.812072138075477, "learning_rate": 2.294749797897955e-07, "loss": 0.1288, "step": 19838 }, { "epoch": 2.801327308669867, "grad_norm": 3.84347055311232, "learning_rate": 2.2915038696006532e-07, "loss": 0.1884, "step": 19839 }, { "epoch": 2.801468511719853, "grad_norm": 2.8362311181094486, "learning_rate": 2.288260212006599e-07, "loss": 0.1132, "step": 19840 }, { "epoch": 2.801609714769839, "grad_norm": 3.2495479260667572, "learning_rate": 2.2850188251911877e-07, "loss": 0.1335, "step": 19841 }, { "epoch": 2.8017509178198248, "grad_norm": 3.120511188023168, "learning_rate": 2.2817797092297256e-07, "loss": 0.1088, "step": 19842 }, { "epoch": 2.8018921208698107, "grad_norm": 3.9133807311803337, "learning_rate": 2.2785428641975194e-07, "loss": 0.1324, "step": 19843 }, { "epoch": 2.8020333239197965, "grad_norm": 2.779194699909916, "learning_rate": 2.2753082901697644e-07, "loss": 0.1204, "step": 19844 }, { "epoch": 2.8021745269697824, "grad_norm": 2.485940268657665, "learning_rate": 2.2720759872216446e-07, "loss": 0.1177, "step": 19845 }, { "epoch": 2.8023157300197683, "grad_norm": 2.9430383261182302, "learning_rate": 2.2688459554282673e-07, "loss": 0.1229, "step": 19846 }, { "epoch": 2.802456933069754, "grad_norm": 3.1661517946768813, "learning_rate": 2.265618194864705e-07, "loss": 0.1504, "step": 19847 }, { "epoch": 2.80259813611974, "grad_norm": 3.3488258924551726, "learning_rate": 2.2623927056059647e-07, "loss": 0.191, "step": 19848 }, { "epoch": 2.802739339169726, "grad_norm": 2.8219201459371326, "learning_rate": 2.25916948772702e-07, "loss": 0.1262, "step": 19849 }, { "epoch": 2.802880542219712, "grad_norm": 2.7883782414304505, "learning_rate": 2.2559485413027438e-07, "loss": 0.1446, "step": 19850 }, { "epoch": 2.8030217452696977, "grad_norm": 3.1969355966536135, "learning_rate": 2.2527298664080323e-07, "loss": 0.1575, "step": 19851 }, { "epoch": 2.8031629483196836, "grad_norm": 2.9628528182886025, "learning_rate": 2.2495134631176585e-07, "loss": 0.1231, "step": 19852 }, { "epoch": 2.8033041513696695, "grad_norm": 2.990317245860659, "learning_rate": 2.2462993315063853e-07, "loss": 0.1472, "step": 19853 }, { "epoch": 2.8034453544196554, "grad_norm": 3.5269220350115624, "learning_rate": 2.243087471648886e-07, "loss": 0.1595, "step": 19854 }, { "epoch": 2.8035865574696412, "grad_norm": 3.646376824355197, "learning_rate": 2.239877883619812e-07, "loss": 0.1349, "step": 19855 }, { "epoch": 2.803727760519627, "grad_norm": 3.567234596145686, "learning_rate": 2.2366705674937596e-07, "loss": 0.1713, "step": 19856 }, { "epoch": 2.803868963569613, "grad_norm": 3.2875378059391758, "learning_rate": 2.2334655233452683e-07, "loss": 0.1633, "step": 19857 }, { "epoch": 2.804010166619599, "grad_norm": 3.33718009344948, "learning_rate": 2.230262751248813e-07, "loss": 0.1494, "step": 19858 }, { "epoch": 2.804151369669585, "grad_norm": 3.2532603283583263, "learning_rate": 2.2270622512788332e-07, "loss": 0.1687, "step": 19859 }, { "epoch": 2.8042925727195707, "grad_norm": 3.4247493075007176, "learning_rate": 2.2238640235097032e-07, "loss": 0.1756, "step": 19860 }, { "epoch": 2.8044337757695565, "grad_norm": 3.053248062521965, "learning_rate": 2.220668068015741e-07, "loss": 0.1334, "step": 19861 }, { "epoch": 2.8045749788195424, "grad_norm": 2.7526404970463307, "learning_rate": 2.217474384871221e-07, "loss": 0.1331, "step": 19862 }, { "epoch": 2.8047161818695283, "grad_norm": 3.6330934016895546, "learning_rate": 2.2142829741503723e-07, "loss": 0.2129, "step": 19863 }, { "epoch": 2.804857384919514, "grad_norm": 3.1718699210032577, "learning_rate": 2.2110938359273583e-07, "loss": 0.1675, "step": 19864 }, { "epoch": 2.8049985879695, "grad_norm": 2.914452023842297, "learning_rate": 2.2079069702762968e-07, "loss": 0.1333, "step": 19865 }, { "epoch": 2.805139791019486, "grad_norm": 3.6591772853598226, "learning_rate": 2.20472237727124e-07, "loss": 0.1993, "step": 19866 }, { "epoch": 2.805280994069472, "grad_norm": 3.3520677607277265, "learning_rate": 2.2015400569861845e-07, "loss": 0.1826, "step": 19867 }, { "epoch": 2.8054221971194577, "grad_norm": 3.1360796626234215, "learning_rate": 2.1983600094951153e-07, "loss": 0.1073, "step": 19868 }, { "epoch": 2.8055634001694436, "grad_norm": 2.602562423349869, "learning_rate": 2.1951822348719287e-07, "loss": 0.1328, "step": 19869 }, { "epoch": 2.8057046032194295, "grad_norm": 3.0119026707439684, "learning_rate": 2.192006733190466e-07, "loss": 0.1112, "step": 19870 }, { "epoch": 2.8058458062694154, "grad_norm": 3.6665006011981482, "learning_rate": 2.1888335045245235e-07, "loss": 0.1696, "step": 19871 }, { "epoch": 2.8059870093194013, "grad_norm": 3.0730992035101186, "learning_rate": 2.1856625489478532e-07, "loss": 0.1409, "step": 19872 }, { "epoch": 2.806128212369387, "grad_norm": 2.7664630164233595, "learning_rate": 2.18249386653413e-07, "loss": 0.1125, "step": 19873 }, { "epoch": 2.806269415419373, "grad_norm": 3.1220354743612138, "learning_rate": 2.1793274573570166e-07, "loss": 0.1158, "step": 19874 }, { "epoch": 2.806410618469359, "grad_norm": 4.032470883426098, "learning_rate": 2.1761633214900767e-07, "loss": 0.1751, "step": 19875 }, { "epoch": 2.806551821519345, "grad_norm": 3.6935241113745283, "learning_rate": 2.1730014590068625e-07, "loss": 0.1598, "step": 19876 }, { "epoch": 2.8066930245693307, "grad_norm": 3.342671238861575, "learning_rate": 2.1698418699808488e-07, "loss": 0.1451, "step": 19877 }, { "epoch": 2.8068342276193166, "grad_norm": 3.5874192380057206, "learning_rate": 2.1666845544854542e-07, "loss": 0.1937, "step": 19878 }, { "epoch": 2.8069754306693024, "grad_norm": 2.895591784977058, "learning_rate": 2.1635295125940647e-07, "loss": 0.1683, "step": 19879 }, { "epoch": 2.8071166337192883, "grad_norm": 2.871996802385165, "learning_rate": 2.1603767443799994e-07, "loss": 0.1247, "step": 19880 }, { "epoch": 2.807257836769274, "grad_norm": 2.8136607588017113, "learning_rate": 2.157226249916522e-07, "loss": 0.1244, "step": 19881 }, { "epoch": 2.80739903981926, "grad_norm": 3.4856417171993717, "learning_rate": 2.1540780292768516e-07, "loss": 0.1424, "step": 19882 }, { "epoch": 2.807540242869246, "grad_norm": 2.8603449844994, "learning_rate": 2.1509320825341407e-07, "loss": 0.1029, "step": 19883 }, { "epoch": 2.807681445919232, "grad_norm": 3.23413525380374, "learning_rate": 2.1477884097615308e-07, "loss": 0.1595, "step": 19884 }, { "epoch": 2.8078226489692177, "grad_norm": 3.154878024422754, "learning_rate": 2.1446470110320306e-07, "loss": 0.1178, "step": 19885 }, { "epoch": 2.8079638520192036, "grad_norm": 2.981853921166106, "learning_rate": 2.1415078864187034e-07, "loss": 0.1112, "step": 19886 }, { "epoch": 2.8081050550691895, "grad_norm": 3.6848725420251958, "learning_rate": 2.1383710359944576e-07, "loss": 0.1467, "step": 19887 }, { "epoch": 2.8082462581191754, "grad_norm": 3.451982914028179, "learning_rate": 2.135236459832213e-07, "loss": 0.1407, "step": 19888 }, { "epoch": 2.8083874611691613, "grad_norm": 2.908639996240426, "learning_rate": 2.1321041580047997e-07, "loss": 0.1331, "step": 19889 }, { "epoch": 2.808528664219147, "grad_norm": 3.651446783025837, "learning_rate": 2.1289741305850154e-07, "loss": 0.1517, "step": 19890 }, { "epoch": 2.808669867269133, "grad_norm": 3.874647278281282, "learning_rate": 2.125846377645613e-07, "loss": 0.1526, "step": 19891 }, { "epoch": 2.808811070319119, "grad_norm": 3.1391740039980895, "learning_rate": 2.1227208992592675e-07, "loss": 0.1464, "step": 19892 }, { "epoch": 2.808952273369105, "grad_norm": 2.563789612976445, "learning_rate": 2.119597695498621e-07, "loss": 0.1239, "step": 19893 }, { "epoch": 2.8090934764190907, "grad_norm": 3.059663121316726, "learning_rate": 2.1164767664362485e-07, "loss": 0.1217, "step": 19894 }, { "epoch": 2.8092346794690766, "grad_norm": 4.673663293850825, "learning_rate": 2.1133581121446923e-07, "loss": 0.2062, "step": 19895 }, { "epoch": 2.8093758825190625, "grad_norm": 2.9714692210771, "learning_rate": 2.1102417326964165e-07, "loss": 0.1206, "step": 19896 }, { "epoch": 2.8095170855690483, "grad_norm": 2.7546180952924124, "learning_rate": 2.107127628163852e-07, "loss": 0.1096, "step": 19897 }, { "epoch": 2.809658288619034, "grad_norm": 3.4120931958782146, "learning_rate": 2.104015798619352e-07, "loss": 0.141, "step": 19898 }, { "epoch": 2.80979949166902, "grad_norm": 2.599002414194393, "learning_rate": 2.100906244135259e-07, "loss": 0.1183, "step": 19899 }, { "epoch": 2.809940694719006, "grad_norm": 2.656151464727653, "learning_rate": 2.097798964783826e-07, "loss": 0.1308, "step": 19900 }, { "epoch": 2.810081897768992, "grad_norm": 3.3497727102697366, "learning_rate": 2.0946939606372508e-07, "loss": 0.1481, "step": 19901 }, { "epoch": 2.8102231008189777, "grad_norm": 2.9843185994000065, "learning_rate": 2.091591231767709e-07, "loss": 0.1192, "step": 19902 }, { "epoch": 2.8103643038689636, "grad_norm": 3.5992234484189716, "learning_rate": 2.0884907782473206e-07, "loss": 0.1402, "step": 19903 }, { "epoch": 2.8105055069189495, "grad_norm": 2.7362378711712316, "learning_rate": 2.085392600148106e-07, "loss": 0.1113, "step": 19904 }, { "epoch": 2.8106467099689354, "grad_norm": 2.173603154661741, "learning_rate": 2.0822966975420856e-07, "loss": 0.082, "step": 19905 }, { "epoch": 2.8107879130189213, "grad_norm": 3.162703552957746, "learning_rate": 2.0792030705012013e-07, "loss": 0.1067, "step": 19906 }, { "epoch": 2.810929116068907, "grad_norm": 2.67596595555186, "learning_rate": 2.07611171909734e-07, "loss": 0.1139, "step": 19907 }, { "epoch": 2.811070319118893, "grad_norm": 4.15354068342097, "learning_rate": 2.0730226434023671e-07, "loss": 0.1649, "step": 19908 }, { "epoch": 2.811211522168879, "grad_norm": 3.2557454396503926, "learning_rate": 2.0699358434880468e-07, "loss": 0.146, "step": 19909 }, { "epoch": 2.811352725218865, "grad_norm": 3.750295435886425, "learning_rate": 2.066851319426133e-07, "loss": 0.1213, "step": 19910 }, { "epoch": 2.8114939282688507, "grad_norm": 3.1293551892849027, "learning_rate": 2.063769071288302e-07, "loss": 0.1326, "step": 19911 }, { "epoch": 2.8116351313188366, "grad_norm": 3.3683994362502583, "learning_rate": 2.0606890991461737e-07, "loss": 0.1281, "step": 19912 }, { "epoch": 2.8117763343688225, "grad_norm": 3.0332567748548036, "learning_rate": 2.0576114030713355e-07, "loss": 0.1489, "step": 19913 }, { "epoch": 2.8119175374188083, "grad_norm": 3.187496520032008, "learning_rate": 2.0545359831353195e-07, "loss": 0.1371, "step": 19914 }, { "epoch": 2.8120587404687942, "grad_norm": 3.43973389191249, "learning_rate": 2.051462839409579e-07, "loss": 0.1276, "step": 19915 }, { "epoch": 2.81219994351878, "grad_norm": 2.95050164583715, "learning_rate": 2.0483919719655466e-07, "loss": 0.1272, "step": 19916 }, { "epoch": 2.812341146568766, "grad_norm": 3.4233778234758985, "learning_rate": 2.0453233808745753e-07, "loss": 0.1609, "step": 19917 }, { "epoch": 2.812482349618752, "grad_norm": 3.7242376893870577, "learning_rate": 2.0422570662079866e-07, "loss": 0.1159, "step": 19918 }, { "epoch": 2.8126235526687378, "grad_norm": 3.1273162896012274, "learning_rate": 2.0391930280370342e-07, "loss": 0.1471, "step": 19919 }, { "epoch": 2.8127647557187236, "grad_norm": 4.569217867324224, "learning_rate": 2.0361312664329502e-07, "loss": 0.195, "step": 19920 }, { "epoch": 2.8129059587687095, "grad_norm": 3.297864843268361, "learning_rate": 2.0330717814668556e-07, "loss": 0.1237, "step": 19921 }, { "epoch": 2.8130471618186954, "grad_norm": 3.4029588593975717, "learning_rate": 2.0300145732098596e-07, "loss": 0.1468, "step": 19922 }, { "epoch": 2.8131883648686813, "grad_norm": 2.474800549439491, "learning_rate": 2.0269596417330173e-07, "loss": 0.1351, "step": 19923 }, { "epoch": 2.813329567918667, "grad_norm": 4.469617574695108, "learning_rate": 2.0239069871073157e-07, "loss": 0.1597, "step": 19924 }, { "epoch": 2.813470770968653, "grad_norm": 2.455344212099276, "learning_rate": 2.0208566094037096e-07, "loss": 0.0922, "step": 19925 }, { "epoch": 2.813611974018639, "grad_norm": 3.4281699016310156, "learning_rate": 2.0178085086930865e-07, "loss": 0.1514, "step": 19926 }, { "epoch": 2.813753177068625, "grad_norm": 2.9901803542713243, "learning_rate": 2.0147626850462786e-07, "loss": 0.1408, "step": 19927 }, { "epoch": 2.8138943801186107, "grad_norm": 3.714182344893192, "learning_rate": 2.0117191385340629e-07, "loss": 0.1835, "step": 19928 }, { "epoch": 2.8140355831685966, "grad_norm": 3.2533105363564605, "learning_rate": 2.0086778692271824e-07, "loss": 0.1854, "step": 19929 }, { "epoch": 2.8141767862185825, "grad_norm": 2.9722172423195987, "learning_rate": 2.005638877196303e-07, "loss": 0.1303, "step": 19930 }, { "epoch": 2.8143179892685684, "grad_norm": 3.009214846768815, "learning_rate": 2.0026021625120574e-07, "loss": 0.1419, "step": 19931 }, { "epoch": 2.8144591923185542, "grad_norm": 3.047166019452896, "learning_rate": 1.999567725245022e-07, "loss": 0.1346, "step": 19932 }, { "epoch": 2.81460039536854, "grad_norm": 2.599856736831499, "learning_rate": 1.9965355654656958e-07, "loss": 0.1123, "step": 19933 }, { "epoch": 2.814741598418526, "grad_norm": 3.2746826505589475, "learning_rate": 1.9935056832445676e-07, "loss": 0.1641, "step": 19934 }, { "epoch": 2.814882801468512, "grad_norm": 2.4700331236673487, "learning_rate": 1.9904780786520473e-07, "loss": 0.109, "step": 19935 }, { "epoch": 2.8150240045184978, "grad_norm": 3.5553211471332715, "learning_rate": 1.9874527517584784e-07, "loss": 0.1588, "step": 19936 }, { "epoch": 2.8151652075684837, "grad_norm": 3.1693383097547185, "learning_rate": 1.984429702634194e-07, "loss": 0.1302, "step": 19937 }, { "epoch": 2.8153064106184695, "grad_norm": 3.4500170097762735, "learning_rate": 1.9814089313494157e-07, "loss": 0.1448, "step": 19938 }, { "epoch": 2.8154476136684554, "grad_norm": 3.1163600923269374, "learning_rate": 1.9783904379743758e-07, "loss": 0.1551, "step": 19939 }, { "epoch": 2.8155888167184413, "grad_norm": 3.8876502651382094, "learning_rate": 1.975374222579207e-07, "loss": 0.1477, "step": 19940 }, { "epoch": 2.815730019768427, "grad_norm": 2.153131549769038, "learning_rate": 1.9723602852339985e-07, "loss": 0.0877, "step": 19941 }, { "epoch": 2.815871222818413, "grad_norm": 3.0825470440176255, "learning_rate": 1.9693486260088047e-07, "loss": 0.1713, "step": 19942 }, { "epoch": 2.816012425868399, "grad_norm": 2.7147099591208232, "learning_rate": 1.9663392449736142e-07, "loss": 0.1145, "step": 19943 }, { "epoch": 2.816153628918385, "grad_norm": 3.3409853228136113, "learning_rate": 1.9633321421983708e-07, "loss": 0.1201, "step": 19944 }, { "epoch": 2.8162948319683707, "grad_norm": 2.9415384879205098, "learning_rate": 1.9603273177529415e-07, "loss": 0.1027, "step": 19945 }, { "epoch": 2.8164360350183566, "grad_norm": 3.106972323325981, "learning_rate": 1.957324771707181e-07, "loss": 0.1043, "step": 19946 }, { "epoch": 2.8165772380683425, "grad_norm": 2.435429001964734, "learning_rate": 1.9543245041308224e-07, "loss": 0.1094, "step": 19947 }, { "epoch": 2.8167184411183284, "grad_norm": 3.38095144185477, "learning_rate": 1.9513265150936433e-07, "loss": 0.1414, "step": 19948 }, { "epoch": 2.8168596441683142, "grad_norm": 3.108336136514969, "learning_rate": 1.948330804665277e-07, "loss": 0.1648, "step": 19949 }, { "epoch": 2.8170008472183, "grad_norm": 2.797628039061124, "learning_rate": 1.945337372915368e-07, "loss": 0.0963, "step": 19950 }, { "epoch": 2.817142050268286, "grad_norm": 2.940999376394885, "learning_rate": 1.9423462199134713e-07, "loss": 0.1402, "step": 19951 }, { "epoch": 2.817283253318272, "grad_norm": 2.9326721584824234, "learning_rate": 1.9393573457290983e-07, "loss": 0.1026, "step": 19952 }, { "epoch": 2.817424456368258, "grad_norm": 3.2435673757005734, "learning_rate": 1.9363707504317042e-07, "loss": 0.135, "step": 19953 }, { "epoch": 2.8175656594182437, "grad_norm": 2.627565942906979, "learning_rate": 1.9333864340907116e-07, "loss": 0.0947, "step": 19954 }, { "epoch": 2.8177068624682295, "grad_norm": 2.495772579634133, "learning_rate": 1.930404396775465e-07, "loss": 0.1069, "step": 19955 }, { "epoch": 2.8178480655182154, "grad_norm": 3.420042760600069, "learning_rate": 1.9274246385552753e-07, "loss": 0.1625, "step": 19956 }, { "epoch": 2.8179892685682013, "grad_norm": 3.4156790617669177, "learning_rate": 1.9244471594993652e-07, "loss": 0.1384, "step": 19957 }, { "epoch": 2.818130471618187, "grad_norm": 3.0881298321118016, "learning_rate": 1.921471959676957e-07, "loss": 0.1344, "step": 19958 }, { "epoch": 2.818271674668173, "grad_norm": 2.7476251885555927, "learning_rate": 1.9184990391571846e-07, "loss": 0.1129, "step": 19959 }, { "epoch": 2.818412877718159, "grad_norm": 3.066981159504285, "learning_rate": 1.9155283980091366e-07, "loss": 0.1121, "step": 19960 }, { "epoch": 2.818554080768145, "grad_norm": 3.341191280030943, "learning_rate": 1.9125600363018472e-07, "loss": 0.1534, "step": 19961 }, { "epoch": 2.8186952838181307, "grad_norm": 3.3361115946119204, "learning_rate": 1.909593954104294e-07, "loss": 0.1296, "step": 19962 }, { "epoch": 2.8188364868681166, "grad_norm": 3.3250030270386564, "learning_rate": 1.9066301514854334e-07, "loss": 0.1436, "step": 19963 }, { "epoch": 2.8189776899181025, "grad_norm": 2.8798322727909103, "learning_rate": 1.9036686285141105e-07, "loss": 0.1525, "step": 19964 }, { "epoch": 2.8191188929680884, "grad_norm": 3.742796963550915, "learning_rate": 1.9007093852591696e-07, "loss": 0.1595, "step": 19965 }, { "epoch": 2.8192600960180743, "grad_norm": 3.1610697184557837, "learning_rate": 1.8977524217893782e-07, "loss": 0.14, "step": 19966 }, { "epoch": 2.81940129906806, "grad_norm": 3.6196421448622282, "learning_rate": 1.8947977381734484e-07, "loss": 0.1919, "step": 19967 }, { "epoch": 2.819542502118046, "grad_norm": 3.5052864846912186, "learning_rate": 1.891845334480058e-07, "loss": 0.165, "step": 19968 }, { "epoch": 2.819683705168032, "grad_norm": 2.801407731596878, "learning_rate": 1.8888952107778081e-07, "loss": 0.1134, "step": 19969 }, { "epoch": 2.8198249082180173, "grad_norm": 2.429398685236878, "learning_rate": 1.8859473671352546e-07, "loss": 0.0971, "step": 19970 }, { "epoch": 2.8199661112680032, "grad_norm": 3.1523193342893365, "learning_rate": 1.8830018036209208e-07, "loss": 0.1262, "step": 19971 }, { "epoch": 2.820107314317989, "grad_norm": 3.3911042946535446, "learning_rate": 1.8800585203032517e-07, "loss": 0.1421, "step": 19972 }, { "epoch": 2.820248517367975, "grad_norm": 2.844415942423072, "learning_rate": 1.8771175172506484e-07, "loss": 0.0994, "step": 19973 }, { "epoch": 2.820389720417961, "grad_norm": 3.57009276235482, "learning_rate": 1.874178794531456e-07, "loss": 0.163, "step": 19974 }, { "epoch": 2.8205309234679468, "grad_norm": 3.1666234009411003, "learning_rate": 1.8712423522139756e-07, "loss": 0.1636, "step": 19975 }, { "epoch": 2.8206721265179326, "grad_norm": 2.8244028866461752, "learning_rate": 1.86830819036643e-07, "loss": 0.1118, "step": 19976 }, { "epoch": 2.8208133295679185, "grad_norm": 3.1805826012913188, "learning_rate": 1.865376309057032e-07, "loss": 0.1148, "step": 19977 }, { "epoch": 2.8209545326179044, "grad_norm": 4.116302741670571, "learning_rate": 1.8624467083539154e-07, "loss": 0.1609, "step": 19978 }, { "epoch": 2.8210957356678903, "grad_norm": 2.7243995910849854, "learning_rate": 1.8595193883251484e-07, "loss": 0.0978, "step": 19979 }, { "epoch": 2.821236938717876, "grad_norm": 2.2818341253199663, "learning_rate": 1.8565943490387761e-07, "loss": 0.0967, "step": 19980 }, { "epoch": 2.821378141767862, "grad_norm": 2.912330235195069, "learning_rate": 1.8536715905627445e-07, "loss": 0.1362, "step": 19981 }, { "epoch": 2.821519344817848, "grad_norm": 3.3738034268073913, "learning_rate": 1.85075111296501e-07, "loss": 0.1532, "step": 19982 }, { "epoch": 2.821660547867834, "grad_norm": 3.6794137805430815, "learning_rate": 1.84783291631343e-07, "loss": 0.1804, "step": 19983 }, { "epoch": 2.8218017509178197, "grad_norm": 2.8305574850888546, "learning_rate": 1.8449170006758278e-07, "loss": 0.1522, "step": 19984 }, { "epoch": 2.8219429539678056, "grad_norm": 2.973856765596425, "learning_rate": 1.842003366119971e-07, "loss": 0.1533, "step": 19985 }, { "epoch": 2.8220841570177915, "grad_norm": 3.3986121684985022, "learning_rate": 1.8390920127135613e-07, "loss": 0.1453, "step": 19986 }, { "epoch": 2.8222253600677774, "grad_norm": 3.1375206305441776, "learning_rate": 1.836182940524256e-07, "loss": 0.1569, "step": 19987 }, { "epoch": 2.8223665631177632, "grad_norm": 3.3080607177037273, "learning_rate": 1.833276149619667e-07, "loss": 0.1574, "step": 19988 }, { "epoch": 2.822507766167749, "grad_norm": 3.8773492877765143, "learning_rate": 1.830371640067341e-07, "loss": 0.1422, "step": 19989 }, { "epoch": 2.822648969217735, "grad_norm": 2.719084425884646, "learning_rate": 1.8274694119347901e-07, "loss": 0.1568, "step": 19990 }, { "epoch": 2.822790172267721, "grad_norm": 3.87600917519555, "learning_rate": 1.8245694652894496e-07, "loss": 0.1579, "step": 19991 }, { "epoch": 2.8229313753177068, "grad_norm": 3.020654148882816, "learning_rate": 1.8216718001987098e-07, "loss": 0.1094, "step": 19992 }, { "epoch": 2.8230725783676927, "grad_norm": 4.898352693979814, "learning_rate": 1.8187764167299171e-07, "loss": 0.2184, "step": 19993 }, { "epoch": 2.8232137814176785, "grad_norm": 2.7835101836987013, "learning_rate": 1.815883314950373e-07, "loss": 0.1335, "step": 19994 }, { "epoch": 2.8233549844676644, "grad_norm": 3.1076394258538493, "learning_rate": 1.8129924949272904e-07, "loss": 0.1289, "step": 19995 }, { "epoch": 2.8234961875176503, "grad_norm": 3.6946907431862357, "learning_rate": 1.81010395672786e-07, "loss": 0.1639, "step": 19996 }, { "epoch": 2.823637390567636, "grad_norm": 3.151729676732851, "learning_rate": 1.807217700419206e-07, "loss": 0.1111, "step": 19997 }, { "epoch": 2.823778593617622, "grad_norm": 3.318976105346471, "learning_rate": 1.804333726068408e-07, "loss": 0.1618, "step": 19998 }, { "epoch": 2.823919796667608, "grad_norm": 4.254973572717238, "learning_rate": 1.801452033742479e-07, "loss": 0.175, "step": 19999 }, { "epoch": 2.824060999717594, "grad_norm": 2.4012838561269487, "learning_rate": 1.79857262350841e-07, "loss": 0.1078, "step": 20000 }, { "epoch": 2.8242022027675797, "grad_norm": 2.7468931906044376, "learning_rate": 1.7956954954330918e-07, "loss": 0.1231, "step": 20001 }, { "epoch": 2.8243434058175656, "grad_norm": 4.187355013022518, "learning_rate": 1.7928206495834043e-07, "loss": 0.1617, "step": 20002 }, { "epoch": 2.8244846088675515, "grad_norm": 4.9777430289691065, "learning_rate": 1.78994808602615e-07, "loss": 0.2173, "step": 20003 }, { "epoch": 2.8246258119175374, "grad_norm": 2.8064552756946832, "learning_rate": 1.787077804828097e-07, "loss": 0.1279, "step": 20004 }, { "epoch": 2.8247670149675232, "grad_norm": 2.5618895442132725, "learning_rate": 1.7842098060559366e-07, "loss": 0.1146, "step": 20005 }, { "epoch": 2.824908218017509, "grad_norm": 2.7035873128791357, "learning_rate": 1.7813440897763158e-07, "loss": 0.1128, "step": 20006 }, { "epoch": 2.825049421067495, "grad_norm": 3.4576972423535772, "learning_rate": 1.7784806560558477e-07, "loss": 0.1384, "step": 20007 }, { "epoch": 2.825190624117481, "grad_norm": 3.547624333031972, "learning_rate": 1.7756195049610682e-07, "loss": 0.1632, "step": 20008 }, { "epoch": 2.825331827167467, "grad_norm": 3.537132018896759, "learning_rate": 1.7727606365584792e-07, "loss": 0.1718, "step": 20009 }, { "epoch": 2.8254730302174527, "grad_norm": 2.7726777317094378, "learning_rate": 1.769904050914495e-07, "loss": 0.1572, "step": 20010 }, { "epoch": 2.8256142332674385, "grad_norm": 3.0074108247981486, "learning_rate": 1.7670497480955286e-07, "loss": 0.1166, "step": 20011 }, { "epoch": 2.8257554363174244, "grad_norm": 3.134770383679413, "learning_rate": 1.7641977281679046e-07, "loss": 0.1585, "step": 20012 }, { "epoch": 2.8258966393674103, "grad_norm": 3.0944634560173467, "learning_rate": 1.7613479911979036e-07, "loss": 0.1718, "step": 20013 }, { "epoch": 2.826037842417396, "grad_norm": 3.38886925330426, "learning_rate": 1.7585005372517504e-07, "loss": 0.1661, "step": 20014 }, { "epoch": 2.826179045467382, "grad_norm": 2.7393362901219307, "learning_rate": 1.7556553663956034e-07, "loss": 0.1138, "step": 20015 }, { "epoch": 2.826320248517368, "grad_norm": 3.0202584126381806, "learning_rate": 1.7528124786956092e-07, "loss": 0.1217, "step": 20016 }, { "epoch": 2.826461451567354, "grad_norm": 3.290678634249369, "learning_rate": 1.7499718742178152e-07, "loss": 0.1553, "step": 20017 }, { "epoch": 2.8266026546173397, "grad_norm": 2.9239943075614496, "learning_rate": 1.7471335530282574e-07, "loss": 0.1517, "step": 20018 }, { "epoch": 2.8267438576673256, "grad_norm": 3.1155126303046683, "learning_rate": 1.744297515192872e-07, "loss": 0.1471, "step": 20019 }, { "epoch": 2.8268850607173115, "grad_norm": 3.0451486889715937, "learning_rate": 1.741463760777584e-07, "loss": 0.1162, "step": 20020 }, { "epoch": 2.8270262637672974, "grad_norm": 2.9090605696067655, "learning_rate": 1.7386322898482412e-07, "loss": 0.1347, "step": 20021 }, { "epoch": 2.8271674668172833, "grad_norm": 2.925972617336945, "learning_rate": 1.7358031024706456e-07, "loss": 0.1568, "step": 20022 }, { "epoch": 2.827308669867269, "grad_norm": 3.3069164735181493, "learning_rate": 1.7329761987105564e-07, "loss": 0.1311, "step": 20023 }, { "epoch": 2.827449872917255, "grad_norm": 2.282752963461291, "learning_rate": 1.7301515786336541e-07, "loss": 0.1048, "step": 20024 }, { "epoch": 2.827591075967241, "grad_norm": 3.166255281648736, "learning_rate": 1.7273292423055975e-07, "loss": 0.1068, "step": 20025 }, { "epoch": 2.827732279017227, "grad_norm": 2.833925617736957, "learning_rate": 1.7245091897919564e-07, "loss": 0.1081, "step": 20026 }, { "epoch": 2.8278734820672127, "grad_norm": 2.8406202269014633, "learning_rate": 1.7216914211582892e-07, "loss": 0.1246, "step": 20027 }, { "epoch": 2.8280146851171986, "grad_norm": 3.606967636895364, "learning_rate": 1.7188759364700658e-07, "loss": 0.1975, "step": 20028 }, { "epoch": 2.8281558881671844, "grad_norm": 3.4952839360828665, "learning_rate": 1.716062735792723e-07, "loss": 0.161, "step": 20029 }, { "epoch": 2.8282970912171703, "grad_norm": 2.887793045218106, "learning_rate": 1.7132518191916413e-07, "loss": 0.1161, "step": 20030 }, { "epoch": 2.828438294267156, "grad_norm": 3.6815798641138, "learning_rate": 1.710443186732147e-07, "loss": 0.1627, "step": 20031 }, { "epoch": 2.828579497317142, "grad_norm": 3.7854032044275914, "learning_rate": 1.7076368384794872e-07, "loss": 0.1444, "step": 20032 }, { "epoch": 2.828720700367128, "grad_norm": 2.801745923695371, "learning_rate": 1.704832774498899e-07, "loss": 0.1046, "step": 20033 }, { "epoch": 2.828861903417114, "grad_norm": 3.4479850668546557, "learning_rate": 1.7020309948555525e-07, "loss": 0.1676, "step": 20034 }, { "epoch": 2.8290031064670997, "grad_norm": 3.2734328039527525, "learning_rate": 1.699231499614562e-07, "loss": 0.1284, "step": 20035 }, { "epoch": 2.8291443095170856, "grad_norm": 2.889018498899647, "learning_rate": 1.6964342888409646e-07, "loss": 0.1101, "step": 20036 }, { "epoch": 2.8292855125670715, "grad_norm": 3.3830369885861074, "learning_rate": 1.693639362599786e-07, "loss": 0.1656, "step": 20037 }, { "epoch": 2.8294267156170574, "grad_norm": 3.743580663291065, "learning_rate": 1.6908467209559853e-07, "loss": 0.1742, "step": 20038 }, { "epoch": 2.8295679186670433, "grad_norm": 3.87866383737866, "learning_rate": 1.688056363974433e-07, "loss": 0.1683, "step": 20039 }, { "epoch": 2.829709121717029, "grad_norm": 3.413257315776105, "learning_rate": 1.685268291719999e-07, "loss": 0.1366, "step": 20040 }, { "epoch": 2.829850324767015, "grad_norm": 2.9809593811841593, "learning_rate": 1.6824825042574766e-07, "loss": 0.137, "step": 20041 }, { "epoch": 2.829991527817001, "grad_norm": 3.54127210834927, "learning_rate": 1.6796990016515914e-07, "loss": 0.1694, "step": 20042 }, { "epoch": 2.830132730866987, "grad_norm": 3.587748049388663, "learning_rate": 1.6769177839670468e-07, "loss": 0.1747, "step": 20043 }, { "epoch": 2.8302739339169727, "grad_norm": 2.6410395721894093, "learning_rate": 1.6741388512684586e-07, "loss": 0.1292, "step": 20044 }, { "epoch": 2.8304151369669586, "grad_norm": 3.388886587797723, "learning_rate": 1.6713622036204303e-07, "loss": 0.1334, "step": 20045 }, { "epoch": 2.8305563400169444, "grad_norm": 3.0715184052644053, "learning_rate": 1.6685878410874768e-07, "loss": 0.1665, "step": 20046 }, { "epoch": 2.8306975430669303, "grad_norm": 2.985054510823789, "learning_rate": 1.665815763734091e-07, "loss": 0.1337, "step": 20047 }, { "epoch": 2.830838746116916, "grad_norm": 3.143342979931068, "learning_rate": 1.663045971624666e-07, "loss": 0.1423, "step": 20048 }, { "epoch": 2.830979949166902, "grad_norm": 3.151113221804687, "learning_rate": 1.6602784648235838e-07, "loss": 0.1419, "step": 20049 }, { "epoch": 2.831121152216888, "grad_norm": 4.8016177881321545, "learning_rate": 1.657513243395159e-07, "loss": 0.1859, "step": 20050 }, { "epoch": 2.831262355266874, "grad_norm": 2.963714438418722, "learning_rate": 1.6547503074036518e-07, "loss": 0.1384, "step": 20051 }, { "epoch": 2.8314035583168597, "grad_norm": 3.2996236762080313, "learning_rate": 1.6519896569132886e-07, "loss": 0.1296, "step": 20052 }, { "epoch": 2.8315447613668456, "grad_norm": 3.136524718997218, "learning_rate": 1.649231291988196e-07, "loss": 0.1396, "step": 20053 }, { "epoch": 2.8316859644168315, "grad_norm": 2.4764576082112204, "learning_rate": 1.646475212692511e-07, "loss": 0.1118, "step": 20054 }, { "epoch": 2.8318271674668174, "grad_norm": 3.940081522566291, "learning_rate": 1.6437214190902606e-07, "loss": 0.1604, "step": 20055 }, { "epoch": 2.8319683705168033, "grad_norm": 3.8983725695851583, "learning_rate": 1.640969911245438e-07, "loss": 0.139, "step": 20056 }, { "epoch": 2.832109573566789, "grad_norm": 3.03172787183828, "learning_rate": 1.6382206892220032e-07, "loss": 0.1107, "step": 20057 }, { "epoch": 2.832250776616775, "grad_norm": 2.921795100658245, "learning_rate": 1.6354737530838494e-07, "loss": 0.1655, "step": 20058 }, { "epoch": 2.832391979666761, "grad_norm": 3.2768851485848827, "learning_rate": 1.6327291028947923e-07, "loss": 0.147, "step": 20059 }, { "epoch": 2.832533182716747, "grad_norm": 3.9173352919643625, "learning_rate": 1.6299867387186363e-07, "loss": 0.2269, "step": 20060 }, { "epoch": 2.8326743857667327, "grad_norm": 3.68988948154806, "learning_rate": 1.6272466606190972e-07, "loss": 0.1611, "step": 20061 }, { "epoch": 2.8328155888167186, "grad_norm": 3.081646350689254, "learning_rate": 1.6245088686598686e-07, "loss": 0.1443, "step": 20062 }, { "epoch": 2.8329567918667045, "grad_norm": 2.893537884250687, "learning_rate": 1.621773362904566e-07, "loss": 0.1126, "step": 20063 }, { "epoch": 2.8330979949166903, "grad_norm": 4.112954248522791, "learning_rate": 1.6190401434167725e-07, "loss": 0.1995, "step": 20064 }, { "epoch": 2.8332391979666762, "grad_norm": 3.698312193170609, "learning_rate": 1.616309210259992e-07, "loss": 0.1626, "step": 20065 }, { "epoch": 2.833380401016662, "grad_norm": 3.7188467830075616, "learning_rate": 1.6135805634976966e-07, "loss": 0.1791, "step": 20066 }, { "epoch": 2.833521604066648, "grad_norm": 2.5095339745503993, "learning_rate": 1.6108542031932904e-07, "loss": 0.117, "step": 20067 }, { "epoch": 2.833662807116634, "grad_norm": 3.6649368234155304, "learning_rate": 1.608130129410157e-07, "loss": 0.1849, "step": 20068 }, { "epoch": 2.8338040101666198, "grad_norm": 3.444949806182833, "learning_rate": 1.6054083422115786e-07, "loss": 0.1672, "step": 20069 }, { "epoch": 2.8339452132166056, "grad_norm": 2.9269732816384515, "learning_rate": 1.6026888416608267e-07, "loss": 0.1324, "step": 20070 }, { "epoch": 2.8340864162665915, "grad_norm": 2.399734641112467, "learning_rate": 1.599971627821084e-07, "loss": 0.0983, "step": 20071 }, { "epoch": 2.834227619316577, "grad_norm": 2.9320858757455768, "learning_rate": 1.5972567007555008e-07, "loss": 0.1373, "step": 20072 }, { "epoch": 2.834368822366563, "grad_norm": 3.7376639511626313, "learning_rate": 1.5945440605271812e-07, "loss": 0.151, "step": 20073 }, { "epoch": 2.8345100254165487, "grad_norm": 2.6430765057450243, "learning_rate": 1.591833707199153e-07, "loss": 0.1222, "step": 20074 }, { "epoch": 2.8346512284665346, "grad_norm": 4.339031596277187, "learning_rate": 1.5891256408344214e-07, "loss": 0.1599, "step": 20075 }, { "epoch": 2.8347924315165205, "grad_norm": 3.8358527825448867, "learning_rate": 1.5864198614959025e-07, "loss": 0.1349, "step": 20076 }, { "epoch": 2.8349336345665064, "grad_norm": 3.839986358422419, "learning_rate": 1.5837163692464797e-07, "loss": 0.182, "step": 20077 }, { "epoch": 2.8350748376164923, "grad_norm": 3.763195504779256, "learning_rate": 1.5810151641489912e-07, "loss": 0.1805, "step": 20078 }, { "epoch": 2.835216040666478, "grad_norm": 3.165343900013053, "learning_rate": 1.5783162462661983e-07, "loss": 0.1283, "step": 20079 }, { "epoch": 2.835357243716464, "grad_norm": 2.9614233286449902, "learning_rate": 1.5756196156608393e-07, "loss": 0.1282, "step": 20080 }, { "epoch": 2.83549844676645, "grad_norm": 3.7761481557618604, "learning_rate": 1.572925272395587e-07, "loss": 0.1711, "step": 20081 }, { "epoch": 2.835639649816436, "grad_norm": 3.537461371354076, "learning_rate": 1.5702332165330348e-07, "loss": 0.1632, "step": 20082 }, { "epoch": 2.8357808528664217, "grad_norm": 3.2157354586250206, "learning_rate": 1.5675434481357444e-07, "loss": 0.1399, "step": 20083 }, { "epoch": 2.8359220559164076, "grad_norm": 3.532780852614847, "learning_rate": 1.5648559672662322e-07, "loss": 0.1321, "step": 20084 }, { "epoch": 2.8360632589663934, "grad_norm": 2.763795242784741, "learning_rate": 1.5621707739869707e-07, "loss": 0.1291, "step": 20085 }, { "epoch": 2.8362044620163793, "grad_norm": 3.3270571781773435, "learning_rate": 1.559487868360343e-07, "loss": 0.1564, "step": 20086 }, { "epoch": 2.836345665066365, "grad_norm": 2.621539801785005, "learning_rate": 1.5568072504486997e-07, "loss": 0.1167, "step": 20087 }, { "epoch": 2.836486868116351, "grad_norm": 3.592711960328074, "learning_rate": 1.554128920314346e-07, "loss": 0.1982, "step": 20088 }, { "epoch": 2.836628071166337, "grad_norm": 2.5159818649070713, "learning_rate": 1.5514528780195215e-07, "loss": 0.1074, "step": 20089 }, { "epoch": 2.836769274216323, "grad_norm": 3.72750913575453, "learning_rate": 1.5487791236264095e-07, "loss": 0.1663, "step": 20090 }, { "epoch": 2.8369104772663087, "grad_norm": 3.0813563181449397, "learning_rate": 1.54610765719716e-07, "loss": 0.0976, "step": 20091 }, { "epoch": 2.8370516803162946, "grad_norm": 3.090326759927255, "learning_rate": 1.543438478793846e-07, "loss": 0.1442, "step": 20092 }, { "epoch": 2.8371928833662805, "grad_norm": 3.229843290110156, "learning_rate": 1.5407715884785068e-07, "loss": 0.1545, "step": 20093 }, { "epoch": 2.8373340864162664, "grad_norm": 3.1787425761536783, "learning_rate": 1.5381069863131037e-07, "loss": 0.114, "step": 20094 }, { "epoch": 2.8374752894662523, "grad_norm": 2.7349647252627105, "learning_rate": 1.535444672359576e-07, "loss": 0.1191, "step": 20095 }, { "epoch": 2.837616492516238, "grad_norm": 2.812462470016027, "learning_rate": 1.5327846466797857e-07, "loss": 0.1312, "step": 20096 }, { "epoch": 2.837757695566224, "grad_norm": 2.981400767301244, "learning_rate": 1.5301269093355607e-07, "loss": 0.1467, "step": 20097 }, { "epoch": 2.83789889861621, "grad_norm": 3.9790337085341196, "learning_rate": 1.5274714603886742e-07, "loss": 0.1675, "step": 20098 }, { "epoch": 2.838040101666196, "grad_norm": 2.958917806545585, "learning_rate": 1.52481829990081e-07, "loss": 0.1401, "step": 20099 }, { "epoch": 2.8381813047161817, "grad_norm": 4.018486294393013, "learning_rate": 1.5221674279336408e-07, "loss": 0.1633, "step": 20100 }, { "epoch": 2.8383225077661676, "grad_norm": 3.2114286581721836, "learning_rate": 1.519518844548773e-07, "loss": 0.1448, "step": 20101 }, { "epoch": 2.8384637108161535, "grad_norm": 3.280430199138361, "learning_rate": 1.5168725498077574e-07, "loss": 0.1589, "step": 20102 }, { "epoch": 2.8386049138661393, "grad_norm": 2.7075444569272755, "learning_rate": 1.5142285437720894e-07, "loss": 0.1041, "step": 20103 }, { "epoch": 2.838746116916125, "grad_norm": 3.5067255298297497, "learning_rate": 1.5115868265032195e-07, "loss": 0.1323, "step": 20104 }, { "epoch": 2.838887319966111, "grad_norm": 3.0493362467362655, "learning_rate": 1.5089473980625324e-07, "loss": 0.1514, "step": 20105 }, { "epoch": 2.839028523016097, "grad_norm": 3.9128557189699724, "learning_rate": 1.5063102585113786e-07, "loss": 0.1321, "step": 20106 }, { "epoch": 2.839169726066083, "grad_norm": 3.317676923487786, "learning_rate": 1.5036754079110427e-07, "loss": 0.1428, "step": 20107 }, { "epoch": 2.8393109291160687, "grad_norm": 3.0864751618088793, "learning_rate": 1.5010428463227423e-07, "loss": 0.1292, "step": 20108 }, { "epoch": 2.8394521321660546, "grad_norm": 3.310862928978597, "learning_rate": 1.4984125738076728e-07, "loss": 0.1682, "step": 20109 }, { "epoch": 2.8395933352160405, "grad_norm": 3.363593724724202, "learning_rate": 1.495784590426963e-07, "loss": 0.1412, "step": 20110 }, { "epoch": 2.8397345382660264, "grad_norm": 2.7305328622456506, "learning_rate": 1.4931588962416755e-07, "loss": 0.1552, "step": 20111 }, { "epoch": 2.8398757413160123, "grad_norm": 3.03102161813855, "learning_rate": 1.4905354913128279e-07, "loss": 0.1399, "step": 20112 }, { "epoch": 2.840016944365998, "grad_norm": 2.8206466452122676, "learning_rate": 1.4879143757013824e-07, "loss": 0.1205, "step": 20113 }, { "epoch": 2.840158147415984, "grad_norm": 3.647103150902528, "learning_rate": 1.4852955494682798e-07, "loss": 0.163, "step": 20114 }, { "epoch": 2.84029935046597, "grad_norm": 3.6350826105702976, "learning_rate": 1.4826790126743596e-07, "loss": 0.1607, "step": 20115 }, { "epoch": 2.840440553515956, "grad_norm": 3.5436747863870584, "learning_rate": 1.4800647653804289e-07, "loss": 0.1732, "step": 20116 }, { "epoch": 2.8405817565659417, "grad_norm": 2.7397490192075455, "learning_rate": 1.4774528076472505e-07, "loss": 0.1137, "step": 20117 }, { "epoch": 2.8407229596159276, "grad_norm": 3.753796034590914, "learning_rate": 1.4748431395355088e-07, "loss": 0.1667, "step": 20118 }, { "epoch": 2.8408641626659135, "grad_norm": 3.3303348939922186, "learning_rate": 1.472235761105878e-07, "loss": 0.134, "step": 20119 }, { "epoch": 2.8410053657158993, "grad_norm": 2.3780096269986166, "learning_rate": 1.4696306724189312e-07, "loss": 0.0985, "step": 20120 }, { "epoch": 2.8411465687658852, "grad_norm": 2.3977516338728813, "learning_rate": 1.4670278735352094e-07, "loss": 0.1167, "step": 20121 }, { "epoch": 2.841287771815871, "grad_norm": 4.527111388218047, "learning_rate": 1.4644273645152196e-07, "loss": 0.1765, "step": 20122 }, { "epoch": 2.841428974865857, "grad_norm": 3.0990269541292896, "learning_rate": 1.461829145419369e-07, "loss": 0.1068, "step": 20123 }, { "epoch": 2.841570177915843, "grad_norm": 4.874338941260348, "learning_rate": 1.4592332163080648e-07, "loss": 0.2147, "step": 20124 }, { "epoch": 2.8417113809658288, "grad_norm": 3.000027031098533, "learning_rate": 1.4566395772416254e-07, "loss": 0.1103, "step": 20125 }, { "epoch": 2.8418525840158146, "grad_norm": 2.713669946050697, "learning_rate": 1.4540482282803136e-07, "loss": 0.1092, "step": 20126 }, { "epoch": 2.8419937870658005, "grad_norm": 3.6726079300647614, "learning_rate": 1.4514591694843704e-07, "loss": 0.1686, "step": 20127 }, { "epoch": 2.8421349901157864, "grad_norm": 4.032347769911445, "learning_rate": 1.4488724009139588e-07, "loss": 0.142, "step": 20128 }, { "epoch": 2.8422761931657723, "grad_norm": 3.0752797018108944, "learning_rate": 1.4462879226291858e-07, "loss": 0.1015, "step": 20129 }, { "epoch": 2.842417396215758, "grad_norm": 3.4231297220189942, "learning_rate": 1.4437057346901152e-07, "loss": 0.1379, "step": 20130 }, { "epoch": 2.842558599265744, "grad_norm": 3.423606595782078, "learning_rate": 1.441125837156765e-07, "loss": 0.136, "step": 20131 }, { "epoch": 2.84269980231573, "grad_norm": 3.3077086863856366, "learning_rate": 1.4385482300890873e-07, "loss": 0.1497, "step": 20132 }, { "epoch": 2.842841005365716, "grad_norm": 3.6895902842055133, "learning_rate": 1.4359729135469903e-07, "loss": 0.1407, "step": 20133 }, { "epoch": 2.8429822084157017, "grad_norm": 2.838225346864948, "learning_rate": 1.4333998875903032e-07, "loss": 0.1455, "step": 20134 }, { "epoch": 2.8431234114656876, "grad_norm": 3.6696856089573764, "learning_rate": 1.4308291522788344e-07, "loss": 0.1483, "step": 20135 }, { "epoch": 2.8432646145156735, "grad_norm": 2.734964589692828, "learning_rate": 1.4282607076723355e-07, "loss": 0.1458, "step": 20136 }, { "epoch": 2.8434058175656594, "grad_norm": 2.8629674673292045, "learning_rate": 1.4256945538304812e-07, "loss": 0.106, "step": 20137 }, { "epoch": 2.8435470206156452, "grad_norm": 4.3442153684868074, "learning_rate": 1.423130690812924e-07, "loss": 0.1838, "step": 20138 }, { "epoch": 2.843688223665631, "grad_norm": 2.4886833012373994, "learning_rate": 1.420569118679227e-07, "loss": 0.1162, "step": 20139 }, { "epoch": 2.843829426715617, "grad_norm": 2.8280182103262352, "learning_rate": 1.4180098374889429e-07, "loss": 0.1198, "step": 20140 }, { "epoch": 2.843970629765603, "grad_norm": 3.8174318334458572, "learning_rate": 1.415452847301524e-07, "loss": 0.1639, "step": 20141 }, { "epoch": 2.8441118328155888, "grad_norm": 3.071892094348412, "learning_rate": 1.4128981481764115e-07, "loss": 0.1229, "step": 20142 }, { "epoch": 2.8442530358655747, "grad_norm": 3.3662266297291317, "learning_rate": 1.4103457401729692e-07, "loss": 0.1576, "step": 20143 }, { "epoch": 2.8443942389155605, "grad_norm": 2.8388120291062653, "learning_rate": 1.4077956233505163e-07, "loss": 0.1206, "step": 20144 }, { "epoch": 2.8445354419655464, "grad_norm": 2.746761977345328, "learning_rate": 1.4052477977683167e-07, "loss": 0.0964, "step": 20145 }, { "epoch": 2.8446766450155323, "grad_norm": 3.124899117991557, "learning_rate": 1.402702263485567e-07, "loss": 0.1289, "step": 20146 }, { "epoch": 2.844817848065518, "grad_norm": 3.377381553154912, "learning_rate": 1.4001590205614425e-07, "loss": 0.1662, "step": 20147 }, { "epoch": 2.844959051115504, "grad_norm": 3.3031563959642365, "learning_rate": 1.3976180690550402e-07, "loss": 0.1618, "step": 20148 }, { "epoch": 2.84510025416549, "grad_norm": 3.0717957375908664, "learning_rate": 1.3950794090254127e-07, "loss": 0.1384, "step": 20149 }, { "epoch": 2.845241457215476, "grad_norm": 3.079643076805089, "learning_rate": 1.3925430405315577e-07, "loss": 0.117, "step": 20150 }, { "epoch": 2.8453826602654617, "grad_norm": 3.326591431499201, "learning_rate": 1.3900089636324164e-07, "loss": 0.1394, "step": 20151 }, { "epoch": 2.8455238633154476, "grad_norm": 2.786912680528717, "learning_rate": 1.3874771783868758e-07, "loss": 0.1245, "step": 20152 }, { "epoch": 2.8456650663654335, "grad_norm": 3.2327597455210344, "learning_rate": 1.3849476848537656e-07, "loss": 0.1612, "step": 20153 }, { "epoch": 2.8458062694154194, "grad_norm": 3.3951017114134716, "learning_rate": 1.3824204830918952e-07, "loss": 0.1257, "step": 20154 }, { "epoch": 2.8459474724654052, "grad_norm": 3.245018447411824, "learning_rate": 1.379895573159995e-07, "loss": 0.1518, "step": 20155 }, { "epoch": 2.846088675515391, "grad_norm": 2.9032374758391457, "learning_rate": 1.3773729551167182e-07, "loss": 0.1157, "step": 20156 }, { "epoch": 2.846229878565377, "grad_norm": 3.4670277528852944, "learning_rate": 1.3748526290207065e-07, "loss": 0.1606, "step": 20157 }, { "epoch": 2.846371081615363, "grad_norm": 2.7438000763654578, "learning_rate": 1.3723345949305245e-07, "loss": 0.1259, "step": 20158 }, { "epoch": 2.846512284665349, "grad_norm": 3.9056112957646794, "learning_rate": 1.3698188529046918e-07, "loss": 0.1961, "step": 20159 }, { "epoch": 2.8466534877153347, "grad_norm": 2.4238680708012197, "learning_rate": 1.367305403001673e-07, "loss": 0.1313, "step": 20160 }, { "epoch": 2.8467946907653205, "grad_norm": 3.2078907987506597, "learning_rate": 1.3647942452798768e-07, "loss": 0.1697, "step": 20161 }, { "epoch": 2.8469358938153064, "grad_norm": 3.1243146667734956, "learning_rate": 1.3622853797976786e-07, "loss": 0.159, "step": 20162 }, { "epoch": 2.8470770968652923, "grad_norm": 4.304785377709412, "learning_rate": 1.3597788066133544e-07, "loss": 0.1669, "step": 20163 }, { "epoch": 2.847218299915278, "grad_norm": 2.869146979679962, "learning_rate": 1.3572745257851792e-07, "loss": 0.1501, "step": 20164 }, { "epoch": 2.847359502965264, "grad_norm": 2.817750338207062, "learning_rate": 1.3547725373713406e-07, "loss": 0.1177, "step": 20165 }, { "epoch": 2.84750070601525, "grad_norm": 3.8896156607061676, "learning_rate": 1.3522728414299911e-07, "loss": 0.1566, "step": 20166 }, { "epoch": 2.847641909065236, "grad_norm": 3.165767335327929, "learning_rate": 1.3497754380192184e-07, "loss": 0.1385, "step": 20167 }, { "epoch": 2.8477831121152217, "grad_norm": 2.8028314860963857, "learning_rate": 1.3472803271970536e-07, "loss": 0.12, "step": 20168 }, { "epoch": 2.8479243151652076, "grad_norm": 3.023315439173604, "learning_rate": 1.3447875090214945e-07, "loss": 0.1202, "step": 20169 }, { "epoch": 2.8480655182151935, "grad_norm": 4.245166720673949, "learning_rate": 1.342296983550462e-07, "loss": 0.1999, "step": 20170 }, { "epoch": 2.8482067212651794, "grad_norm": 3.719732320661145, "learning_rate": 1.3398087508418423e-07, "loss": 0.1719, "step": 20171 }, { "epoch": 2.8483479243151653, "grad_norm": 3.0366549031903585, "learning_rate": 1.3373228109534675e-07, "loss": 0.1516, "step": 20172 }, { "epoch": 2.848489127365151, "grad_norm": 2.9598040609442133, "learning_rate": 1.3348391639430913e-07, "loss": 0.1387, "step": 20173 }, { "epoch": 2.848630330415137, "grad_norm": 3.3432790963044, "learning_rate": 1.3323578098684565e-07, "loss": 0.1794, "step": 20174 }, { "epoch": 2.848771533465123, "grad_norm": 2.8473246315021847, "learning_rate": 1.3298787487872055e-07, "loss": 0.131, "step": 20175 }, { "epoch": 2.848912736515109, "grad_norm": 2.8006278444409687, "learning_rate": 1.3274019807569593e-07, "loss": 0.1342, "step": 20176 }, { "epoch": 2.8490539395650947, "grad_norm": 2.9161502837531312, "learning_rate": 1.324927505835283e-07, "loss": 0.1047, "step": 20177 }, { "epoch": 2.8491951426150806, "grad_norm": 3.9115411181793425, "learning_rate": 1.3224553240796633e-07, "loss": 0.1523, "step": 20178 }, { "epoch": 2.8493363456650664, "grad_norm": 3.315597478713627, "learning_rate": 1.3199854355475772e-07, "loss": 0.131, "step": 20179 }, { "epoch": 2.8494775487150523, "grad_norm": 2.9859261734114013, "learning_rate": 1.3175178402964116e-07, "loss": 0.1389, "step": 20180 }, { "epoch": 2.849618751765038, "grad_norm": 2.498227420514449, "learning_rate": 1.315052538383521e-07, "loss": 0.1223, "step": 20181 }, { "epoch": 2.849759954815024, "grad_norm": 3.2924253969230177, "learning_rate": 1.3125895298661705e-07, "loss": 0.1363, "step": 20182 }, { "epoch": 2.84990115786501, "grad_norm": 3.1582084859666804, "learning_rate": 1.3101288148016477e-07, "loss": 0.1182, "step": 20183 }, { "epoch": 2.850042360914996, "grad_norm": 2.6674987014747655, "learning_rate": 1.3076703932470958e-07, "loss": 0.1024, "step": 20184 }, { "epoch": 2.8501835639649817, "grad_norm": 3.147236032883525, "learning_rate": 1.305214265259658e-07, "loss": 0.1568, "step": 20185 }, { "epoch": 2.8503247670149676, "grad_norm": 2.9751718813931585, "learning_rate": 1.3027604308964215e-07, "loss": 0.1262, "step": 20186 }, { "epoch": 2.8504659700649535, "grad_norm": 3.009931234788582, "learning_rate": 1.3003088902143968e-07, "loss": 0.1539, "step": 20187 }, { "epoch": 2.8506071731149394, "grad_norm": 3.6321521492500697, "learning_rate": 1.2978596432705826e-07, "loss": 0.1659, "step": 20188 }, { "epoch": 2.8507483761649253, "grad_norm": 3.47105747892272, "learning_rate": 1.2954126901218778e-07, "loss": 0.1584, "step": 20189 }, { "epoch": 2.850889579214911, "grad_norm": 3.6253566323212314, "learning_rate": 1.292968030825159e-07, "loss": 0.1516, "step": 20190 }, { "epoch": 2.851030782264897, "grad_norm": 3.9028523062322122, "learning_rate": 1.2905256654372366e-07, "loss": 0.159, "step": 20191 }, { "epoch": 2.851171985314883, "grad_norm": 2.9961940391185706, "learning_rate": 1.288085594014865e-07, "loss": 0.1276, "step": 20192 }, { "epoch": 2.851313188364869, "grad_norm": 2.804953586749761, "learning_rate": 1.2856478166147546e-07, "loss": 0.1207, "step": 20193 }, { "epoch": 2.8514543914148547, "grad_norm": 3.110657247834433, "learning_rate": 1.2832123332935598e-07, "loss": 0.1703, "step": 20194 }, { "epoch": 2.8515955944648406, "grad_norm": 3.4270481919123816, "learning_rate": 1.2807791441078797e-07, "loss": 0.1119, "step": 20195 }, { "epoch": 2.8517367975148264, "grad_norm": 3.8028381211433344, "learning_rate": 1.2783482491142474e-07, "loss": 0.175, "step": 20196 }, { "epoch": 2.8518780005648123, "grad_norm": 3.3663687910991857, "learning_rate": 1.275919648369184e-07, "loss": 0.1424, "step": 20197 }, { "epoch": 2.852019203614798, "grad_norm": 3.0482814044569673, "learning_rate": 1.2734933419290996e-07, "loss": 0.1159, "step": 20198 }, { "epoch": 2.852160406664784, "grad_norm": 3.199999772225247, "learning_rate": 1.271069329850383e-07, "loss": 0.1424, "step": 20199 }, { "epoch": 2.85230160971477, "grad_norm": 2.9038813618153534, "learning_rate": 1.2686476121894e-07, "loss": 0.1463, "step": 20200 }, { "epoch": 2.852442812764756, "grad_norm": 3.213831133875631, "learning_rate": 1.2662281890024052e-07, "loss": 0.1595, "step": 20201 }, { "epoch": 2.8525840158147417, "grad_norm": 3.5554672878632134, "learning_rate": 1.263811060345621e-07, "loss": 0.1497, "step": 20202 }, { "epoch": 2.8527252188647276, "grad_norm": 3.202487860714442, "learning_rate": 1.2613962262752245e-07, "loss": 0.1349, "step": 20203 }, { "epoch": 2.8528664219147135, "grad_norm": 2.9988151105247955, "learning_rate": 1.2589836868473259e-07, "loss": 0.1537, "step": 20204 }, { "epoch": 2.8530076249646994, "grad_norm": 3.226572951277175, "learning_rate": 1.2565734421180252e-07, "loss": 0.1194, "step": 20205 }, { "epoch": 2.8531488280146853, "grad_norm": 3.4608649347038303, "learning_rate": 1.2541654921432998e-07, "loss": 0.1806, "step": 20206 }, { "epoch": 2.853290031064671, "grad_norm": 2.7955632245415893, "learning_rate": 1.2517598369791383e-07, "loss": 0.1096, "step": 20207 }, { "epoch": 2.853431234114657, "grad_norm": 2.9430711085108534, "learning_rate": 1.2493564766814292e-07, "loss": 0.1267, "step": 20208 }, { "epoch": 2.853572437164643, "grad_norm": 3.2857711936755782, "learning_rate": 1.2469554113060168e-07, "loss": 0.1363, "step": 20209 }, { "epoch": 2.853713640214629, "grad_norm": 2.0794461117695247, "learning_rate": 1.244556640908712e-07, "loss": 0.1097, "step": 20210 }, { "epoch": 2.8538548432646147, "grad_norm": 3.2961909285149145, "learning_rate": 1.2421601655452696e-07, "loss": 0.1455, "step": 20211 }, { "epoch": 2.8539960463146006, "grad_norm": 3.4543509165727944, "learning_rate": 1.2397659852713684e-07, "loss": 0.1768, "step": 20212 }, { "epoch": 2.8541372493645865, "grad_norm": 2.9422164522253063, "learning_rate": 1.237374100142663e-07, "loss": 0.1266, "step": 20213 }, { "epoch": 2.8542784524145723, "grad_norm": 3.0011170395580313, "learning_rate": 1.2349845102147317e-07, "loss": 0.1278, "step": 20214 }, { "epoch": 2.8544196554645582, "grad_norm": 3.6748656197522154, "learning_rate": 1.2325972155430966e-07, "loss": 0.1911, "step": 20215 }, { "epoch": 2.854560858514544, "grad_norm": 3.360604717328857, "learning_rate": 1.2302122161832464e-07, "loss": 0.1259, "step": 20216 }, { "epoch": 2.85470206156453, "grad_norm": 3.8520345006788537, "learning_rate": 1.2278295121906258e-07, "loss": 0.1557, "step": 20217 }, { "epoch": 2.854843264614516, "grad_norm": 4.663319534893335, "learning_rate": 1.2254491036205797e-07, "loss": 0.2138, "step": 20218 }, { "epoch": 2.8549844676645018, "grad_norm": 3.358483363236862, "learning_rate": 1.223070990528441e-07, "loss": 0.1515, "step": 20219 }, { "epoch": 2.8551256707144876, "grad_norm": 3.100920222331805, "learning_rate": 1.220695172969477e-07, "loss": 0.1435, "step": 20220 }, { "epoch": 2.8552668737644735, "grad_norm": 2.6237381811926217, "learning_rate": 1.2183216509988881e-07, "loss": 0.1208, "step": 20221 }, { "epoch": 2.8554080768144594, "grad_norm": 2.5264425721971877, "learning_rate": 1.2159504246718522e-07, "loss": 0.1448, "step": 20222 }, { "epoch": 2.8555492798644453, "grad_norm": 3.108330700554917, "learning_rate": 1.2135814940434587e-07, "loss": 0.1597, "step": 20223 }, { "epoch": 2.855690482914431, "grad_norm": 2.657930701123252, "learning_rate": 1.2112148591687743e-07, "loss": 0.1021, "step": 20224 }, { "epoch": 2.855831685964417, "grad_norm": 3.358311261788388, "learning_rate": 1.2088505201028e-07, "loss": 0.1249, "step": 20225 }, { "epoch": 2.855972889014403, "grad_norm": 2.7365177423227736, "learning_rate": 1.2064884769004692e-07, "loss": 0.1286, "step": 20226 }, { "epoch": 2.856114092064389, "grad_norm": 2.9635294166077983, "learning_rate": 1.2041287296166715e-07, "loss": 0.1281, "step": 20227 }, { "epoch": 2.8562552951143747, "grad_norm": 3.535685731643499, "learning_rate": 1.201771278306263e-07, "loss": 0.1729, "step": 20228 }, { "epoch": 2.8563964981643606, "grad_norm": 2.676209698843738, "learning_rate": 1.199416123024022e-07, "loss": 0.1203, "step": 20229 }, { "epoch": 2.8565377012143465, "grad_norm": 3.3646955713783813, "learning_rate": 1.1970632638246827e-07, "loss": 0.1295, "step": 20230 }, { "epoch": 2.8566789042643324, "grad_norm": 2.222027329839311, "learning_rate": 1.1947127007629234e-07, "loss": 0.1316, "step": 20231 }, { "epoch": 2.8568201073143182, "grad_norm": 4.139333990712217, "learning_rate": 1.192364433893378e-07, "loss": 0.1693, "step": 20232 }, { "epoch": 2.856961310364304, "grad_norm": 2.70787758242927, "learning_rate": 1.1900184632705924e-07, "loss": 0.1062, "step": 20233 }, { "epoch": 2.85710251341429, "grad_norm": 3.112493340863802, "learning_rate": 1.1876747889491225e-07, "loss": 0.1395, "step": 20234 }, { "epoch": 2.857243716464276, "grad_norm": 2.928612164154651, "learning_rate": 1.1853334109834136e-07, "loss": 0.1358, "step": 20235 }, { "epoch": 2.8573849195142618, "grad_norm": 4.0597829954849285, "learning_rate": 1.1829943294278778e-07, "loss": 0.1927, "step": 20236 }, { "epoch": 2.8575261225642477, "grad_norm": 3.5381288806643103, "learning_rate": 1.1806575443368717e-07, "loss": 0.1632, "step": 20237 }, { "epoch": 2.8576673256142335, "grad_norm": 3.3525270218664422, "learning_rate": 1.1783230557647075e-07, "loss": 0.1538, "step": 20238 }, { "epoch": 2.8578085286642194, "grad_norm": 3.1018747781509677, "learning_rate": 1.1759908637656525e-07, "loss": 0.15, "step": 20239 }, { "epoch": 2.8579497317142053, "grad_norm": 3.097626813499742, "learning_rate": 1.1736609683938749e-07, "loss": 0.1406, "step": 20240 }, { "epoch": 2.858090934764191, "grad_norm": 3.0374837526393765, "learning_rate": 1.1713333697035423e-07, "loss": 0.1426, "step": 20241 }, { "epoch": 2.8582321378141766, "grad_norm": 2.8533941503425857, "learning_rate": 1.1690080677487558e-07, "loss": 0.1299, "step": 20242 }, { "epoch": 2.8583733408641625, "grad_norm": 3.3721739414634597, "learning_rate": 1.166685062583528e-07, "loss": 0.1428, "step": 20243 }, { "epoch": 2.8585145439141484, "grad_norm": 3.170959493702561, "learning_rate": 1.1643643542618488e-07, "loss": 0.1439, "step": 20244 }, { "epoch": 2.8586557469641343, "grad_norm": 3.290092304069314, "learning_rate": 1.162045942837664e-07, "loss": 0.149, "step": 20245 }, { "epoch": 2.85879695001412, "grad_norm": 3.076316537817162, "learning_rate": 1.1597298283648529e-07, "loss": 0.1392, "step": 20246 }, { "epoch": 2.858938153064106, "grad_norm": 3.4543080958398593, "learning_rate": 1.1574160108972277e-07, "loss": 0.1488, "step": 20247 }, { "epoch": 2.859079356114092, "grad_norm": 3.382143629716755, "learning_rate": 1.1551044904885678e-07, "loss": 0.1558, "step": 20248 }, { "epoch": 2.859220559164078, "grad_norm": 2.6916194512197658, "learning_rate": 1.1527952671925968e-07, "loss": 0.1231, "step": 20249 }, { "epoch": 2.8593617622140637, "grad_norm": 3.557852763005533, "learning_rate": 1.1504883410629608e-07, "loss": 0.2, "step": 20250 }, { "epoch": 2.8595029652640496, "grad_norm": 3.58397476953013, "learning_rate": 1.1481837121533057e-07, "loss": 0.165, "step": 20251 }, { "epoch": 2.8596441683140355, "grad_norm": 3.528473036923732, "learning_rate": 1.1458813805171665e-07, "loss": 0.1376, "step": 20252 }, { "epoch": 2.8597853713640213, "grad_norm": 3.3401827028229825, "learning_rate": 1.1435813462080447e-07, "loss": 0.1313, "step": 20253 }, { "epoch": 2.859926574414007, "grad_norm": 3.4990557030392444, "learning_rate": 1.1412836092793977e-07, "loss": 0.1587, "step": 20254 }, { "epoch": 2.860067777463993, "grad_norm": 3.335944031885215, "learning_rate": 1.1389881697846383e-07, "loss": 0.1448, "step": 20255 }, { "epoch": 2.860208980513979, "grad_norm": 3.0957759909076374, "learning_rate": 1.1366950277770794e-07, "loss": 0.1686, "step": 20256 }, { "epoch": 2.860350183563965, "grad_norm": 3.13192842169984, "learning_rate": 1.134404183310045e-07, "loss": 0.1247, "step": 20257 }, { "epoch": 2.8604913866139507, "grad_norm": 3.7203794059442554, "learning_rate": 1.1321156364367591e-07, "loss": 0.1532, "step": 20258 }, { "epoch": 2.8606325896639366, "grad_norm": 3.3916187909154534, "learning_rate": 1.1298293872104127e-07, "loss": 0.1823, "step": 20259 }, { "epoch": 2.8607737927139225, "grad_norm": 3.708151458907937, "learning_rate": 1.1275454356841298e-07, "loss": 0.1544, "step": 20260 }, { "epoch": 2.8609149957639084, "grad_norm": 2.8761214059586546, "learning_rate": 1.1252637819109902e-07, "loss": 0.1456, "step": 20261 }, { "epoch": 2.8610561988138943, "grad_norm": 3.390287125110571, "learning_rate": 1.1229844259440182e-07, "loss": 0.1685, "step": 20262 }, { "epoch": 2.86119740186388, "grad_norm": 2.745855007597313, "learning_rate": 1.1207073678361824e-07, "loss": 0.1327, "step": 20263 }, { "epoch": 2.861338604913866, "grad_norm": 3.700445458841732, "learning_rate": 1.1184326076404073e-07, "loss": 0.1352, "step": 20264 }, { "epoch": 2.861479807963852, "grad_norm": 3.5351679927253854, "learning_rate": 1.1161601454095616e-07, "loss": 0.1451, "step": 20265 }, { "epoch": 2.861621011013838, "grad_norm": 3.2152162492024, "learning_rate": 1.1138899811964477e-07, "loss": 0.1478, "step": 20266 }, { "epoch": 2.8617622140638237, "grad_norm": 3.227587442220049, "learning_rate": 1.1116221150538231e-07, "loss": 0.1698, "step": 20267 }, { "epoch": 2.8619034171138096, "grad_norm": 2.6038289376774437, "learning_rate": 1.1093565470343904e-07, "loss": 0.1268, "step": 20268 }, { "epoch": 2.8620446201637955, "grad_norm": 3.092086905454962, "learning_rate": 1.1070932771908072e-07, "loss": 0.1581, "step": 20269 }, { "epoch": 2.8621858232137813, "grad_norm": 3.4474877337555023, "learning_rate": 1.1048323055756649e-07, "loss": 0.169, "step": 20270 }, { "epoch": 2.8623270262637672, "grad_norm": 3.2764986723978495, "learning_rate": 1.1025736322415104e-07, "loss": 0.1492, "step": 20271 }, { "epoch": 2.862468229313753, "grad_norm": 3.9917769584964686, "learning_rate": 1.1003172572408349e-07, "loss": 0.1882, "step": 20272 }, { "epoch": 2.862609432363739, "grad_norm": 2.756172086057813, "learning_rate": 1.0980631806260745e-07, "loss": 0.1152, "step": 20273 }, { "epoch": 2.862750635413725, "grad_norm": 4.15303445230611, "learning_rate": 1.0958114024496202e-07, "loss": 0.1908, "step": 20274 }, { "epoch": 2.8628918384637108, "grad_norm": 2.9467043341917423, "learning_rate": 1.0935619227637862e-07, "loss": 0.1296, "step": 20275 }, { "epoch": 2.8630330415136966, "grad_norm": 2.5521190761981662, "learning_rate": 1.0913147416208636e-07, "loss": 0.1041, "step": 20276 }, { "epoch": 2.8631742445636825, "grad_norm": 3.134719108570739, "learning_rate": 1.0890698590730775e-07, "loss": 0.141, "step": 20277 }, { "epoch": 2.8633154476136684, "grad_norm": 2.7630641858991485, "learning_rate": 1.086827275172575e-07, "loss": 0.1339, "step": 20278 }, { "epoch": 2.8634566506636543, "grad_norm": 3.2105525262759813, "learning_rate": 1.0845869899715034e-07, "loss": 0.1232, "step": 20279 }, { "epoch": 2.86359785371364, "grad_norm": 3.347207684214817, "learning_rate": 1.0823490035218986e-07, "loss": 0.1366, "step": 20280 }, { "epoch": 2.863739056763626, "grad_norm": 2.9857748540318236, "learning_rate": 1.080113315875797e-07, "loss": 0.1332, "step": 20281 }, { "epoch": 2.863880259813612, "grad_norm": 2.64748303684728, "learning_rate": 1.0778799270851348e-07, "loss": 0.1423, "step": 20282 }, { "epoch": 2.864021462863598, "grad_norm": 2.9302208991713585, "learning_rate": 1.0756488372018259e-07, "loss": 0.1397, "step": 20283 }, { "epoch": 2.8641626659135837, "grad_norm": 3.873961399405908, "learning_rate": 1.0734200462777178e-07, "loss": 0.1566, "step": 20284 }, { "epoch": 2.8643038689635696, "grad_norm": 3.3850476326755836, "learning_rate": 1.0711935543646023e-07, "loss": 0.1463, "step": 20285 }, { "epoch": 2.8644450720135555, "grad_norm": 3.2661165064688777, "learning_rate": 1.068969361514216e-07, "loss": 0.1315, "step": 20286 }, { "epoch": 2.8645862750635414, "grad_norm": 2.8713120224885587, "learning_rate": 1.0667474677782619e-07, "loss": 0.1137, "step": 20287 }, { "epoch": 2.8647274781135272, "grad_norm": 3.3640251115340605, "learning_rate": 1.0645278732083763e-07, "loss": 0.1581, "step": 20288 }, { "epoch": 2.864868681163513, "grad_norm": 3.4299562674675093, "learning_rate": 1.0623105778561294e-07, "loss": 0.1597, "step": 20289 }, { "epoch": 2.865009884213499, "grad_norm": 3.302965321669273, "learning_rate": 1.0600955817730573e-07, "loss": 0.1913, "step": 20290 }, { "epoch": 2.865151087263485, "grad_norm": 3.7882142983029987, "learning_rate": 1.0578828850106415e-07, "loss": 0.1759, "step": 20291 }, { "epoch": 2.8652922903134708, "grad_norm": 3.21523273646911, "learning_rate": 1.055672487620285e-07, "loss": 0.1372, "step": 20292 }, { "epoch": 2.8654334933634567, "grad_norm": 3.4771356174526007, "learning_rate": 1.0534643896533913e-07, "loss": 0.1591, "step": 20293 }, { "epoch": 2.8655746964134425, "grad_norm": 3.2477589673789464, "learning_rate": 1.0512585911612416e-07, "loss": 0.1106, "step": 20294 }, { "epoch": 2.8657158994634284, "grad_norm": 3.384088406022248, "learning_rate": 1.0490550921950948e-07, "loss": 0.1446, "step": 20295 }, { "epoch": 2.8658571025134143, "grad_norm": 2.8581201297252155, "learning_rate": 1.0468538928061878e-07, "loss": 0.1095, "step": 20296 }, { "epoch": 2.8659983055634, "grad_norm": 3.741280559444293, "learning_rate": 1.0446549930456684e-07, "loss": 0.1809, "step": 20297 }, { "epoch": 2.866139508613386, "grad_norm": 3.8307731895143617, "learning_rate": 1.0424583929646181e-07, "loss": 0.144, "step": 20298 }, { "epoch": 2.866280711663372, "grad_norm": 3.616675116920907, "learning_rate": 1.0402640926141072e-07, "loss": 0.1385, "step": 20299 }, { "epoch": 2.866421914713358, "grad_norm": 3.2909846244420256, "learning_rate": 1.038072092045117e-07, "loss": 0.1548, "step": 20300 }, { "epoch": 2.8665631177633437, "grad_norm": 3.658145280150118, "learning_rate": 1.0358823913085958e-07, "loss": 0.1446, "step": 20301 }, { "epoch": 2.8667043208133296, "grad_norm": 3.5854314129827225, "learning_rate": 1.033694990455425e-07, "loss": 0.1718, "step": 20302 }, { "epoch": 2.8668455238633155, "grad_norm": 3.4134320576728308, "learning_rate": 1.0315098895364417e-07, "loss": 0.1735, "step": 20303 }, { "epoch": 2.8669867269133014, "grad_norm": 3.399469177816294, "learning_rate": 1.0293270886024276e-07, "loss": 0.1357, "step": 20304 }, { "epoch": 2.8671279299632872, "grad_norm": 3.6440333183144067, "learning_rate": 1.0271465877041198e-07, "loss": 0.1808, "step": 20305 }, { "epoch": 2.867269133013273, "grad_norm": 2.7916391187280203, "learning_rate": 1.0249683868921667e-07, "loss": 0.1366, "step": 20306 }, { "epoch": 2.867410336063259, "grad_norm": 3.6069155505781105, "learning_rate": 1.0227924862172057e-07, "loss": 0.1568, "step": 20307 }, { "epoch": 2.867551539113245, "grad_norm": 3.013128405519665, "learning_rate": 1.0206188857298182e-07, "loss": 0.1306, "step": 20308 }, { "epoch": 2.867692742163231, "grad_norm": 3.9342917486679077, "learning_rate": 1.0184475854804865e-07, "loss": 0.1844, "step": 20309 }, { "epoch": 2.8678339452132167, "grad_norm": 3.136156327270639, "learning_rate": 1.0162785855197032e-07, "loss": 0.1324, "step": 20310 }, { "epoch": 2.8679751482632025, "grad_norm": 3.970606890095133, "learning_rate": 1.0141118858978393e-07, "loss": 0.2008, "step": 20311 }, { "epoch": 2.8681163513131884, "grad_norm": 3.3025868895949766, "learning_rate": 1.0119474866652767e-07, "loss": 0.1686, "step": 20312 }, { "epoch": 2.8682575543631743, "grad_norm": 3.2152804769213397, "learning_rate": 1.0097853878722975e-07, "loss": 0.1717, "step": 20313 }, { "epoch": 2.86839875741316, "grad_norm": 3.297453634720773, "learning_rate": 1.0076255895691611e-07, "loss": 0.1234, "step": 20314 }, { "epoch": 2.868539960463146, "grad_norm": 2.83714541619477, "learning_rate": 1.00546809180605e-07, "loss": 0.1431, "step": 20315 }, { "epoch": 2.868681163513132, "grad_norm": 3.276328357241232, "learning_rate": 1.0033128946331128e-07, "loss": 0.1871, "step": 20316 }, { "epoch": 2.868822366563118, "grad_norm": 2.531575023822282, "learning_rate": 1.0011599981004317e-07, "loss": 0.0922, "step": 20317 }, { "epoch": 2.8689635696131037, "grad_norm": 3.4027875098884532, "learning_rate": 9.990094022580332e-08, "loss": 0.1374, "step": 20318 }, { "epoch": 2.8691047726630896, "grad_norm": 3.1780673761123146, "learning_rate": 9.968611071558998e-08, "loss": 0.1328, "step": 20319 }, { "epoch": 2.8692459757130755, "grad_norm": 3.838171258226563, "learning_rate": 9.947151128439692e-08, "loss": 0.142, "step": 20320 }, { "epoch": 2.8693871787630614, "grad_norm": 2.529865573428903, "learning_rate": 9.925714193720904e-08, "loss": 0.0871, "step": 20321 }, { "epoch": 2.8695283818130473, "grad_norm": 3.2166928719700802, "learning_rate": 9.904300267901012e-08, "loss": 0.1331, "step": 20322 }, { "epoch": 2.869669584863033, "grad_norm": 3.0174225870271636, "learning_rate": 9.88290935147751e-08, "loss": 0.1396, "step": 20323 }, { "epoch": 2.869810787913019, "grad_norm": 2.963454211902711, "learning_rate": 9.861541444947554e-08, "loss": 0.13, "step": 20324 }, { "epoch": 2.869951990963005, "grad_norm": 3.32357811434459, "learning_rate": 9.840196548807857e-08, "loss": 0.1167, "step": 20325 }, { "epoch": 2.870093194012991, "grad_norm": 3.33142322928062, "learning_rate": 9.818874663554356e-08, "loss": 0.1332, "step": 20326 }, { "epoch": 2.8702343970629767, "grad_norm": 3.459463599717467, "learning_rate": 9.797575789682657e-08, "loss": 0.155, "step": 20327 }, { "epoch": 2.8703756001129626, "grad_norm": 2.4783131173873416, "learning_rate": 9.776299927687694e-08, "loss": 0.1197, "step": 20328 }, { "epoch": 2.8705168031629484, "grad_norm": 3.364299098674183, "learning_rate": 9.755047078063629e-08, "loss": 0.1589, "step": 20329 }, { "epoch": 2.8706580062129343, "grad_norm": 2.5742003912414413, "learning_rate": 9.733817241304844e-08, "loss": 0.1232, "step": 20330 }, { "epoch": 2.87079920926292, "grad_norm": 3.2050237092980023, "learning_rate": 9.712610417904389e-08, "loss": 0.1353, "step": 20331 }, { "epoch": 2.870940412312906, "grad_norm": 3.5066045863791144, "learning_rate": 9.691426608355203e-08, "loss": 0.1591, "step": 20332 }, { "epoch": 2.871081615362892, "grad_norm": 3.456602117917055, "learning_rate": 9.67026581314956e-08, "loss": 0.1417, "step": 20333 }, { "epoch": 2.871222818412878, "grad_norm": 3.4035098836102122, "learning_rate": 9.649128032779287e-08, "loss": 0.1575, "step": 20334 }, { "epoch": 2.8713640214628637, "grad_norm": 3.4944405134582874, "learning_rate": 9.628013267735658e-08, "loss": 0.1504, "step": 20335 }, { "epoch": 2.8715052245128496, "grad_norm": 2.7666618155300946, "learning_rate": 9.606921518509172e-08, "loss": 0.1162, "step": 20336 }, { "epoch": 2.8716464275628355, "grad_norm": 2.7690167900919023, "learning_rate": 9.5858527855901e-08, "loss": 0.1044, "step": 20337 }, { "epoch": 2.8717876306128214, "grad_norm": 3.237584405385272, "learning_rate": 9.564807069468163e-08, "loss": 0.1334, "step": 20338 }, { "epoch": 2.8719288336628073, "grad_norm": 3.550702046761701, "learning_rate": 9.543784370632414e-08, "loss": 0.1129, "step": 20339 }, { "epoch": 2.872070036712793, "grad_norm": 3.2501926234483745, "learning_rate": 9.52278468957124e-08, "loss": 0.1574, "step": 20340 }, { "epoch": 2.872211239762779, "grad_norm": 3.641556255961306, "learning_rate": 9.501808026772808e-08, "loss": 0.1746, "step": 20341 }, { "epoch": 2.872352442812765, "grad_norm": 3.4230548729522288, "learning_rate": 9.480854382724613e-08, "loss": 0.1659, "step": 20342 }, { "epoch": 2.872493645862751, "grad_norm": 3.6955646338424137, "learning_rate": 9.459923757913603e-08, "loss": 0.1877, "step": 20343 }, { "epoch": 2.8726348489127362, "grad_norm": 3.26625329909185, "learning_rate": 9.439016152826275e-08, "loss": 0.1396, "step": 20344 }, { "epoch": 2.872776051962722, "grad_norm": 4.23319242421209, "learning_rate": 9.418131567948352e-08, "loss": 0.1876, "step": 20345 }, { "epoch": 2.872917255012708, "grad_norm": 3.242569105256127, "learning_rate": 9.397270003765224e-08, "loss": 0.1494, "step": 20346 }, { "epoch": 2.873058458062694, "grad_norm": 4.096917325179497, "learning_rate": 9.376431460761725e-08, "loss": 0.216, "step": 20347 }, { "epoch": 2.8731996611126798, "grad_norm": 3.1564857961057653, "learning_rate": 9.355615939422135e-08, "loss": 0.1263, "step": 20348 }, { "epoch": 2.8733408641626657, "grad_norm": 4.059766000741964, "learning_rate": 9.334823440230289e-08, "loss": 0.1758, "step": 20349 }, { "epoch": 2.8734820672126515, "grad_norm": 3.5973189736570474, "learning_rate": 9.314053963669245e-08, "loss": 0.1472, "step": 20350 }, { "epoch": 2.8736232702626374, "grad_norm": 2.9132391805538536, "learning_rate": 9.293307510221727e-08, "loss": 0.1093, "step": 20351 }, { "epoch": 2.8737644733126233, "grad_norm": 2.5320790848846984, "learning_rate": 9.272584080370018e-08, "loss": 0.1163, "step": 20352 }, { "epoch": 2.873905676362609, "grad_norm": 3.0877767563428407, "learning_rate": 9.251883674595396e-08, "loss": 0.1359, "step": 20353 }, { "epoch": 2.874046879412595, "grad_norm": 3.332206041194482, "learning_rate": 9.231206293379257e-08, "loss": 0.1482, "step": 20354 }, { "epoch": 2.874188082462581, "grad_norm": 3.4669665810571444, "learning_rate": 9.210551937201995e-08, "loss": 0.1177, "step": 20355 }, { "epoch": 2.874329285512567, "grad_norm": 3.106761253166471, "learning_rate": 9.189920606543556e-08, "loss": 0.1564, "step": 20356 }, { "epoch": 2.8744704885625527, "grad_norm": 4.157905161075009, "learning_rate": 9.16931230188356e-08, "loss": 0.2154, "step": 20357 }, { "epoch": 2.8746116916125386, "grad_norm": 2.7239832959328583, "learning_rate": 9.148727023700731e-08, "loss": 0.1365, "step": 20358 }, { "epoch": 2.8747528946625245, "grad_norm": 2.513485938518859, "learning_rate": 9.12816477247358e-08, "loss": 0.1254, "step": 20359 }, { "epoch": 2.8748940977125104, "grad_norm": 2.9932638451657483, "learning_rate": 9.107625548679944e-08, "loss": 0.1122, "step": 20360 }, { "epoch": 2.8750353007624962, "grad_norm": 2.8616897601905236, "learning_rate": 9.087109352797329e-08, "loss": 0.1179, "step": 20361 }, { "epoch": 2.875176503812482, "grad_norm": 3.8571391579898586, "learning_rate": 9.066616185302246e-08, "loss": 0.1952, "step": 20362 }, { "epoch": 2.875317706862468, "grad_norm": 3.680556762866874, "learning_rate": 9.046146046670979e-08, "loss": 0.1741, "step": 20363 }, { "epoch": 2.875458909912454, "grad_norm": 3.525354760319563, "learning_rate": 9.025698937379368e-08, "loss": 0.1542, "step": 20364 }, { "epoch": 2.87560011296244, "grad_norm": 4.01415818337065, "learning_rate": 9.005274857902479e-08, "loss": 0.1551, "step": 20365 }, { "epoch": 2.8757413160124257, "grad_norm": 3.837528581370704, "learning_rate": 8.984873808715155e-08, "loss": 0.1694, "step": 20366 }, { "epoch": 2.8758825190624115, "grad_norm": 3.5817699900635573, "learning_rate": 8.96449579029135e-08, "loss": 0.1496, "step": 20367 }, { "epoch": 2.8760237221123974, "grad_norm": 3.3972088894365773, "learning_rate": 8.944140803104573e-08, "loss": 0.1422, "step": 20368 }, { "epoch": 2.8761649251623833, "grad_norm": 3.259956126192769, "learning_rate": 8.923808847628002e-08, "loss": 0.136, "step": 20369 }, { "epoch": 2.876306128212369, "grad_norm": 3.231640709198301, "learning_rate": 8.903499924334147e-08, "loss": 0.1316, "step": 20370 }, { "epoch": 2.876447331262355, "grad_norm": 3.152697892357744, "learning_rate": 8.883214033694964e-08, "loss": 0.1402, "step": 20371 }, { "epoch": 2.876588534312341, "grad_norm": 3.2566760114057156, "learning_rate": 8.862951176181744e-08, "loss": 0.1229, "step": 20372 }, { "epoch": 2.876729737362327, "grad_norm": 2.669408121753116, "learning_rate": 8.842711352265554e-08, "loss": 0.1084, "step": 20373 }, { "epoch": 2.8768709404123127, "grad_norm": 3.1378763379041774, "learning_rate": 8.822494562416684e-08, "loss": 0.1214, "step": 20374 }, { "epoch": 2.8770121434622986, "grad_norm": 2.713762352623823, "learning_rate": 8.80230080710498e-08, "loss": 0.1313, "step": 20375 }, { "epoch": 2.8771533465122845, "grad_norm": 3.5840606190416717, "learning_rate": 8.782130086799734e-08, "loss": 0.1936, "step": 20376 }, { "epoch": 2.8772945495622704, "grad_norm": 2.835259407698947, "learning_rate": 8.761982401969793e-08, "loss": 0.1267, "step": 20377 }, { "epoch": 2.8774357526122563, "grad_norm": 3.863618391732048, "learning_rate": 8.741857753083228e-08, "loss": 0.1983, "step": 20378 }, { "epoch": 2.877576955662242, "grad_norm": 3.50569147256025, "learning_rate": 8.721756140607885e-08, "loss": 0.1579, "step": 20379 }, { "epoch": 2.877718158712228, "grad_norm": 3.859532790382486, "learning_rate": 8.701677565010725e-08, "loss": 0.168, "step": 20380 }, { "epoch": 2.877859361762214, "grad_norm": 3.6965802488413875, "learning_rate": 8.681622026758485e-08, "loss": 0.1845, "step": 20381 }, { "epoch": 2.8780005648122, "grad_norm": 2.4554546170724625, "learning_rate": 8.661589526317238e-08, "loss": 0.1057, "step": 20382 }, { "epoch": 2.8781417678621857, "grad_norm": 3.2592594123177916, "learning_rate": 8.641580064152499e-08, "loss": 0.1567, "step": 20383 }, { "epoch": 2.8782829709121716, "grad_norm": 3.2083885887708887, "learning_rate": 8.621593640729343e-08, "loss": 0.1433, "step": 20384 }, { "epoch": 2.8784241739621574, "grad_norm": 3.2624157058203296, "learning_rate": 8.601630256512173e-08, "loss": 0.1804, "step": 20385 }, { "epoch": 2.8785653770121433, "grad_norm": 2.8779474363969806, "learning_rate": 8.581689911965063e-08, "loss": 0.1672, "step": 20386 }, { "epoch": 2.878706580062129, "grad_norm": 3.104528268880012, "learning_rate": 8.5617726075512e-08, "loss": 0.1257, "step": 20387 }, { "epoch": 2.878847783112115, "grad_norm": 3.257602854277619, "learning_rate": 8.541878343733656e-08, "loss": 0.1401, "step": 20388 }, { "epoch": 2.878988986162101, "grad_norm": 2.4494926906812355, "learning_rate": 8.522007120974617e-08, "loss": 0.1076, "step": 20389 }, { "epoch": 2.879130189212087, "grad_norm": 3.2663072041803245, "learning_rate": 8.502158939736049e-08, "loss": 0.1223, "step": 20390 }, { "epoch": 2.8792713922620727, "grad_norm": 3.1817376048543276, "learning_rate": 8.482333800479026e-08, "loss": 0.1455, "step": 20391 }, { "epoch": 2.8794125953120586, "grad_norm": 3.097772185608281, "learning_rate": 8.46253170366429e-08, "loss": 0.134, "step": 20392 }, { "epoch": 2.8795537983620445, "grad_norm": 3.1292624237295996, "learning_rate": 8.442752649752139e-08, "loss": 0.1228, "step": 20393 }, { "epoch": 2.8796950014120304, "grad_norm": 2.5528454392126716, "learning_rate": 8.422996639202318e-08, "loss": 0.1205, "step": 20394 }, { "epoch": 2.8798362044620163, "grad_norm": 3.362113758666691, "learning_rate": 8.403263672473793e-08, "loss": 0.1484, "step": 20395 }, { "epoch": 2.879977407512002, "grad_norm": 3.768971394779968, "learning_rate": 8.383553750025198e-08, "loss": 0.1479, "step": 20396 }, { "epoch": 2.880118610561988, "grad_norm": 2.7205561925490893, "learning_rate": 8.363866872314497e-08, "loss": 0.1122, "step": 20397 }, { "epoch": 2.880259813611974, "grad_norm": 3.3011194874606993, "learning_rate": 8.344203039799214e-08, "loss": 0.1551, "step": 20398 }, { "epoch": 2.88040101666196, "grad_norm": 3.0203151332107416, "learning_rate": 8.32456225293643e-08, "loss": 0.1566, "step": 20399 }, { "epoch": 2.8805422197119457, "grad_norm": 3.079257760107595, "learning_rate": 8.304944512182666e-08, "loss": 0.1291, "step": 20400 }, { "epoch": 2.8806834227619316, "grad_norm": 2.71234672167962, "learning_rate": 8.28534981799356e-08, "loss": 0.1089, "step": 20401 }, { "epoch": 2.8808246258119174, "grad_norm": 3.0497759462456657, "learning_rate": 8.265778170824746e-08, "loss": 0.135, "step": 20402 }, { "epoch": 2.8809658288619033, "grad_norm": 3.672926602840041, "learning_rate": 8.24622957113086e-08, "loss": 0.1964, "step": 20403 }, { "epoch": 2.881107031911889, "grad_norm": 3.480010937347849, "learning_rate": 8.226704019366427e-08, "loss": 0.1646, "step": 20404 }, { "epoch": 2.881248234961875, "grad_norm": 3.5029111691103036, "learning_rate": 8.207201515984975e-08, "loss": 0.1526, "step": 20405 }, { "epoch": 2.881389438011861, "grad_norm": 2.8028669015685903, "learning_rate": 8.187722061439806e-08, "loss": 0.1341, "step": 20406 }, { "epoch": 2.881530641061847, "grad_norm": 2.911891182187555, "learning_rate": 8.168265656183783e-08, "loss": 0.1536, "step": 20407 }, { "epoch": 2.8816718441118327, "grad_norm": 2.689927866483319, "learning_rate": 8.148832300668763e-08, "loss": 0.1645, "step": 20408 }, { "epoch": 2.8818130471618186, "grad_norm": 2.496662417374966, "learning_rate": 8.129421995346609e-08, "loss": 0.1086, "step": 20409 }, { "epoch": 2.8819542502118045, "grad_norm": 3.435291686232006, "learning_rate": 8.110034740668293e-08, "loss": 0.1445, "step": 20410 }, { "epoch": 2.8820954532617904, "grad_norm": 3.1480099953221212, "learning_rate": 8.090670537084455e-08, "loss": 0.1388, "step": 20411 }, { "epoch": 2.8822366563117763, "grad_norm": 3.3802958207009235, "learning_rate": 8.071329385045068e-08, "loss": 0.1186, "step": 20412 }, { "epoch": 2.882377859361762, "grad_norm": 3.0299613977686866, "learning_rate": 8.052011284999661e-08, "loss": 0.1635, "step": 20413 }, { "epoch": 2.882519062411748, "grad_norm": 2.797734570472381, "learning_rate": 8.032716237396987e-08, "loss": 0.1287, "step": 20414 }, { "epoch": 2.882660265461734, "grad_norm": 3.0674494840440403, "learning_rate": 8.013444242685686e-08, "loss": 0.1274, "step": 20415 }, { "epoch": 2.88280146851172, "grad_norm": 3.570408554229105, "learning_rate": 7.9941953013134e-08, "loss": 0.1341, "step": 20416 }, { "epoch": 2.8829426715617057, "grad_norm": 2.6160332332851692, "learning_rate": 7.974969413727773e-08, "loss": 0.1012, "step": 20417 }, { "epoch": 2.8830838746116916, "grad_norm": 3.1600160253748526, "learning_rate": 7.955766580375334e-08, "loss": 0.1219, "step": 20418 }, { "epoch": 2.8832250776616775, "grad_norm": 3.5516690128159762, "learning_rate": 7.936586801702507e-08, "loss": 0.1553, "step": 20419 }, { "epoch": 2.8833662807116633, "grad_norm": 3.201764644214429, "learning_rate": 7.91743007815493e-08, "loss": 0.1403, "step": 20420 }, { "epoch": 2.8835074837616492, "grad_norm": 3.4835164755404313, "learning_rate": 7.898296410177808e-08, "loss": 0.1588, "step": 20421 }, { "epoch": 2.883648686811635, "grad_norm": 2.669971832620358, "learning_rate": 7.879185798215894e-08, "loss": 0.1186, "step": 20422 }, { "epoch": 2.883789889861621, "grad_norm": 3.226234347399841, "learning_rate": 7.860098242713165e-08, "loss": 0.1292, "step": 20423 }, { "epoch": 2.883931092911607, "grad_norm": 3.6306433597131176, "learning_rate": 7.841033744113268e-08, "loss": 0.179, "step": 20424 }, { "epoch": 2.8840722959615928, "grad_norm": 3.288603361273725, "learning_rate": 7.821992302859405e-08, "loss": 0.1428, "step": 20425 }, { "epoch": 2.8842134990115786, "grad_norm": 3.4295717963947254, "learning_rate": 7.802973919393775e-08, "loss": 0.1485, "step": 20426 }, { "epoch": 2.8843547020615645, "grad_norm": 2.8880597218069433, "learning_rate": 7.783978594158581e-08, "loss": 0.149, "step": 20427 }, { "epoch": 2.8844959051115504, "grad_norm": 3.473708487842749, "learning_rate": 7.765006327595248e-08, "loss": 0.1715, "step": 20428 }, { "epoch": 2.8846371081615363, "grad_norm": 2.8780097829298894, "learning_rate": 7.746057120144757e-08, "loss": 0.1179, "step": 20429 }, { "epoch": 2.884778311211522, "grad_norm": 3.3571081234988167, "learning_rate": 7.727130972247199e-08, "loss": 0.1499, "step": 20430 }, { "epoch": 2.884919514261508, "grad_norm": 4.520239199347437, "learning_rate": 7.708227884342667e-08, "loss": 0.2158, "step": 20431 }, { "epoch": 2.885060717311494, "grad_norm": 3.2602143715118816, "learning_rate": 7.689347856870366e-08, "loss": 0.1592, "step": 20432 }, { "epoch": 2.88520192036148, "grad_norm": 2.792615806408365, "learning_rate": 7.670490890269055e-08, "loss": 0.1239, "step": 20433 }, { "epoch": 2.8853431234114657, "grad_norm": 3.317346808137323, "learning_rate": 7.651656984977051e-08, "loss": 0.1701, "step": 20434 }, { "epoch": 2.8854843264614516, "grad_norm": 3.3121621295514805, "learning_rate": 7.632846141432004e-08, "loss": 0.165, "step": 20435 }, { "epoch": 2.8856255295114375, "grad_norm": 3.627267679816503, "learning_rate": 7.614058360070897e-08, "loss": 0.1699, "step": 20436 }, { "epoch": 2.8857667325614234, "grad_norm": 2.541823423977845, "learning_rate": 7.595293641330714e-08, "loss": 0.1189, "step": 20437 }, { "epoch": 2.8859079356114092, "grad_norm": 3.371866772937369, "learning_rate": 7.576551985647107e-08, "loss": 0.1627, "step": 20438 }, { "epoch": 2.886049138661395, "grad_norm": 3.0140698138967434, "learning_rate": 7.557833393455838e-08, "loss": 0.146, "step": 20439 }, { "epoch": 2.886190341711381, "grad_norm": 3.3115059466649606, "learning_rate": 7.539137865192003e-08, "loss": 0.1246, "step": 20440 }, { "epoch": 2.886331544761367, "grad_norm": 2.5562396716782274, "learning_rate": 7.520465401290033e-08, "loss": 0.1155, "step": 20441 }, { "epoch": 2.8864727478113528, "grad_norm": 3.4525700310216636, "learning_rate": 7.501816002183803e-08, "loss": 0.1483, "step": 20442 }, { "epoch": 2.8866139508613387, "grad_norm": 3.246805829556253, "learning_rate": 7.483189668306635e-08, "loss": 0.1643, "step": 20443 }, { "epoch": 2.8867551539113245, "grad_norm": 3.036268791761304, "learning_rate": 7.464586400091623e-08, "loss": 0.1211, "step": 20444 }, { "epoch": 2.8868963569613104, "grad_norm": 2.8018187823484957, "learning_rate": 7.446006197970867e-08, "loss": 0.0977, "step": 20445 }, { "epoch": 2.8870375600112963, "grad_norm": 3.6775682429886585, "learning_rate": 7.427449062376468e-08, "loss": 0.1711, "step": 20446 }, { "epoch": 2.887178763061282, "grad_norm": 3.2024177711186086, "learning_rate": 7.408914993739303e-08, "loss": 0.1249, "step": 20447 }, { "epoch": 2.887319966111268, "grad_norm": 3.2107032137662115, "learning_rate": 7.390403992490358e-08, "loss": 0.1408, "step": 20448 }, { "epoch": 2.887461169161254, "grad_norm": 2.9117696689707167, "learning_rate": 7.371916059059847e-08, "loss": 0.126, "step": 20449 }, { "epoch": 2.88760237221124, "grad_norm": 2.8832625732627757, "learning_rate": 7.353451193877092e-08, "loss": 0.15, "step": 20450 }, { "epoch": 2.8877435752612257, "grad_norm": 3.393575700983024, "learning_rate": 7.33500939737164e-08, "loss": 0.2014, "step": 20451 }, { "epoch": 2.8878847783112116, "grad_norm": 3.5786111034923875, "learning_rate": 7.316590669971813e-08, "loss": 0.1421, "step": 20452 }, { "epoch": 2.8880259813611975, "grad_norm": 4.086293983587457, "learning_rate": 7.298195012105713e-08, "loss": 0.1734, "step": 20453 }, { "epoch": 2.8881671844111834, "grad_norm": 3.2527230166278374, "learning_rate": 7.279822424200889e-08, "loss": 0.1261, "step": 20454 }, { "epoch": 2.8883083874611692, "grad_norm": 3.074007960174228, "learning_rate": 7.261472906684108e-08, "loss": 0.1408, "step": 20455 }, { "epoch": 2.888449590511155, "grad_norm": 4.259213897994005, "learning_rate": 7.243146459982142e-08, "loss": 0.1841, "step": 20456 }, { "epoch": 2.888590793561141, "grad_norm": 2.9888962442678344, "learning_rate": 7.224843084520649e-08, "loss": 0.1443, "step": 20457 }, { "epoch": 2.888731996611127, "grad_norm": 3.174048788913956, "learning_rate": 7.206562780725068e-08, "loss": 0.165, "step": 20458 }, { "epoch": 2.888873199661113, "grad_norm": 3.349364519204902, "learning_rate": 7.18830554902017e-08, "loss": 0.1373, "step": 20459 }, { "epoch": 2.8890144027110987, "grad_norm": 3.729323360519263, "learning_rate": 7.17007138983028e-08, "loss": 0.1566, "step": 20460 }, { "epoch": 2.8891556057610845, "grad_norm": 3.0511628559730744, "learning_rate": 7.151860303579283e-08, "loss": 0.1425, "step": 20461 }, { "epoch": 2.8892968088110704, "grad_norm": 4.625338837018194, "learning_rate": 7.133672290690064e-08, "loss": 0.159, "step": 20462 }, { "epoch": 2.8894380118610563, "grad_norm": 3.0466701860429115, "learning_rate": 7.115507351585727e-08, "loss": 0.1335, "step": 20463 }, { "epoch": 2.889579214911042, "grad_norm": 3.539604967695227, "learning_rate": 7.097365486688158e-08, "loss": 0.1634, "step": 20464 }, { "epoch": 2.889720417961028, "grad_norm": 2.755663442756148, "learning_rate": 7.079246696418906e-08, "loss": 0.1742, "step": 20465 }, { "epoch": 2.889861621011014, "grad_norm": 3.410707496183685, "learning_rate": 7.061150981199194e-08, "loss": 0.1562, "step": 20466 }, { "epoch": 2.890002824061, "grad_norm": 2.9440052357002893, "learning_rate": 7.043078341449572e-08, "loss": 0.1179, "step": 20467 }, { "epoch": 2.8901440271109857, "grad_norm": 3.5253782786270595, "learning_rate": 7.025028777589926e-08, "loss": 0.1769, "step": 20468 }, { "epoch": 2.8902852301609716, "grad_norm": 4.939303099223028, "learning_rate": 7.00700229003981e-08, "loss": 0.2284, "step": 20469 }, { "epoch": 2.8904264332109575, "grad_norm": 3.1588728121618685, "learning_rate": 6.988998879218111e-08, "loss": 0.1413, "step": 20470 }, { "epoch": 2.8905676362609434, "grad_norm": 2.8564319047344098, "learning_rate": 6.971018545543273e-08, "loss": 0.1428, "step": 20471 }, { "epoch": 2.8907088393109293, "grad_norm": 2.246405780771575, "learning_rate": 6.953061289433072e-08, "loss": 0.0955, "step": 20472 }, { "epoch": 2.890850042360915, "grad_norm": 3.268887350689115, "learning_rate": 6.93512711130484e-08, "loss": 0.1682, "step": 20473 }, { "epoch": 2.890991245410901, "grad_norm": 3.3161696418291466, "learning_rate": 6.917216011575357e-08, "loss": 0.1638, "step": 20474 }, { "epoch": 2.891132448460887, "grad_norm": 4.039205924603781, "learning_rate": 6.899327990660953e-08, "loss": 0.1999, "step": 20475 }, { "epoch": 2.891273651510873, "grad_norm": 3.2633857839496, "learning_rate": 6.8814630489773e-08, "loss": 0.1535, "step": 20476 }, { "epoch": 2.8914148545608587, "grad_norm": 3.2603687812863074, "learning_rate": 6.863621186939506e-08, "loss": 0.1216, "step": 20477 }, { "epoch": 2.8915560576108446, "grad_norm": 3.080963820471434, "learning_rate": 6.845802404962243e-08, "loss": 0.1097, "step": 20478 }, { "epoch": 2.8916972606608304, "grad_norm": 3.238059986106422, "learning_rate": 6.828006703459622e-08, "loss": 0.1538, "step": 20479 }, { "epoch": 2.8918384637108163, "grad_norm": 2.7824932750983304, "learning_rate": 6.810234082845313e-08, "loss": 0.1189, "step": 20480 }, { "epoch": 2.891979666760802, "grad_norm": 2.767215899042566, "learning_rate": 6.792484543532096e-08, "loss": 0.1277, "step": 20481 }, { "epoch": 2.892120869810788, "grad_norm": 3.0587162095894596, "learning_rate": 6.774758085932642e-08, "loss": 0.1715, "step": 20482 }, { "epoch": 2.892262072860774, "grad_norm": 3.0856715704051294, "learning_rate": 6.757054710458955e-08, "loss": 0.1154, "step": 20483 }, { "epoch": 2.89240327591076, "grad_norm": 3.7429884957504793, "learning_rate": 6.73937441752226e-08, "loss": 0.1335, "step": 20484 }, { "epoch": 2.8925444789607457, "grad_norm": 2.9677825162101588, "learning_rate": 6.721717207533563e-08, "loss": 0.1112, "step": 20485 }, { "epoch": 2.8926856820107316, "grad_norm": 2.6400047096170787, "learning_rate": 6.704083080903201e-08, "loss": 0.1257, "step": 20486 }, { "epoch": 2.8928268850607175, "grad_norm": 2.7548952586345603, "learning_rate": 6.68647203804107e-08, "loss": 0.1141, "step": 20487 }, { "epoch": 2.8929680881107034, "grad_norm": 3.5257397342755192, "learning_rate": 6.668884079356287e-08, "loss": 0.1751, "step": 20488 }, { "epoch": 2.8931092911606893, "grad_norm": 3.4157519565920924, "learning_rate": 6.651319205257633e-08, "loss": 0.1406, "step": 20489 }, { "epoch": 2.893250494210675, "grad_norm": 2.9373819735385043, "learning_rate": 6.633777416153232e-08, "loss": 0.1074, "step": 20490 }, { "epoch": 2.893391697260661, "grad_norm": 2.6086504842958833, "learning_rate": 6.616258712450973e-08, "loss": 0.1406, "step": 20491 }, { "epoch": 2.893532900310647, "grad_norm": 2.353247234724596, "learning_rate": 6.59876309455787e-08, "loss": 0.1016, "step": 20492 }, { "epoch": 2.893674103360633, "grad_norm": 3.2037996857375735, "learning_rate": 6.581290562880372e-08, "loss": 0.1406, "step": 20493 }, { "epoch": 2.8938153064106187, "grad_norm": 3.067531594511451, "learning_rate": 6.56384111782482e-08, "loss": 0.1637, "step": 20494 }, { "epoch": 2.8939565094606046, "grad_norm": 2.9093622091328597, "learning_rate": 6.546414759796448e-08, "loss": 0.1209, "step": 20495 }, { "epoch": 2.8940977125105904, "grad_norm": 3.3240384152724465, "learning_rate": 6.529011489200377e-08, "loss": 0.117, "step": 20496 }, { "epoch": 2.8942389155605763, "grad_norm": 3.3032984332072552, "learning_rate": 6.511631306441058e-08, "loss": 0.1687, "step": 20497 }, { "epoch": 2.894380118610562, "grad_norm": 3.1027684887947524, "learning_rate": 6.494274211922392e-08, "loss": 0.1489, "step": 20498 }, { "epoch": 2.894521321660548, "grad_norm": 3.075119500500554, "learning_rate": 6.476940206047722e-08, "loss": 0.1738, "step": 20499 }, { "epoch": 2.894662524710534, "grad_norm": 3.491713722629365, "learning_rate": 6.459629289219838e-08, "loss": 0.1576, "step": 20500 }, { "epoch": 2.89480372776052, "grad_norm": 3.023544705036932, "learning_rate": 6.442341461841084e-08, "loss": 0.1374, "step": 20501 }, { "epoch": 2.8949449308105057, "grad_norm": 2.728131576731426, "learning_rate": 6.425076724313251e-08, "loss": 0.0948, "step": 20502 }, { "epoch": 2.8950861338604916, "grad_norm": 4.135358863271494, "learning_rate": 6.407835077037572e-08, "loss": 0.153, "step": 20503 }, { "epoch": 2.8952273369104775, "grad_norm": 2.870204425534631, "learning_rate": 6.390616520414617e-08, "loss": 0.1301, "step": 20504 }, { "epoch": 2.8953685399604634, "grad_norm": 2.708127117657482, "learning_rate": 6.373421054844842e-08, "loss": 0.105, "step": 20505 }, { "epoch": 2.8955097430104493, "grad_norm": 3.4564497029643535, "learning_rate": 6.356248680727484e-08, "loss": 0.1468, "step": 20506 }, { "epoch": 2.895650946060435, "grad_norm": 3.3600931621531567, "learning_rate": 6.339099398461778e-08, "loss": 0.1108, "step": 20507 }, { "epoch": 2.895792149110421, "grad_norm": 3.135136934310426, "learning_rate": 6.321973208446298e-08, "loss": 0.1202, "step": 20508 }, { "epoch": 2.895933352160407, "grad_norm": 2.8108124745935563, "learning_rate": 6.304870111079053e-08, "loss": 0.1137, "step": 20509 }, { "epoch": 2.896074555210393, "grad_norm": 2.092923482290512, "learning_rate": 6.287790106757396e-08, "loss": 0.0885, "step": 20510 }, { "epoch": 2.8962157582603787, "grad_norm": 3.0261933405427897, "learning_rate": 6.270733195878454e-08, "loss": 0.1096, "step": 20511 }, { "epoch": 2.8963569613103646, "grad_norm": 2.8067009564149363, "learning_rate": 6.253699378838462e-08, "loss": 0.1278, "step": 20512 }, { "epoch": 2.8964981643603505, "grad_norm": 3.555494746221602, "learning_rate": 6.23668865603333e-08, "loss": 0.151, "step": 20513 }, { "epoch": 2.896639367410336, "grad_norm": 2.702676450591189, "learning_rate": 6.219701027858405e-08, "loss": 0.1288, "step": 20514 }, { "epoch": 2.896780570460322, "grad_norm": 3.345827631928737, "learning_rate": 6.202736494708484e-08, "loss": 0.1359, "step": 20515 }, { "epoch": 2.8969217735103077, "grad_norm": 2.8478107297888267, "learning_rate": 6.185795056977695e-08, "loss": 0.1091, "step": 20516 }, { "epoch": 2.8970629765602935, "grad_norm": 2.8027055545669914, "learning_rate": 6.168876715059835e-08, "loss": 0.1194, "step": 20517 }, { "epoch": 2.8972041796102794, "grad_norm": 2.5689122611176476, "learning_rate": 6.151981469348034e-08, "loss": 0.1111, "step": 20518 }, { "epoch": 2.8973453826602653, "grad_norm": 3.0549548478960116, "learning_rate": 6.135109320235089e-08, "loss": 0.1278, "step": 20519 }, { "epoch": 2.897486585710251, "grad_norm": 2.6359749845202782, "learning_rate": 6.118260268112908e-08, "loss": 0.1282, "step": 20520 }, { "epoch": 2.897627788760237, "grad_norm": 4.077794127699537, "learning_rate": 6.101434313373178e-08, "loss": 0.1514, "step": 20521 }, { "epoch": 2.897768991810223, "grad_norm": 3.524269086398971, "learning_rate": 6.084631456406919e-08, "loss": 0.1549, "step": 20522 }, { "epoch": 2.897910194860209, "grad_norm": 2.458890944174128, "learning_rate": 6.067851697604599e-08, "loss": 0.0995, "step": 20523 }, { "epoch": 2.8980513979101947, "grad_norm": 3.5448931519412525, "learning_rate": 6.051095037356013e-08, "loss": 0.1395, "step": 20524 }, { "epoch": 2.8981926009601806, "grad_norm": 2.516123939259161, "learning_rate": 6.034361476050854e-08, "loss": 0.0996, "step": 20525 }, { "epoch": 2.8983338040101665, "grad_norm": 3.6325705908781725, "learning_rate": 6.017651014077807e-08, "loss": 0.1291, "step": 20526 }, { "epoch": 2.8984750070601524, "grad_norm": 2.785311381226458, "learning_rate": 6.000963651825343e-08, "loss": 0.103, "step": 20527 }, { "epoch": 2.8986162101101383, "grad_norm": 3.014158136556519, "learning_rate": 5.984299389681148e-08, "loss": 0.1028, "step": 20528 }, { "epoch": 2.898757413160124, "grad_norm": 4.469046889064139, "learning_rate": 5.96765822803258e-08, "loss": 0.1745, "step": 20529 }, { "epoch": 2.89889861621011, "grad_norm": 4.261606787965814, "learning_rate": 5.951040167266331e-08, "loss": 0.2166, "step": 20530 }, { "epoch": 2.899039819260096, "grad_norm": 2.8366535666458903, "learning_rate": 5.9344452077686464e-08, "loss": 0.1137, "step": 20531 }, { "epoch": 2.899181022310082, "grad_norm": 2.9425002978559354, "learning_rate": 5.9178733499251073e-08, "loss": 0.1232, "step": 20532 }, { "epoch": 2.8993222253600677, "grad_norm": 3.510458337353858, "learning_rate": 5.9013245941209606e-08, "loss": 0.1416, "step": 20533 }, { "epoch": 2.8994634284100536, "grad_norm": 3.185630911202359, "learning_rate": 5.884798940740566e-08, "loss": 0.1271, "step": 20534 }, { "epoch": 2.8996046314600394, "grad_norm": 4.253389590310115, "learning_rate": 5.868296390168282e-08, "loss": 0.2083, "step": 20535 }, { "epoch": 2.8997458345100253, "grad_norm": 2.916721573396054, "learning_rate": 5.8518169427873584e-08, "loss": 0.1247, "step": 20536 }, { "epoch": 2.899887037560011, "grad_norm": 3.230876002981545, "learning_rate": 5.8353605989808216e-08, "loss": 0.1226, "step": 20537 }, { "epoch": 2.900028240609997, "grad_norm": 3.6281781335381953, "learning_rate": 5.8189273591312546e-08, "loss": 0.2213, "step": 20538 }, { "epoch": 2.900169443659983, "grad_norm": 2.689572629520733, "learning_rate": 5.802517223620463e-08, "loss": 0.1362, "step": 20539 }, { "epoch": 2.900310646709969, "grad_norm": 3.5437054851037924, "learning_rate": 5.786130192829809e-08, "loss": 0.124, "step": 20540 }, { "epoch": 2.9004518497599547, "grad_norm": 3.1720214747014523, "learning_rate": 5.769766267140098e-08, "loss": 0.1214, "step": 20541 }, { "epoch": 2.9005930528099406, "grad_norm": 3.722765653954463, "learning_rate": 5.753425446931582e-08, "loss": 0.1467, "step": 20542 }, { "epoch": 2.9007342558599265, "grad_norm": 3.508121745691823, "learning_rate": 5.7371077325841795e-08, "loss": 0.154, "step": 20543 }, { "epoch": 2.9008754589099124, "grad_norm": 3.211094480738291, "learning_rate": 5.7208131244769206e-08, "loss": 0.1527, "step": 20544 }, { "epoch": 2.9010166619598983, "grad_norm": 3.3009171935040826, "learning_rate": 5.704541622988613e-08, "loss": 0.1512, "step": 20545 }, { "epoch": 2.901157865009884, "grad_norm": 3.560123632873036, "learning_rate": 5.688293228497399e-08, "loss": 0.146, "step": 20546 }, { "epoch": 2.90129906805987, "grad_norm": 3.3610064312673034, "learning_rate": 5.6720679413807546e-08, "loss": 0.1418, "step": 20547 }, { "epoch": 2.901440271109856, "grad_norm": 2.6780967734068053, "learning_rate": 5.655865762015822e-08, "loss": 0.1374, "step": 20548 }, { "epoch": 2.901581474159842, "grad_norm": 2.8386528058332856, "learning_rate": 5.6396866907791893e-08, "loss": 0.1518, "step": 20549 }, { "epoch": 2.9017226772098277, "grad_norm": 3.5994912872353693, "learning_rate": 5.623530728046889e-08, "loss": 0.1468, "step": 20550 }, { "epoch": 2.9018638802598136, "grad_norm": 3.373388226146045, "learning_rate": 5.607397874194176e-08, "loss": 0.1266, "step": 20551 }, { "epoch": 2.9020050833097994, "grad_norm": 3.2965930174226448, "learning_rate": 5.591288129596195e-08, "loss": 0.1574, "step": 20552 }, { "epoch": 2.9021462863597853, "grad_norm": 3.47342503135177, "learning_rate": 5.575201494627092e-08, "loss": 0.166, "step": 20553 }, { "epoch": 2.902287489409771, "grad_norm": 3.718683682231933, "learning_rate": 5.559137969660899e-08, "loss": 0.1697, "step": 20554 }, { "epoch": 2.902428692459757, "grad_norm": 3.0037856051481095, "learning_rate": 5.543097555070875e-08, "loss": 0.1389, "step": 20555 }, { "epoch": 2.902569895509743, "grad_norm": 3.7801174401943785, "learning_rate": 5.527080251229833e-08, "loss": 0.1817, "step": 20556 }, { "epoch": 2.902711098559729, "grad_norm": 3.805432416613619, "learning_rate": 5.511086058510029e-08, "loss": 0.1742, "step": 20557 }, { "epoch": 2.9028523016097147, "grad_norm": 3.187896854003314, "learning_rate": 5.495114977282945e-08, "loss": 0.1447, "step": 20558 }, { "epoch": 2.9029935046597006, "grad_norm": 2.0275498532588063, "learning_rate": 5.47916700791995e-08, "loss": 0.078, "step": 20559 }, { "epoch": 2.9031347077096865, "grad_norm": 3.2328042519877855, "learning_rate": 5.4632421507916366e-08, "loss": 0.1228, "step": 20560 }, { "epoch": 2.9032759107596724, "grad_norm": 3.760951903687996, "learning_rate": 5.447340406268042e-08, "loss": 0.1724, "step": 20561 }, { "epoch": 2.9034171138096583, "grad_norm": 3.4254710868456293, "learning_rate": 5.43146177471876e-08, "loss": 0.1647, "step": 20562 }, { "epoch": 2.903558316859644, "grad_norm": 2.913810170412307, "learning_rate": 5.4156062565128284e-08, "loss": 0.1206, "step": 20563 }, { "epoch": 2.90369951990963, "grad_norm": 2.77004274528071, "learning_rate": 5.3997738520186194e-08, "loss": 0.1112, "step": 20564 }, { "epoch": 2.903840722959616, "grad_norm": 3.633839262611724, "learning_rate": 5.3839645616041716e-08, "loss": 0.1905, "step": 20565 }, { "epoch": 2.903981926009602, "grad_norm": 2.5640380386202066, "learning_rate": 5.3681783856367466e-08, "loss": 0.1067, "step": 20566 }, { "epoch": 2.9041231290595877, "grad_norm": 2.6716682409252104, "learning_rate": 5.3524153244833844e-08, "loss": 0.1169, "step": 20567 }, { "epoch": 2.9042643321095736, "grad_norm": 3.4311303208707002, "learning_rate": 5.336675378510348e-08, "loss": 0.1808, "step": 20568 }, { "epoch": 2.9044055351595595, "grad_norm": 3.0770370668437153, "learning_rate": 5.3209585480834546e-08, "loss": 0.1479, "step": 20569 }, { "epoch": 2.9045467382095453, "grad_norm": 2.82957882467653, "learning_rate": 5.305264833567747e-08, "loss": 0.1381, "step": 20570 }, { "epoch": 2.9046879412595312, "grad_norm": 2.9235539149470755, "learning_rate": 5.289594235328266e-08, "loss": 0.1247, "step": 20571 }, { "epoch": 2.904829144309517, "grad_norm": 3.069535426301139, "learning_rate": 5.273946753728942e-08, "loss": 0.1348, "step": 20572 }, { "epoch": 2.904970347359503, "grad_norm": 2.5431649632960496, "learning_rate": 5.2583223891335963e-08, "loss": 0.1154, "step": 20573 }, { "epoch": 2.905111550409489, "grad_norm": 2.792071088708111, "learning_rate": 5.2427211419051605e-08, "loss": 0.1085, "step": 20574 }, { "epoch": 2.9052527534594748, "grad_norm": 3.111142221769998, "learning_rate": 5.227143012406344e-08, "loss": 0.132, "step": 20575 }, { "epoch": 2.9053939565094606, "grad_norm": 3.012555053401586, "learning_rate": 5.2115880009990796e-08, "loss": 0.1299, "step": 20576 }, { "epoch": 2.9055351595594465, "grad_norm": 3.585818064677974, "learning_rate": 5.1960561080448556e-08, "loss": 0.1811, "step": 20577 }, { "epoch": 2.9056763626094324, "grad_norm": 3.6598738957239303, "learning_rate": 5.1805473339047173e-08, "loss": 0.1452, "step": 20578 }, { "epoch": 2.9058175656594183, "grad_norm": 3.190465222885676, "learning_rate": 5.165061678939043e-08, "loss": 0.1712, "step": 20579 }, { "epoch": 2.905958768709404, "grad_norm": 4.6852118542538275, "learning_rate": 5.1495991435076555e-08, "loss": 0.2239, "step": 20580 }, { "epoch": 2.90609997175939, "grad_norm": 2.5084334614923836, "learning_rate": 5.134159727970045e-08, "loss": 0.077, "step": 20581 }, { "epoch": 2.906241174809376, "grad_norm": 3.7488561526009287, "learning_rate": 5.118743432684814e-08, "loss": 0.1459, "step": 20582 }, { "epoch": 2.906382377859362, "grad_norm": 3.0030546666322824, "learning_rate": 5.1033502580103424e-08, "loss": 0.1444, "step": 20583 }, { "epoch": 2.9065235809093477, "grad_norm": 3.1552555327796172, "learning_rate": 5.0879802043043434e-08, "loss": 0.1241, "step": 20584 }, { "epoch": 2.9066647839593336, "grad_norm": 3.012337417866576, "learning_rate": 5.0726332719240875e-08, "loss": 0.1334, "step": 20585 }, { "epoch": 2.9068059870093195, "grad_norm": 2.9748743749248723, "learning_rate": 5.0573094612260676e-08, "loss": 0.1298, "step": 20586 }, { "epoch": 2.9069471900593054, "grad_norm": 3.933147051744183, "learning_rate": 5.0420087725664424e-08, "loss": 0.1597, "step": 20587 }, { "epoch": 2.9070883931092912, "grad_norm": 2.659026973823948, "learning_rate": 5.0267312063009275e-08, "loss": 0.1261, "step": 20588 }, { "epoch": 2.907229596159277, "grad_norm": 3.8984229382983395, "learning_rate": 5.011476762784462e-08, "loss": 0.1396, "step": 20589 }, { "epoch": 2.907370799209263, "grad_norm": 3.445395458874534, "learning_rate": 4.99624544237165e-08, "loss": 0.1649, "step": 20590 }, { "epoch": 2.907512002259249, "grad_norm": 3.8573817686669156, "learning_rate": 4.9810372454163204e-08, "loss": 0.1699, "step": 20591 }, { "epoch": 2.9076532053092348, "grad_norm": 3.1669987064714795, "learning_rate": 4.9658521722719676e-08, "loss": 0.1478, "step": 20592 }, { "epoch": 2.9077944083592207, "grad_norm": 3.3638835504309457, "learning_rate": 4.950690223291532e-08, "loss": 0.1473, "step": 20593 }, { "epoch": 2.9079356114092065, "grad_norm": 2.448279382540172, "learning_rate": 4.9355513988272874e-08, "loss": 0.119, "step": 20594 }, { "epoch": 2.9080768144591924, "grad_norm": 3.065077446080416, "learning_rate": 4.920435699231063e-08, "loss": 0.1366, "step": 20595 }, { "epoch": 2.9082180175091783, "grad_norm": 3.0248104246976135, "learning_rate": 4.905343124854245e-08, "loss": 0.1252, "step": 20596 }, { "epoch": 2.908359220559164, "grad_norm": 3.112156540386536, "learning_rate": 4.890273676047441e-08, "loss": 0.1782, "step": 20597 }, { "epoch": 2.90850042360915, "grad_norm": 3.4431935218503615, "learning_rate": 4.8752273531609276e-08, "loss": 0.1335, "step": 20598 }, { "epoch": 2.908641626659136, "grad_norm": 2.4756058760101745, "learning_rate": 4.860204156544423e-08, "loss": 0.1018, "step": 20599 }, { "epoch": 2.908782829709122, "grad_norm": 3.024293720603294, "learning_rate": 4.845204086546984e-08, "loss": 0.1285, "step": 20600 }, { "epoch": 2.9089240327591077, "grad_norm": 3.3369630983548104, "learning_rate": 4.8302271435172185e-08, "loss": 0.1658, "step": 20601 }, { "epoch": 2.9090652358090936, "grad_norm": 3.189440499259207, "learning_rate": 4.815273327803183e-08, "loss": 0.128, "step": 20602 }, { "epoch": 2.9092064388590795, "grad_norm": 4.185070068262172, "learning_rate": 4.800342639752376e-08, "loss": 0.1894, "step": 20603 }, { "epoch": 2.9093476419090654, "grad_norm": 2.794209858605379, "learning_rate": 4.785435079711853e-08, "loss": 0.1342, "step": 20604 }, { "epoch": 2.9094888449590512, "grad_norm": 3.0896557774257465, "learning_rate": 4.770550648028005e-08, "loss": 0.1591, "step": 20605 }, { "epoch": 2.909630048009037, "grad_norm": 3.068451601135974, "learning_rate": 4.7556893450466656e-08, "loss": 0.1225, "step": 20606 }, { "epoch": 2.909771251059023, "grad_norm": 3.143305517524094, "learning_rate": 4.7408511711134476e-08, "loss": 0.1341, "step": 20607 }, { "epoch": 2.909912454109009, "grad_norm": 2.872643059985098, "learning_rate": 4.7260361265729635e-08, "loss": 0.1242, "step": 20608 }, { "epoch": 2.9100536571589948, "grad_norm": 3.3369244685605635, "learning_rate": 4.7112442117696056e-08, "loss": 0.1608, "step": 20609 }, { "epoch": 2.9101948602089807, "grad_norm": 2.542858835176921, "learning_rate": 4.696475427047098e-08, "loss": 0.1075, "step": 20610 }, { "epoch": 2.9103360632589665, "grad_norm": 3.074874785051404, "learning_rate": 4.681729772748611e-08, "loss": 0.116, "step": 20611 }, { "epoch": 2.9104772663089524, "grad_norm": 2.568794795805062, "learning_rate": 4.66700724921687e-08, "loss": 0.1251, "step": 20612 }, { "epoch": 2.9106184693589383, "grad_norm": 3.548891185444923, "learning_rate": 4.6523078567941584e-08, "loss": 0.1394, "step": 20613 }, { "epoch": 2.910759672408924, "grad_norm": 3.4916704760749617, "learning_rate": 4.6376315958218676e-08, "loss": 0.179, "step": 20614 }, { "epoch": 2.91090087545891, "grad_norm": 3.079411566346087, "learning_rate": 4.622978466641059e-08, "loss": 0.1381, "step": 20615 }, { "epoch": 2.911042078508896, "grad_norm": 4.009749453657712, "learning_rate": 4.608348469592461e-08, "loss": 0.1848, "step": 20616 }, { "epoch": 2.9111832815588814, "grad_norm": 2.5861744635033, "learning_rate": 4.593741605016022e-08, "loss": 0.1272, "step": 20617 }, { "epoch": 2.9113244846088673, "grad_norm": 3.8874165149774735, "learning_rate": 4.5791578732510276e-08, "loss": 0.1574, "step": 20618 }, { "epoch": 2.911465687658853, "grad_norm": 3.7686938576421705, "learning_rate": 4.5645972746366504e-08, "loss": 0.2157, "step": 20619 }, { "epoch": 2.911606890708839, "grad_norm": 2.4213737039995946, "learning_rate": 4.5500598095110645e-08, "loss": 0.1254, "step": 20620 }, { "epoch": 2.911748093758825, "grad_norm": 3.371569193850073, "learning_rate": 4.535545478212111e-08, "loss": 0.1302, "step": 20621 }, { "epoch": 2.911889296808811, "grad_norm": 4.105715252389308, "learning_rate": 4.5210542810771864e-08, "loss": 0.177, "step": 20622 }, { "epoch": 2.9120304998587967, "grad_norm": 3.7299333545776747, "learning_rate": 4.5065862184431316e-08, "loss": 0.1319, "step": 20623 }, { "epoch": 2.9121717029087826, "grad_norm": 3.946706279814059, "learning_rate": 4.492141290646124e-08, "loss": 0.1511, "step": 20624 }, { "epoch": 2.9123129059587685, "grad_norm": 3.470777075526393, "learning_rate": 4.477719498021782e-08, "loss": 0.1478, "step": 20625 }, { "epoch": 2.9124541090087543, "grad_norm": 2.9124860624509914, "learning_rate": 4.463320840905283e-08, "loss": 0.1345, "step": 20626 }, { "epoch": 2.9125953120587402, "grad_norm": 2.888242426750076, "learning_rate": 4.448945319631248e-08, "loss": 0.1295, "step": 20627 }, { "epoch": 2.912736515108726, "grad_norm": 3.668216460436277, "learning_rate": 4.4345929345337436e-08, "loss": 0.1344, "step": 20628 }, { "epoch": 2.912877718158712, "grad_norm": 3.0647255760013583, "learning_rate": 4.420263685946391e-08, "loss": 0.15, "step": 20629 }, { "epoch": 2.913018921208698, "grad_norm": 3.4077466793394797, "learning_rate": 4.405957574202147e-08, "loss": 0.1246, "step": 20630 }, { "epoch": 2.9131601242586838, "grad_norm": 3.1885507043893395, "learning_rate": 4.391674599633522e-08, "loss": 0.1644, "step": 20631 }, { "epoch": 2.9133013273086696, "grad_norm": 3.65860796835861, "learning_rate": 4.377414762572474e-08, "loss": 0.1784, "step": 20632 }, { "epoch": 2.9134425303586555, "grad_norm": 3.069461625502871, "learning_rate": 4.363178063350182e-08, "loss": 0.1368, "step": 20633 }, { "epoch": 2.9135837334086414, "grad_norm": 2.7567457953426335, "learning_rate": 4.348964502297714e-08, "loss": 0.1208, "step": 20634 }, { "epoch": 2.9137249364586273, "grad_norm": 2.7005781743177604, "learning_rate": 4.334774079745363e-08, "loss": 0.1408, "step": 20635 }, { "epoch": 2.913866139508613, "grad_norm": 3.0146244281751713, "learning_rate": 4.320606796022864e-08, "loss": 0.1615, "step": 20636 }, { "epoch": 2.914007342558599, "grad_norm": 3.4123430480684935, "learning_rate": 4.306462651459398e-08, "loss": 0.1468, "step": 20637 }, { "epoch": 2.914148545608585, "grad_norm": 2.702399196490913, "learning_rate": 4.292341646383813e-08, "loss": 0.1259, "step": 20638 }, { "epoch": 2.914289748658571, "grad_norm": 3.548458024606317, "learning_rate": 4.278243781124069e-08, "loss": 0.1612, "step": 20639 }, { "epoch": 2.9144309517085567, "grad_norm": 3.1274009864496817, "learning_rate": 4.264169056008016e-08, "loss": 0.1338, "step": 20640 }, { "epoch": 2.9145721547585426, "grad_norm": 3.8548837279296135, "learning_rate": 4.250117471362614e-08, "loss": 0.1303, "step": 20641 }, { "epoch": 2.9147133578085285, "grad_norm": 3.6116792849151427, "learning_rate": 4.23608902751449e-08, "loss": 0.1382, "step": 20642 }, { "epoch": 2.9148545608585144, "grad_norm": 2.9574634916319256, "learning_rate": 4.222083724789605e-08, "loss": 0.1178, "step": 20643 }, { "epoch": 2.9149957639085002, "grad_norm": 2.909926077374777, "learning_rate": 4.208101563513367e-08, "loss": 0.1305, "step": 20644 }, { "epoch": 2.915136966958486, "grad_norm": 3.3274497252526922, "learning_rate": 4.1941425440108484e-08, "loss": 0.1734, "step": 20645 }, { "epoch": 2.915278170008472, "grad_norm": 3.2982580975748683, "learning_rate": 4.1802066666064564e-08, "loss": 0.1617, "step": 20646 }, { "epoch": 2.915419373058458, "grad_norm": 2.7671825821532794, "learning_rate": 4.166293931624044e-08, "loss": 0.1181, "step": 20647 }, { "epoch": 2.9155605761084438, "grad_norm": 2.5012952267238893, "learning_rate": 4.152404339386795e-08, "loss": 0.1176, "step": 20648 }, { "epoch": 2.9157017791584297, "grad_norm": 2.995845114732307, "learning_rate": 4.1385378902175644e-08, "loss": 0.1525, "step": 20649 }, { "epoch": 2.9158429822084155, "grad_norm": 3.1210798974708114, "learning_rate": 4.1246945844387595e-08, "loss": 0.1441, "step": 20650 }, { "epoch": 2.9159841852584014, "grad_norm": 3.5658370907133, "learning_rate": 4.110874422371902e-08, "loss": 0.1688, "step": 20651 }, { "epoch": 2.9161253883083873, "grad_norm": 3.014379952635346, "learning_rate": 4.097077404338179e-08, "loss": 0.138, "step": 20652 }, { "epoch": 2.916266591358373, "grad_norm": 2.8224202474785347, "learning_rate": 4.083303530658334e-08, "loss": 0.1542, "step": 20653 }, { "epoch": 2.916407794408359, "grad_norm": 4.142879778442981, "learning_rate": 4.069552801652443e-08, "loss": 0.1627, "step": 20654 }, { "epoch": 2.916548997458345, "grad_norm": 3.2803040965345387, "learning_rate": 4.0558252176399196e-08, "loss": 0.1616, "step": 20655 }, { "epoch": 2.916690200508331, "grad_norm": 3.0218104968711583, "learning_rate": 4.042120778939951e-08, "loss": 0.1472, "step": 20656 }, { "epoch": 2.9168314035583167, "grad_norm": 2.811577288548532, "learning_rate": 4.0284394858710605e-08, "loss": 0.1463, "step": 20657 }, { "epoch": 2.9169726066083026, "grad_norm": 4.012848255537006, "learning_rate": 4.014781338751106e-08, "loss": 0.1628, "step": 20658 }, { "epoch": 2.9171138096582885, "grad_norm": 3.4371211572305826, "learning_rate": 4.001146337897388e-08, "loss": 0.1498, "step": 20659 }, { "epoch": 2.9172550127082744, "grad_norm": 2.791681621900621, "learning_rate": 3.987534483626987e-08, "loss": 0.127, "step": 20660 }, { "epoch": 2.9173962157582602, "grad_norm": 2.476333091612153, "learning_rate": 3.973945776256094e-08, "loss": 0.1118, "step": 20661 }, { "epoch": 2.917537418808246, "grad_norm": 4.105454122562266, "learning_rate": 3.9603802161005675e-08, "loss": 0.1472, "step": 20662 }, { "epoch": 2.917678621858232, "grad_norm": 3.9702008244447806, "learning_rate": 3.9468378034756006e-08, "loss": 0.1659, "step": 20663 }, { "epoch": 2.917819824908218, "grad_norm": 2.6704580893739545, "learning_rate": 3.933318538695941e-08, "loss": 0.1313, "step": 20664 }, { "epoch": 2.917961027958204, "grad_norm": 3.235185417130201, "learning_rate": 3.919822422075892e-08, "loss": 0.1334, "step": 20665 }, { "epoch": 2.9181022310081897, "grad_norm": 2.8602905402509773, "learning_rate": 3.906349453928981e-08, "loss": 0.1215, "step": 20666 }, { "epoch": 2.9182434340581755, "grad_norm": 2.910720535632143, "learning_rate": 3.892899634568292e-08, "loss": 0.1148, "step": 20667 }, { "epoch": 2.9183846371081614, "grad_norm": 3.275019393866095, "learning_rate": 3.8794729643064635e-08, "loss": 0.1658, "step": 20668 }, { "epoch": 2.9185258401581473, "grad_norm": 2.7619975483667036, "learning_rate": 3.866069443455467e-08, "loss": 0.1083, "step": 20669 }, { "epoch": 2.918667043208133, "grad_norm": 3.665868116531876, "learning_rate": 3.852689072326832e-08, "loss": 0.2055, "step": 20670 }, { "epoch": 2.918808246258119, "grad_norm": 3.6270764814892793, "learning_rate": 3.839331851231421e-08, "loss": 0.1548, "step": 20671 }, { "epoch": 2.918949449308105, "grad_norm": 3.231434037687821, "learning_rate": 3.8259977804797624e-08, "loss": 0.1238, "step": 20672 }, { "epoch": 2.919090652358091, "grad_norm": 3.0116236623376227, "learning_rate": 3.81268686038172e-08, "loss": 0.1157, "step": 20673 }, { "epoch": 2.9192318554080767, "grad_norm": 3.2034714968187816, "learning_rate": 3.799399091246603e-08, "loss": 0.1764, "step": 20674 }, { "epoch": 2.9193730584580626, "grad_norm": 2.1730914696243318, "learning_rate": 3.786134473383385e-08, "loss": 0.0982, "step": 20675 }, { "epoch": 2.9195142615080485, "grad_norm": 2.7394912855776874, "learning_rate": 3.772893007100042e-08, "loss": 0.1043, "step": 20676 }, { "epoch": 2.9196554645580344, "grad_norm": 2.9064309918986333, "learning_rate": 3.759674692704329e-08, "loss": 0.1529, "step": 20677 }, { "epoch": 2.9197966676080203, "grad_norm": 3.1629995701856384, "learning_rate": 3.7464795305036664e-08, "loss": 0.1466, "step": 20678 }, { "epoch": 2.919937870658006, "grad_norm": 3.6583183622244233, "learning_rate": 3.733307520804475e-08, "loss": 0.1486, "step": 20679 }, { "epoch": 2.920079073707992, "grad_norm": 2.7883636266396845, "learning_rate": 3.720158663913065e-08, "loss": 0.1476, "step": 20680 }, { "epoch": 2.920220276757978, "grad_norm": 3.678963589093205, "learning_rate": 3.7070329601348595e-08, "loss": 0.1674, "step": 20681 }, { "epoch": 2.920361479807964, "grad_norm": 3.175279569782027, "learning_rate": 3.693930409774948e-08, "loss": 0.1507, "step": 20682 }, { "epoch": 2.9205026828579497, "grad_norm": 3.5342030900986035, "learning_rate": 3.680851013137754e-08, "loss": 0.1295, "step": 20683 }, { "epoch": 2.9206438859079356, "grad_norm": 2.8709879688393145, "learning_rate": 3.6677947705273664e-08, "loss": 0.1314, "step": 20684 }, { "epoch": 2.9207850889579214, "grad_norm": 3.197324100402615, "learning_rate": 3.6547616822472096e-08, "loss": 0.1309, "step": 20685 }, { "epoch": 2.9209262920079073, "grad_norm": 2.9835732558840347, "learning_rate": 3.641751748600042e-08, "loss": 0.1314, "step": 20686 }, { "epoch": 2.921067495057893, "grad_norm": 2.5876950558789558, "learning_rate": 3.628764969888288e-08, "loss": 0.1126, "step": 20687 }, { "epoch": 2.921208698107879, "grad_norm": 3.0394205686051556, "learning_rate": 3.6158013464137056e-08, "loss": 0.1478, "step": 20688 }, { "epoch": 2.921349901157865, "grad_norm": 3.071610915454667, "learning_rate": 3.60286087847761e-08, "loss": 0.1247, "step": 20689 }, { "epoch": 2.921491104207851, "grad_norm": 3.00361193045679, "learning_rate": 3.589943566380649e-08, "loss": 0.1513, "step": 20690 }, { "epoch": 2.9216323072578367, "grad_norm": 3.0007849268330937, "learning_rate": 3.577049410423139e-08, "loss": 0.1575, "step": 20691 }, { "epoch": 2.9217735103078226, "grad_norm": 3.1187967551886517, "learning_rate": 3.5641784109047286e-08, "loss": 0.1358, "step": 20692 }, { "epoch": 2.9219147133578085, "grad_norm": 3.054698407647, "learning_rate": 3.5513305681244005e-08, "loss": 0.136, "step": 20693 }, { "epoch": 2.9220559164077944, "grad_norm": 2.915835599753541, "learning_rate": 3.538505882380916e-08, "loss": 0.1364, "step": 20694 }, { "epoch": 2.9221971194577803, "grad_norm": 3.1620959998214113, "learning_rate": 3.525704353972037e-08, "loss": 0.1336, "step": 20695 }, { "epoch": 2.922338322507766, "grad_norm": 3.0924892481007427, "learning_rate": 3.5129259831956366e-08, "loss": 0.1444, "step": 20696 }, { "epoch": 2.922479525557752, "grad_norm": 3.470002681050187, "learning_rate": 3.5001707703483654e-08, "loss": 0.1745, "step": 20697 }, { "epoch": 2.922620728607738, "grad_norm": 2.9846891055410776, "learning_rate": 3.4874387157268766e-08, "loss": 0.1446, "step": 20698 }, { "epoch": 2.922761931657724, "grad_norm": 2.7678246979430225, "learning_rate": 3.474729819626821e-08, "loss": 0.1088, "step": 20699 }, { "epoch": 2.9229031347077097, "grad_norm": 3.1710246294727544, "learning_rate": 3.4620440823438517e-08, "loss": 0.1143, "step": 20700 }, { "epoch": 2.9230443377576956, "grad_norm": 3.6941462057199694, "learning_rate": 3.449381504172511e-08, "loss": 0.1398, "step": 20701 }, { "epoch": 2.9231855408076814, "grad_norm": 3.855436662236348, "learning_rate": 3.436742085407119e-08, "loss": 0.1729, "step": 20702 }, { "epoch": 2.9233267438576673, "grad_norm": 3.4460999537157244, "learning_rate": 3.42412582634144e-08, "loss": 0.1375, "step": 20703 }, { "epoch": 2.923467946907653, "grad_norm": 2.988793469569093, "learning_rate": 3.411532727268796e-08, "loss": 0.1449, "step": 20704 }, { "epoch": 2.923609149957639, "grad_norm": 2.565248733656489, "learning_rate": 3.39896278848173e-08, "loss": 0.1066, "step": 20705 }, { "epoch": 2.923750353007625, "grad_norm": 3.3259557448948174, "learning_rate": 3.386416010272342e-08, "loss": 0.1599, "step": 20706 }, { "epoch": 2.923891556057611, "grad_norm": 3.325760222419784, "learning_rate": 3.373892392932177e-08, "loss": 0.158, "step": 20707 }, { "epoch": 2.9240327591075967, "grad_norm": 3.4260832924187716, "learning_rate": 3.361391936752445e-08, "loss": 0.1645, "step": 20708 }, { "epoch": 2.9241739621575826, "grad_norm": 3.041494824850197, "learning_rate": 3.348914642023471e-08, "loss": 0.1202, "step": 20709 }, { "epoch": 2.9243151652075685, "grad_norm": 3.5002303042328693, "learning_rate": 3.336460509035355e-08, "loss": 0.1636, "step": 20710 }, { "epoch": 2.9244563682575544, "grad_norm": 2.6280002463912866, "learning_rate": 3.324029538077422e-08, "loss": 0.1082, "step": 20711 }, { "epoch": 2.9245975713075403, "grad_norm": 2.8471540734428586, "learning_rate": 3.311621729438552e-08, "loss": 0.1349, "step": 20712 }, { "epoch": 2.924738774357526, "grad_norm": 2.836371985922127, "learning_rate": 3.299237083407292e-08, "loss": 0.1094, "step": 20713 }, { "epoch": 2.924879977407512, "grad_norm": 2.7592170590034395, "learning_rate": 3.2868756002712997e-08, "loss": 0.1278, "step": 20714 }, { "epoch": 2.925021180457498, "grad_norm": 3.4417930735856674, "learning_rate": 3.274537280317791e-08, "loss": 0.1548, "step": 20715 }, { "epoch": 2.925162383507484, "grad_norm": 2.7308953201801556, "learning_rate": 3.262222123833647e-08, "loss": 0.1038, "step": 20716 }, { "epoch": 2.9253035865574697, "grad_norm": 2.7316602019666765, "learning_rate": 3.249930131105083e-08, "loss": 0.1113, "step": 20717 }, { "epoch": 2.9254447896074556, "grad_norm": 2.214370781847232, "learning_rate": 3.2376613024175384e-08, "loss": 0.0972, "step": 20718 }, { "epoch": 2.9255859926574415, "grad_norm": 3.68726946261553, "learning_rate": 3.2254156380562284e-08, "loss": 0.1657, "step": 20719 }, { "epoch": 2.9257271957074273, "grad_norm": 3.206053138256326, "learning_rate": 3.2131931383059256e-08, "loss": 0.1352, "step": 20720 }, { "epoch": 2.9258683987574132, "grad_norm": 2.7041246542698967, "learning_rate": 3.200993803450514e-08, "loss": 0.1212, "step": 20721 }, { "epoch": 2.926009601807399, "grad_norm": 4.0396321016783325, "learning_rate": 3.1888176337734334e-08, "loss": 0.1911, "step": 20722 }, { "epoch": 2.926150804857385, "grad_norm": 3.6614859253957563, "learning_rate": 3.176664629557902e-08, "loss": 0.1632, "step": 20723 }, { "epoch": 2.926292007907371, "grad_norm": 3.2530270875938125, "learning_rate": 3.1645347910860266e-08, "loss": 0.1532, "step": 20724 }, { "epoch": 2.9264332109573568, "grad_norm": 3.9304211142910948, "learning_rate": 3.152428118639916e-08, "loss": 0.1747, "step": 20725 }, { "epoch": 2.9265744140073426, "grad_norm": 3.9863289913120044, "learning_rate": 3.140344612500901e-08, "loss": 0.1595, "step": 20726 }, { "epoch": 2.9267156170573285, "grad_norm": 3.4857989278888613, "learning_rate": 3.1282842729497556e-08, "loss": 0.1155, "step": 20727 }, { "epoch": 2.9268568201073144, "grad_norm": 2.8658592154206417, "learning_rate": 3.1162471002668113e-08, "loss": 0.1171, "step": 20728 }, { "epoch": 2.9269980231573003, "grad_norm": 4.260285034632423, "learning_rate": 3.1042330947316235e-08, "loss": 0.2423, "step": 20729 }, { "epoch": 2.927139226207286, "grad_norm": 3.131936961444449, "learning_rate": 3.092242256623634e-08, "loss": 0.1283, "step": 20730 }, { "epoch": 2.927280429257272, "grad_norm": 3.5823427070378586, "learning_rate": 3.080274586221399e-08, "loss": 0.1457, "step": 20731 }, { "epoch": 2.927421632307258, "grad_norm": 3.048283654211916, "learning_rate": 3.0683300838030285e-08, "loss": 0.114, "step": 20732 }, { "epoch": 2.927562835357244, "grad_norm": 3.5571135335614965, "learning_rate": 3.056408749646189e-08, "loss": 0.1377, "step": 20733 }, { "epoch": 2.9277040384072297, "grad_norm": 3.338133328715287, "learning_rate": 3.044510584027771e-08, "loss": 0.1666, "step": 20734 }, { "epoch": 2.9278452414572156, "grad_norm": 4.745238509575811, "learning_rate": 3.032635587224442e-08, "loss": 0.1735, "step": 20735 }, { "epoch": 2.9279864445072015, "grad_norm": 3.770963985580543, "learning_rate": 3.02078375951198e-08, "loss": 0.1478, "step": 20736 }, { "epoch": 2.9281276475571874, "grad_norm": 3.037137935565298, "learning_rate": 3.008955101166056e-08, "loss": 0.1166, "step": 20737 }, { "epoch": 2.9282688506071732, "grad_norm": 2.5014160603580553, "learning_rate": 2.997149612461447e-08, "loss": 0.0992, "step": 20738 }, { "epoch": 2.928410053657159, "grad_norm": 3.9436648003834134, "learning_rate": 2.985367293672492e-08, "loss": 0.1892, "step": 20739 }, { "epoch": 2.928551256707145, "grad_norm": 4.399151555603122, "learning_rate": 2.9736081450730813e-08, "loss": 0.2066, "step": 20740 }, { "epoch": 2.928692459757131, "grad_norm": 3.2549887831564885, "learning_rate": 2.9618721669363302e-08, "loss": 0.1314, "step": 20741 }, { "epoch": 2.9288336628071168, "grad_norm": 2.9503089534110805, "learning_rate": 2.950159359535132e-08, "loss": 0.1133, "step": 20742 }, { "epoch": 2.9289748658571026, "grad_norm": 2.9893571070488236, "learning_rate": 2.9384697231416016e-08, "loss": 0.1379, "step": 20743 }, { "epoch": 2.9291160689070885, "grad_norm": 3.9061673127968777, "learning_rate": 2.9268032580275218e-08, "loss": 0.175, "step": 20744 }, { "epoch": 2.9292572719570744, "grad_norm": 2.9599605293832982, "learning_rate": 2.9151599644638984e-08, "loss": 0.1169, "step": 20745 }, { "epoch": 2.9293984750070603, "grad_norm": 2.8603940390651803, "learning_rate": 2.9035398427212925e-08, "loss": 0.1288, "step": 20746 }, { "epoch": 2.929539678057046, "grad_norm": 3.8325177411621754, "learning_rate": 2.8919428930698213e-08, "loss": 0.1478, "step": 20747 }, { "epoch": 2.929680881107032, "grad_norm": 3.0140631099091215, "learning_rate": 2.880369115778936e-08, "loss": 0.1334, "step": 20748 }, { "epoch": 2.929822084157018, "grad_norm": 2.940696012982415, "learning_rate": 2.8688185111177546e-08, "loss": 0.1175, "step": 20749 }, { "epoch": 2.929963287207004, "grad_norm": 3.149891532154961, "learning_rate": 2.8572910793546183e-08, "loss": 0.1458, "step": 20750 }, { "epoch": 2.9301044902569897, "grad_norm": 3.4771600330852497, "learning_rate": 2.8457868207573126e-08, "loss": 0.1097, "step": 20751 }, { "epoch": 2.9302456933069756, "grad_norm": 3.4219733941442403, "learning_rate": 2.8343057355932902e-08, "loss": 0.1362, "step": 20752 }, { "epoch": 2.9303868963569615, "grad_norm": 3.518276664395193, "learning_rate": 2.8228478241294487e-08, "loss": 0.1813, "step": 20753 }, { "epoch": 2.9305280994069474, "grad_norm": 3.2968734481728816, "learning_rate": 2.8114130866319088e-08, "loss": 0.1406, "step": 20754 }, { "epoch": 2.9306693024569332, "grad_norm": 2.463559615728865, "learning_rate": 2.800001523366458e-08, "loss": 0.1286, "step": 20755 }, { "epoch": 2.930810505506919, "grad_norm": 3.3645537975815065, "learning_rate": 2.788613134598328e-08, "loss": 0.1299, "step": 20756 }, { "epoch": 2.930951708556905, "grad_norm": 3.7870126863352565, "learning_rate": 2.7772479205921964e-08, "loss": 0.1905, "step": 20757 }, { "epoch": 2.931092911606891, "grad_norm": 2.7123416693710003, "learning_rate": 2.7659058816121855e-08, "loss": 0.1432, "step": 20758 }, { "epoch": 2.9312341146568768, "grad_norm": 2.8392133576822967, "learning_rate": 2.7545870179217503e-08, "loss": 0.1418, "step": 20759 }, { "epoch": 2.9313753177068627, "grad_norm": 3.318621013788601, "learning_rate": 2.7432913297841256e-08, "loss": 0.1475, "step": 20760 }, { "epoch": 2.9315165207568485, "grad_norm": 3.605680501703827, "learning_rate": 2.732018817461657e-08, "loss": 0.192, "step": 20761 }, { "epoch": 2.9316577238068344, "grad_norm": 3.172437720371396, "learning_rate": 2.7207694812164677e-08, "loss": 0.1274, "step": 20762 }, { "epoch": 2.9317989268568203, "grad_norm": 3.6957012736735595, "learning_rate": 2.7095433213097933e-08, "loss": 0.1262, "step": 20763 }, { "epoch": 2.931940129906806, "grad_norm": 3.5779722452478215, "learning_rate": 2.6983403380026474e-08, "loss": 0.1168, "step": 20764 }, { "epoch": 2.932081332956792, "grad_norm": 3.3382984121007704, "learning_rate": 2.6871605315554882e-08, "loss": 0.1689, "step": 20765 }, { "epoch": 2.932222536006778, "grad_norm": 2.8566557995709085, "learning_rate": 2.676003902227886e-08, "loss": 0.1383, "step": 20766 }, { "epoch": 2.932363739056764, "grad_norm": 3.219517369005276, "learning_rate": 2.6648704502792998e-08, "loss": 0.1364, "step": 20767 }, { "epoch": 2.9325049421067497, "grad_norm": 5.2435036441773715, "learning_rate": 2.6537601759684118e-08, "loss": 0.2208, "step": 20768 }, { "epoch": 2.9326461451567356, "grad_norm": 3.254076522943835, "learning_rate": 2.6426730795534594e-08, "loss": 0.1591, "step": 20769 }, { "epoch": 2.9327873482067215, "grad_norm": 2.616383097656784, "learning_rate": 2.6316091612920146e-08, "loss": 0.1197, "step": 20770 }, { "epoch": 2.9329285512567074, "grad_norm": 3.4405313979722667, "learning_rate": 2.6205684214412052e-08, "loss": 0.158, "step": 20771 }, { "epoch": 2.9330697543066933, "grad_norm": 2.7847378574641675, "learning_rate": 2.6095508602577147e-08, "loss": 0.0951, "step": 20772 }, { "epoch": 2.933210957356679, "grad_norm": 2.710583778001825, "learning_rate": 2.5985564779974492e-08, "loss": 0.1454, "step": 20773 }, { "epoch": 2.933352160406665, "grad_norm": 3.188162854134947, "learning_rate": 2.5875852749160936e-08, "loss": 0.1725, "step": 20774 }, { "epoch": 2.933493363456651, "grad_norm": 3.1134741308208196, "learning_rate": 2.5766372512684436e-08, "loss": 0.1282, "step": 20775 }, { "epoch": 2.933634566506637, "grad_norm": 3.0486279150946567, "learning_rate": 2.5657124073089622e-08, "loss": 0.1303, "step": 20776 }, { "epoch": 2.9337757695566227, "grad_norm": 2.984271559681901, "learning_rate": 2.554810743291558e-08, "loss": 0.124, "step": 20777 }, { "epoch": 2.9339169726066086, "grad_norm": 2.591107220173452, "learning_rate": 2.5439322594696946e-08, "loss": 0.1023, "step": 20778 }, { "epoch": 2.9340581756565944, "grad_norm": 2.7031621622279283, "learning_rate": 2.5330769560959477e-08, "loss": 0.1113, "step": 20779 }, { "epoch": 2.9341993787065803, "grad_norm": 3.952708399348134, "learning_rate": 2.5222448334227822e-08, "loss": 0.1727, "step": 20780 }, { "epoch": 2.934340581756566, "grad_norm": 3.926308624944096, "learning_rate": 2.5114358917018857e-08, "loss": 0.1761, "step": 20781 }, { "epoch": 2.934481784806552, "grad_norm": 3.822256228825777, "learning_rate": 2.50065013118439e-08, "loss": 0.1519, "step": 20782 }, { "epoch": 2.934622987856538, "grad_norm": 3.3908326680839, "learning_rate": 2.4898875521209842e-08, "loss": 0.1205, "step": 20783 }, { "epoch": 2.934764190906524, "grad_norm": 3.1866167394019715, "learning_rate": 2.4791481547619123e-08, "loss": 0.1505, "step": 20784 }, { "epoch": 2.9349053939565097, "grad_norm": 2.5914952553931925, "learning_rate": 2.4684319393565303e-08, "loss": 0.0917, "step": 20785 }, { "epoch": 2.9350465970064956, "grad_norm": 2.772943975611951, "learning_rate": 2.4577389061539724e-08, "loss": 0.1345, "step": 20786 }, { "epoch": 2.935187800056481, "grad_norm": 3.294901325673289, "learning_rate": 2.4470690554028175e-08, "loss": 0.1438, "step": 20787 }, { "epoch": 2.935329003106467, "grad_norm": 3.6826856904299894, "learning_rate": 2.4364223873509785e-08, "loss": 0.1319, "step": 20788 }, { "epoch": 2.935470206156453, "grad_norm": 3.178624285318878, "learning_rate": 2.425798902245924e-08, "loss": 0.139, "step": 20789 }, { "epoch": 2.9356114092064387, "grad_norm": 2.268061069568154, "learning_rate": 2.4151986003343453e-08, "loss": 0.1087, "step": 20790 }, { "epoch": 2.9357526122564246, "grad_norm": 2.3688306421038874, "learning_rate": 2.4046214818628234e-08, "loss": 0.0939, "step": 20791 }, { "epoch": 2.9358938153064105, "grad_norm": 2.891617853686542, "learning_rate": 2.39406754707705e-08, "loss": 0.1202, "step": 20792 }, { "epoch": 2.9360350183563964, "grad_norm": 2.5410854851905125, "learning_rate": 2.3835367962222744e-08, "loss": 0.1177, "step": 20793 }, { "epoch": 2.9361762214063822, "grad_norm": 3.217081686721352, "learning_rate": 2.3730292295433e-08, "loss": 0.1476, "step": 20794 }, { "epoch": 2.936317424456368, "grad_norm": 3.928461664318603, "learning_rate": 2.362544847284265e-08, "loss": 0.14, "step": 20795 }, { "epoch": 2.936458627506354, "grad_norm": 3.857188635862983, "learning_rate": 2.352083649688863e-08, "loss": 0.124, "step": 20796 }, { "epoch": 2.93659983055634, "grad_norm": 3.582835451304525, "learning_rate": 2.3416456370002337e-08, "loss": 0.1484, "step": 20797 }, { "epoch": 2.9367410336063258, "grad_norm": 2.758273603749329, "learning_rate": 2.3312308094607382e-08, "loss": 0.1182, "step": 20798 }, { "epoch": 2.9368822366563117, "grad_norm": 3.641176472208826, "learning_rate": 2.3208391673127383e-08, "loss": 0.1748, "step": 20799 }, { "epoch": 2.9370234397062975, "grad_norm": 3.536891627822034, "learning_rate": 2.3104707107974857e-08, "loss": 0.1521, "step": 20800 }, { "epoch": 2.9371646427562834, "grad_norm": 3.444384909863173, "learning_rate": 2.30012544015612e-08, "loss": 0.164, "step": 20801 }, { "epoch": 2.9373058458062693, "grad_norm": 3.181454131820245, "learning_rate": 2.2898033556288946e-08, "loss": 0.1446, "step": 20802 }, { "epoch": 2.937447048856255, "grad_norm": 2.9297525219099816, "learning_rate": 2.279504457455728e-08, "loss": 0.1714, "step": 20803 }, { "epoch": 2.937588251906241, "grad_norm": 3.0786078644924983, "learning_rate": 2.2692287458760953e-08, "loss": 0.1504, "step": 20804 }, { "epoch": 2.937729454956227, "grad_norm": 2.8226802411412355, "learning_rate": 2.258976221128695e-08, "loss": 0.1028, "step": 20805 }, { "epoch": 2.937870658006213, "grad_norm": 3.1255491047069297, "learning_rate": 2.248746883451669e-08, "loss": 0.1193, "step": 20806 }, { "epoch": 2.9380118610561987, "grad_norm": 3.3759019348553623, "learning_rate": 2.238540733082939e-08, "loss": 0.1583, "step": 20807 }, { "epoch": 2.9381530641061846, "grad_norm": 3.1394799695288085, "learning_rate": 2.2283577702596482e-08, "loss": 0.15, "step": 20808 }, { "epoch": 2.9382942671561705, "grad_norm": 3.080033697115591, "learning_rate": 2.2181979952183852e-08, "loss": 0.1319, "step": 20809 }, { "epoch": 2.9384354702061564, "grad_norm": 2.646659003409431, "learning_rate": 2.2080614081954054e-08, "loss": 0.1124, "step": 20810 }, { "epoch": 2.9385766732561422, "grad_norm": 2.7532735925940774, "learning_rate": 2.1979480094260763e-08, "loss": 0.097, "step": 20811 }, { "epoch": 2.938717876306128, "grad_norm": 2.8327961331223546, "learning_rate": 2.187857799145432e-08, "loss": 0.1046, "step": 20812 }, { "epoch": 2.938859079356114, "grad_norm": 3.4852017056970164, "learning_rate": 2.1777907775881735e-08, "loss": 0.1559, "step": 20813 }, { "epoch": 2.9390002824061, "grad_norm": 3.096529974575009, "learning_rate": 2.167746944988114e-08, "loss": 0.1454, "step": 20814 }, { "epoch": 2.939141485456086, "grad_norm": 3.292291730196601, "learning_rate": 2.1577263015786222e-08, "loss": 0.1325, "step": 20815 }, { "epoch": 2.9392826885060717, "grad_norm": 3.7611463170135586, "learning_rate": 2.1477288475926227e-08, "loss": 0.1872, "step": 20816 }, { "epoch": 2.9394238915560575, "grad_norm": 3.589243375904884, "learning_rate": 2.137754583262486e-08, "loss": 0.1394, "step": 20817 }, { "epoch": 2.9395650946060434, "grad_norm": 3.0733195817462162, "learning_rate": 2.1278035088200255e-08, "loss": 0.1301, "step": 20818 }, { "epoch": 2.9397062976560293, "grad_norm": 3.9807362779359607, "learning_rate": 2.1178756244965014e-08, "loss": 0.2042, "step": 20819 }, { "epoch": 2.939847500706015, "grad_norm": 3.1575601435676433, "learning_rate": 2.1079709305226183e-08, "loss": 0.1193, "step": 20820 }, { "epoch": 2.939988703756001, "grad_norm": 3.321944566209505, "learning_rate": 2.0980894271284136e-08, "loss": 0.1326, "step": 20821 }, { "epoch": 2.940129906805987, "grad_norm": 2.7340081362552136, "learning_rate": 2.088231114543704e-08, "loss": 0.1181, "step": 20822 }, { "epoch": 2.940271109855973, "grad_norm": 3.0467824097379803, "learning_rate": 2.0783959929975283e-08, "loss": 0.1418, "step": 20823 }, { "epoch": 2.9404123129059587, "grad_norm": 3.258151619280257, "learning_rate": 2.0685840627184817e-08, "loss": 0.128, "step": 20824 }, { "epoch": 2.9405535159559446, "grad_norm": 3.3751396773628173, "learning_rate": 2.0587953239344926e-08, "loss": 0.1555, "step": 20825 }, { "epoch": 2.9406947190059305, "grad_norm": 3.7667517781513, "learning_rate": 2.049029776873268e-08, "loss": 0.2015, "step": 20826 }, { "epoch": 2.9408359220559164, "grad_norm": 2.9752751070893666, "learning_rate": 2.0392874217615154e-08, "loss": 0.095, "step": 20827 }, { "epoch": 2.9409771251059023, "grad_norm": 3.5186717591036207, "learning_rate": 2.0295682588257205e-08, "loss": 0.1469, "step": 20828 }, { "epoch": 2.941118328155888, "grad_norm": 3.0303472849423256, "learning_rate": 2.0198722882918132e-08, "loss": 0.1354, "step": 20829 }, { "epoch": 2.941259531205874, "grad_norm": 2.9106499232948955, "learning_rate": 2.010199510385058e-08, "loss": 0.143, "step": 20830 }, { "epoch": 2.94140073425586, "grad_norm": 3.2687039673298877, "learning_rate": 2.000549925330275e-08, "loss": 0.1453, "step": 20831 }, { "epoch": 2.941541937305846, "grad_norm": 3.198641735710895, "learning_rate": 1.9909235333517296e-08, "loss": 0.1204, "step": 20832 }, { "epoch": 2.9416831403558317, "grad_norm": 2.797373071666604, "learning_rate": 1.9813203346730203e-08, "loss": 0.1237, "step": 20833 }, { "epoch": 2.9418243434058176, "grad_norm": 3.769523009343869, "learning_rate": 1.9717403295175242e-08, "loss": 0.1657, "step": 20834 }, { "epoch": 2.9419655464558034, "grad_norm": 2.5729950204877805, "learning_rate": 1.9621835181077296e-08, "loss": 0.1051, "step": 20835 }, { "epoch": 2.9421067495057893, "grad_norm": 4.0096278710561295, "learning_rate": 1.952649900665793e-08, "loss": 0.191, "step": 20836 }, { "epoch": 2.942247952555775, "grad_norm": 3.3496934815222947, "learning_rate": 1.9431394774132028e-08, "loss": 0.1363, "step": 20837 }, { "epoch": 2.942389155605761, "grad_norm": 3.612548586756759, "learning_rate": 1.9336522485710053e-08, "loss": 0.1419, "step": 20838 }, { "epoch": 2.942530358655747, "grad_norm": 3.447937164167159, "learning_rate": 1.9241882143596903e-08, "loss": 0.1409, "step": 20839 }, { "epoch": 2.942671561705733, "grad_norm": 3.4358054521185353, "learning_rate": 1.914747374999304e-08, "loss": 0.1601, "step": 20840 }, { "epoch": 2.9428127647557187, "grad_norm": 2.4434554714948225, "learning_rate": 1.9053297307091157e-08, "loss": 0.0956, "step": 20841 }, { "epoch": 2.9429539678057046, "grad_norm": 2.313203894329447, "learning_rate": 1.89593528170795e-08, "loss": 0.0836, "step": 20842 }, { "epoch": 2.9430951708556905, "grad_norm": 3.7312536130921408, "learning_rate": 1.8865640282142995e-08, "loss": 0.1751, "step": 20843 }, { "epoch": 2.9432363739056764, "grad_norm": 2.916352656508384, "learning_rate": 1.877215970445767e-08, "loss": 0.1127, "step": 20844 }, { "epoch": 2.9433775769556623, "grad_norm": 2.7478853134814187, "learning_rate": 1.8678911086197348e-08, "loss": 0.1447, "step": 20845 }, { "epoch": 2.943518780005648, "grad_norm": 3.3637710555863163, "learning_rate": 1.8585894429528073e-08, "loss": 0.1578, "step": 20846 }, { "epoch": 2.943659983055634, "grad_norm": 3.2664600798477728, "learning_rate": 1.8493109736612558e-08, "loss": 0.1806, "step": 20847 }, { "epoch": 2.94380118610562, "grad_norm": 3.137013713899287, "learning_rate": 1.8400557009605746e-08, "loss": 0.113, "step": 20848 }, { "epoch": 2.943942389155606, "grad_norm": 2.9456304613209827, "learning_rate": 1.830823625066036e-08, "loss": 0.1362, "step": 20849 }, { "epoch": 2.9440835922055917, "grad_norm": 2.506184917764803, "learning_rate": 1.821614746191913e-08, "loss": 0.0869, "step": 20850 }, { "epoch": 2.9442247952555776, "grad_norm": 3.5990028623899573, "learning_rate": 1.81242906455259e-08, "loss": 0.1413, "step": 20851 }, { "epoch": 2.9443659983055634, "grad_norm": 2.7972524680367616, "learning_rate": 1.8032665803612294e-08, "loss": 0.1146, "step": 20852 }, { "epoch": 2.9445072013555493, "grad_norm": 2.315812344467535, "learning_rate": 1.794127293830883e-08, "loss": 0.1026, "step": 20853 }, { "epoch": 2.944648404405535, "grad_norm": 3.1038507930266297, "learning_rate": 1.7850112051738255e-08, "loss": 0.1395, "step": 20854 }, { "epoch": 2.944789607455521, "grad_norm": 3.4952038648883867, "learning_rate": 1.7759183146021098e-08, "loss": 0.1402, "step": 20855 }, { "epoch": 2.944930810505507, "grad_norm": 3.279000003336461, "learning_rate": 1.7668486223269e-08, "loss": 0.1333, "step": 20856 }, { "epoch": 2.945072013555493, "grad_norm": 3.084904745799255, "learning_rate": 1.7578021285590274e-08, "loss": 0.1335, "step": 20857 }, { "epoch": 2.9452132166054787, "grad_norm": 2.992898681457165, "learning_rate": 1.7487788335087686e-08, "loss": 0.1547, "step": 20858 }, { "epoch": 2.9453544196554646, "grad_norm": 2.4268520297089453, "learning_rate": 1.7397787373858442e-08, "loss": 0.1021, "step": 20859 }, { "epoch": 2.9454956227054505, "grad_norm": 3.921965799966041, "learning_rate": 1.7308018403991988e-08, "loss": 0.1478, "step": 20860 }, { "epoch": 2.9456368257554364, "grad_norm": 3.4429000397224665, "learning_rate": 1.721848142757665e-08, "loss": 0.1176, "step": 20861 }, { "epoch": 2.9457780288054223, "grad_norm": 2.9609772043201783, "learning_rate": 1.7129176446692986e-08, "loss": 0.1011, "step": 20862 }, { "epoch": 2.945919231855408, "grad_norm": 3.2668807164191627, "learning_rate": 1.704010346341489e-08, "loss": 0.1696, "step": 20863 }, { "epoch": 2.946060434905394, "grad_norm": 3.3291415592027853, "learning_rate": 1.6951262479815155e-08, "loss": 0.1531, "step": 20864 }, { "epoch": 2.94620163795538, "grad_norm": 3.80077216276676, "learning_rate": 1.686265349795546e-08, "loss": 0.1613, "step": 20865 }, { "epoch": 2.946342841005366, "grad_norm": 2.9402486294825683, "learning_rate": 1.6774276519896383e-08, "loss": 0.1373, "step": 20866 }, { "epoch": 2.9464840440553517, "grad_norm": 3.6470327679308916, "learning_rate": 1.668613154769183e-08, "loss": 0.1495, "step": 20867 }, { "epoch": 2.9466252471053376, "grad_norm": 2.5987681240669, "learning_rate": 1.6598218583390168e-08, "loss": 0.1308, "step": 20868 }, { "epoch": 2.9467664501553235, "grad_norm": 3.5234425628350166, "learning_rate": 1.6510537629034208e-08, "loss": 0.1945, "step": 20869 }, { "epoch": 2.9469076532053093, "grad_norm": 3.1796722069975774, "learning_rate": 1.6423088686662313e-08, "loss": 0.1362, "step": 20870 }, { "epoch": 2.9470488562552952, "grad_norm": 4.027731174556413, "learning_rate": 1.63358717583062e-08, "loss": 0.1629, "step": 20871 }, { "epoch": 2.947190059305281, "grad_norm": 3.8269986872090604, "learning_rate": 1.624888684599202e-08, "loss": 0.1549, "step": 20872 }, { "epoch": 2.947331262355267, "grad_norm": 3.6234108516456875, "learning_rate": 1.6162133951742596e-08, "loss": 0.161, "step": 20873 }, { "epoch": 2.947472465405253, "grad_norm": 2.9202614128479016, "learning_rate": 1.6075613077574103e-08, "loss": 0.1285, "step": 20874 }, { "epoch": 2.9476136684552388, "grad_norm": 3.284953817062396, "learning_rate": 1.598932422549604e-08, "loss": 0.1598, "step": 20875 }, { "epoch": 2.9477548715052246, "grad_norm": 3.9186934076941258, "learning_rate": 1.5903267397514576e-08, "loss": 0.1668, "step": 20876 }, { "epoch": 2.9478960745552105, "grad_norm": 3.1555250052329105, "learning_rate": 1.5817442595629228e-08, "loss": 0.1319, "step": 20877 }, { "epoch": 2.9480372776051964, "grad_norm": 2.501259278964831, "learning_rate": 1.5731849821833955e-08, "loss": 0.1232, "step": 20878 }, { "epoch": 2.9481784806551823, "grad_norm": 3.4120601913602044, "learning_rate": 1.5646489078119387e-08, "loss": 0.1432, "step": 20879 }, { "epoch": 2.948319683705168, "grad_norm": 3.424634254147939, "learning_rate": 1.556136036646838e-08, "loss": 0.1313, "step": 20880 }, { "epoch": 2.948460886755154, "grad_norm": 2.8009613495682624, "learning_rate": 1.5476463688859356e-08, "loss": 0.1152, "step": 20881 }, { "epoch": 2.94860208980514, "grad_norm": 3.020895799748451, "learning_rate": 1.5391799047266287e-08, "loss": 0.1615, "step": 20882 }, { "epoch": 2.948743292855126, "grad_norm": 3.439009135289018, "learning_rate": 1.530736644365427e-08, "loss": 0.1548, "step": 20883 }, { "epoch": 2.9488844959051117, "grad_norm": 3.7007266992295005, "learning_rate": 1.522316587998729e-08, "loss": 0.1213, "step": 20884 }, { "epoch": 2.9490256989550976, "grad_norm": 4.13813436384253, "learning_rate": 1.5139197358222668e-08, "loss": 0.2245, "step": 20885 }, { "epoch": 2.9491669020050835, "grad_norm": 3.057575122095157, "learning_rate": 1.5055460880311068e-08, "loss": 0.1597, "step": 20886 }, { "epoch": 2.9493081050550694, "grad_norm": 3.4989824142155, "learning_rate": 1.49719564481976e-08, "loss": 0.1324, "step": 20887 }, { "epoch": 2.9494493081050552, "grad_norm": 3.855893413690058, "learning_rate": 1.4888684063824044e-08, "loss": 0.1419, "step": 20888 }, { "epoch": 2.9495905111550407, "grad_norm": 3.0457680316268414, "learning_rate": 1.4805643729124408e-08, "loss": 0.1368, "step": 20889 }, { "epoch": 2.9497317142050266, "grad_norm": 2.435071140875976, "learning_rate": 1.4722835446030481e-08, "loss": 0.1057, "step": 20890 }, { "epoch": 2.9498729172550124, "grad_norm": 3.185032546782885, "learning_rate": 1.464025921646406e-08, "loss": 0.1293, "step": 20891 }, { "epoch": 2.9500141203049983, "grad_norm": 3.306850261169892, "learning_rate": 1.4557915042346937e-08, "loss": 0.1336, "step": 20892 }, { "epoch": 2.950155323354984, "grad_norm": 3.3474712984701878, "learning_rate": 1.447580292559092e-08, "loss": 0.1735, "step": 20893 }, { "epoch": 2.95029652640497, "grad_norm": 3.6664451191902647, "learning_rate": 1.4393922868105591e-08, "loss": 0.1462, "step": 20894 }, { "epoch": 2.950437729454956, "grad_norm": 3.2735761219995143, "learning_rate": 1.4312274871792763e-08, "loss": 0.1255, "step": 20895 }, { "epoch": 2.950578932504942, "grad_norm": 3.1394314327220463, "learning_rate": 1.4230858938549808e-08, "loss": 0.1374, "step": 20896 }, { "epoch": 2.9507201355549277, "grad_norm": 2.9706231605602422, "learning_rate": 1.4149675070269653e-08, "loss": 0.164, "step": 20897 }, { "epoch": 2.9508613386049136, "grad_norm": 2.917511909952137, "learning_rate": 1.4068723268837459e-08, "loss": 0.1377, "step": 20898 }, { "epoch": 2.9510025416548995, "grad_norm": 3.4381218576849113, "learning_rate": 1.3988003536137273e-08, "loss": 0.1484, "step": 20899 }, { "epoch": 2.9511437447048854, "grad_norm": 3.2422735335112516, "learning_rate": 1.3907515874042044e-08, "loss": 0.1288, "step": 20900 }, { "epoch": 2.9512849477548713, "grad_norm": 2.7846083691438324, "learning_rate": 1.3827260284423604e-08, "loss": 0.1007, "step": 20901 }, { "epoch": 2.951426150804857, "grad_norm": 2.6141712434729447, "learning_rate": 1.3747236769147133e-08, "loss": 0.118, "step": 20902 }, { "epoch": 2.951567353854843, "grad_norm": 3.192281116888545, "learning_rate": 1.366744533007225e-08, "loss": 0.1359, "step": 20903 }, { "epoch": 2.951708556904829, "grad_norm": 2.5669603554333316, "learning_rate": 1.3587885969051917e-08, "loss": 0.1363, "step": 20904 }, { "epoch": 2.951849759954815, "grad_norm": 4.427866181204975, "learning_rate": 1.350855868793799e-08, "loss": 0.1925, "step": 20905 }, { "epoch": 2.9519909630048007, "grad_norm": 2.565464631903426, "learning_rate": 1.3429463488571216e-08, "loss": 0.1145, "step": 20906 }, { "epoch": 2.9521321660547866, "grad_norm": 3.1719569338103115, "learning_rate": 1.3350600372791234e-08, "loss": 0.1474, "step": 20907 }, { "epoch": 2.9522733691047724, "grad_norm": 3.0280094087104543, "learning_rate": 1.3271969342431023e-08, "loss": 0.1519, "step": 20908 }, { "epoch": 2.9524145721547583, "grad_norm": 3.539706646074939, "learning_rate": 1.3193570399316902e-08, "loss": 0.2107, "step": 20909 }, { "epoch": 2.952555775204744, "grad_norm": 3.575091544901411, "learning_rate": 1.3115403545270744e-08, "loss": 0.1377, "step": 20910 }, { "epoch": 2.95269697825473, "grad_norm": 3.4694779669667373, "learning_rate": 1.3037468782109986e-08, "loss": 0.1592, "step": 20911 }, { "epoch": 2.952838181304716, "grad_norm": 3.324585501213045, "learning_rate": 1.29597661116454e-08, "loss": 0.1723, "step": 20912 }, { "epoch": 2.952979384354702, "grad_norm": 3.7274612407735046, "learning_rate": 1.288229553568221e-08, "loss": 0.199, "step": 20913 }, { "epoch": 2.9531205874046877, "grad_norm": 3.386342526587411, "learning_rate": 1.2805057056022307e-08, "loss": 0.1356, "step": 20914 }, { "epoch": 2.9532617904546736, "grad_norm": 2.859239229103449, "learning_rate": 1.2728050674459814e-08, "loss": 0.1276, "step": 20915 }, { "epoch": 2.9534029935046595, "grad_norm": 3.382259907885206, "learning_rate": 1.2651276392783297e-08, "loss": 0.1651, "step": 20916 }, { "epoch": 2.9535441965546454, "grad_norm": 3.2308894305501044, "learning_rate": 1.2574734212779105e-08, "loss": 0.124, "step": 20917 }, { "epoch": 2.9536853996046313, "grad_norm": 3.50807596465905, "learning_rate": 1.2498424136223597e-08, "loss": 0.1603, "step": 20918 }, { "epoch": 2.953826602654617, "grad_norm": 3.8764045845226427, "learning_rate": 1.2422346164892018e-08, "loss": 0.1738, "step": 20919 }, { "epoch": 2.953967805704603, "grad_norm": 2.967259658853862, "learning_rate": 1.2346500300551844e-08, "loss": 0.123, "step": 20920 }, { "epoch": 2.954109008754589, "grad_norm": 2.7684775757952464, "learning_rate": 1.227088654496611e-08, "loss": 0.1197, "step": 20921 }, { "epoch": 2.954250211804575, "grad_norm": 4.133150322753982, "learning_rate": 1.2195504899890076e-08, "loss": 0.1811, "step": 20922 }, { "epoch": 2.9543914148545607, "grad_norm": 3.1322301932909955, "learning_rate": 1.2120355367079007e-08, "loss": 0.1194, "step": 20923 }, { "epoch": 2.9545326179045466, "grad_norm": 3.065874403059459, "learning_rate": 1.2045437948275952e-08, "loss": 0.1825, "step": 20924 }, { "epoch": 2.9546738209545325, "grad_norm": 3.4732700956114315, "learning_rate": 1.197075264522396e-08, "loss": 0.1311, "step": 20925 }, { "epoch": 2.9548150240045183, "grad_norm": 2.627620326962049, "learning_rate": 1.1896299459658311e-08, "loss": 0.0914, "step": 20926 }, { "epoch": 2.9549562270545042, "grad_norm": 2.7751812151786233, "learning_rate": 1.1822078393309844e-08, "loss": 0.0985, "step": 20927 }, { "epoch": 2.95509743010449, "grad_norm": 3.394453796661725, "learning_rate": 1.1748089447901623e-08, "loss": 0.2011, "step": 20928 }, { "epoch": 2.955238633154476, "grad_norm": 3.200775885712225, "learning_rate": 1.1674332625154494e-08, "loss": 0.1338, "step": 20929 }, { "epoch": 2.955379836204462, "grad_norm": 3.338893544431498, "learning_rate": 1.1600807926782642e-08, "loss": 0.1339, "step": 20930 }, { "epoch": 2.9555210392544478, "grad_norm": 4.018132346422408, "learning_rate": 1.152751535449359e-08, "loss": 0.1388, "step": 20931 }, { "epoch": 2.9556622423044336, "grad_norm": 2.6722062048004656, "learning_rate": 1.145445490999153e-08, "loss": 0.1203, "step": 20932 }, { "epoch": 2.9558034453544195, "grad_norm": 2.951709143765773, "learning_rate": 1.1381626594975103e-08, "loss": 0.0877, "step": 20933 }, { "epoch": 2.9559446484044054, "grad_norm": 3.3025445750709794, "learning_rate": 1.130903041113518e-08, "loss": 0.139, "step": 20934 }, { "epoch": 2.9560858514543913, "grad_norm": 3.5946964043945098, "learning_rate": 1.1236666360159299e-08, "loss": 0.1584, "step": 20935 }, { "epoch": 2.956227054504377, "grad_norm": 2.5610504318441287, "learning_rate": 1.1164534443730557e-08, "loss": 0.0978, "step": 20936 }, { "epoch": 2.956368257554363, "grad_norm": 3.692911897300744, "learning_rate": 1.1092634663523171e-08, "loss": 0.1365, "step": 20937 }, { "epoch": 2.956509460604349, "grad_norm": 3.187921788572486, "learning_rate": 1.1020967021210249e-08, "loss": 0.1421, "step": 20938 }, { "epoch": 2.956650663654335, "grad_norm": 3.496858424771091, "learning_rate": 1.0949531518454904e-08, "loss": 0.1912, "step": 20939 }, { "epoch": 2.9567918667043207, "grad_norm": 3.167092149933974, "learning_rate": 1.0878328156919139e-08, "loss": 0.1522, "step": 20940 }, { "epoch": 2.9569330697543066, "grad_norm": 3.1919593841485296, "learning_rate": 1.0807356938256074e-08, "loss": 0.1589, "step": 20941 }, { "epoch": 2.9570742728042925, "grad_norm": 3.479331699096202, "learning_rate": 1.0736617864117727e-08, "loss": 0.1433, "step": 20942 }, { "epoch": 2.9572154758542784, "grad_norm": 3.6290364438021863, "learning_rate": 1.0666110936145002e-08, "loss": 0.1454, "step": 20943 }, { "epoch": 2.9573566789042642, "grad_norm": 3.7750765923061675, "learning_rate": 1.0595836155978811e-08, "loss": 0.1834, "step": 20944 }, { "epoch": 2.95749788195425, "grad_norm": 3.124862124433934, "learning_rate": 1.0525793525250072e-08, "loss": 0.1292, "step": 20945 }, { "epoch": 2.957639085004236, "grad_norm": 2.4536237906543814, "learning_rate": 1.0455983045588591e-08, "loss": 0.1277, "step": 20946 }, { "epoch": 2.957780288054222, "grad_norm": 3.2828696618730837, "learning_rate": 1.0386404718616406e-08, "loss": 0.1387, "step": 20947 }, { "epoch": 2.9579214911042078, "grad_norm": 3.6300171237004752, "learning_rate": 1.0317058545948888e-08, "loss": 0.1914, "step": 20948 }, { "epoch": 2.9580626941541937, "grad_norm": 3.6384366386314038, "learning_rate": 1.0247944529199193e-08, "loss": 0.1461, "step": 20949 }, { "epoch": 2.9582038972041795, "grad_norm": 4.67957418413978, "learning_rate": 1.0179062669972705e-08, "loss": 0.1885, "step": 20950 }, { "epoch": 2.9583451002541654, "grad_norm": 2.7183991668721834, "learning_rate": 1.0110412969871475e-08, "loss": 0.1327, "step": 20951 }, { "epoch": 2.9584863033041513, "grad_norm": 3.055314771943912, "learning_rate": 1.0041995430488671e-08, "loss": 0.1323, "step": 20952 }, { "epoch": 2.958627506354137, "grad_norm": 2.942627860393065, "learning_rate": 9.973810053416356e-09, "loss": 0.1219, "step": 20953 }, { "epoch": 2.958768709404123, "grad_norm": 3.0529522882345583, "learning_rate": 9.905856840238815e-09, "loss": 0.1319, "step": 20954 }, { "epoch": 2.958909912454109, "grad_norm": 2.783210766361708, "learning_rate": 9.838135792533676e-09, "loss": 0.1419, "step": 20955 }, { "epoch": 2.959051115504095, "grad_norm": 2.8611864191240746, "learning_rate": 9.770646911876347e-09, "loss": 0.1164, "step": 20956 }, { "epoch": 2.9591923185540807, "grad_norm": 3.777205700060491, "learning_rate": 9.703390199834461e-09, "loss": 0.135, "step": 20957 }, { "epoch": 2.9593335216040666, "grad_norm": 3.269026498811678, "learning_rate": 9.636365657971215e-09, "loss": 0.1673, "step": 20958 }, { "epoch": 2.9594747246540525, "grad_norm": 2.485896563978982, "learning_rate": 9.569573287845357e-09, "loss": 0.1108, "step": 20959 }, { "epoch": 2.9596159277040384, "grad_norm": 3.1275251110341507, "learning_rate": 9.503013091006763e-09, "loss": 0.1184, "step": 20960 }, { "epoch": 2.9597571307540242, "grad_norm": 3.274138137800996, "learning_rate": 9.436685069004192e-09, "loss": 0.1611, "step": 20961 }, { "epoch": 2.95989833380401, "grad_norm": 3.443405447376654, "learning_rate": 9.370589223378635e-09, "loss": 0.1544, "step": 20962 }, { "epoch": 2.960039536853996, "grad_norm": 3.0748457951091575, "learning_rate": 9.304725555665528e-09, "loss": 0.1338, "step": 20963 }, { "epoch": 2.960180739903982, "grad_norm": 3.7556867007562444, "learning_rate": 9.239094067396982e-09, "loss": 0.1654, "step": 20964 }, { "epoch": 2.9603219429539678, "grad_norm": 2.9073318374364776, "learning_rate": 9.173694760096219e-09, "loss": 0.1232, "step": 20965 }, { "epoch": 2.9604631460039537, "grad_norm": 3.109219157025226, "learning_rate": 9.108527635284248e-09, "loss": 0.1385, "step": 20966 }, { "epoch": 2.9606043490539395, "grad_norm": 2.8004784197050863, "learning_rate": 9.043592694475412e-09, "loss": 0.1295, "step": 20967 }, { "epoch": 2.9607455521039254, "grad_norm": 2.9274664219014173, "learning_rate": 8.978889939178503e-09, "loss": 0.1478, "step": 20968 }, { "epoch": 2.9608867551539113, "grad_norm": 3.1895038946071277, "learning_rate": 8.914419370897876e-09, "loss": 0.13, "step": 20969 }, { "epoch": 2.961027958203897, "grad_norm": 3.372245409808537, "learning_rate": 8.850180991131219e-09, "loss": 0.1103, "step": 20970 }, { "epoch": 2.961169161253883, "grad_norm": 3.4135469954778737, "learning_rate": 8.786174801370673e-09, "loss": 0.1647, "step": 20971 }, { "epoch": 2.961310364303869, "grad_norm": 3.4334545812809387, "learning_rate": 8.722400803106157e-09, "loss": 0.1682, "step": 20972 }, { "epoch": 2.961451567353855, "grad_norm": 2.8223747300517874, "learning_rate": 8.658858997816488e-09, "loss": 0.1364, "step": 20973 }, { "epoch": 2.9615927704038407, "grad_norm": 4.014651242224271, "learning_rate": 8.595549386981595e-09, "loss": 0.1786, "step": 20974 }, { "epoch": 2.9617339734538266, "grad_norm": 2.930614623484727, "learning_rate": 8.53247197206919e-09, "loss": 0.1343, "step": 20975 }, { "epoch": 2.9618751765038125, "grad_norm": 3.9461326274396904, "learning_rate": 8.469626754549209e-09, "loss": 0.2161, "step": 20976 }, { "epoch": 2.9620163795537984, "grad_norm": 2.930159604387823, "learning_rate": 8.407013735878267e-09, "loss": 0.1272, "step": 20977 }, { "epoch": 2.9621575826037843, "grad_norm": 3.6321178927510305, "learning_rate": 8.344632917515194e-09, "loss": 0.1635, "step": 20978 }, { "epoch": 2.96229878565377, "grad_norm": 3.26693423482731, "learning_rate": 8.282484300906613e-09, "loss": 0.1289, "step": 20979 }, { "epoch": 2.962439988703756, "grad_norm": 2.8588647364914195, "learning_rate": 8.220567887498033e-09, "loss": 0.0958, "step": 20980 }, { "epoch": 2.962581191753742, "grad_norm": 2.869305698897458, "learning_rate": 8.158883678728303e-09, "loss": 0.1353, "step": 20981 }, { "epoch": 2.962722394803728, "grad_norm": 2.8776957220445754, "learning_rate": 8.09743167603072e-09, "loss": 0.1239, "step": 20982 }, { "epoch": 2.9628635978537137, "grad_norm": 3.1754993459158776, "learning_rate": 8.036211880834144e-09, "loss": 0.1182, "step": 20983 }, { "epoch": 2.9630048009036996, "grad_norm": 3.5422919290700525, "learning_rate": 7.975224294560769e-09, "loss": 0.1364, "step": 20984 }, { "epoch": 2.9631460039536854, "grad_norm": 2.948514609484278, "learning_rate": 7.914468918628348e-09, "loss": 0.1126, "step": 20985 }, { "epoch": 2.9632872070036713, "grad_norm": 3.8391716517794556, "learning_rate": 7.853945754447977e-09, "loss": 0.1898, "step": 20986 }, { "epoch": 2.963428410053657, "grad_norm": 2.7152258391253667, "learning_rate": 7.793654803426309e-09, "loss": 0.1309, "step": 20987 }, { "epoch": 2.963569613103643, "grad_norm": 2.927256147875384, "learning_rate": 7.733596066965555e-09, "loss": 0.1163, "step": 20988 }, { "epoch": 2.963710816153629, "grad_norm": 2.9628600210019416, "learning_rate": 7.673769546460153e-09, "loss": 0.1321, "step": 20989 }, { "epoch": 2.963852019203615, "grad_norm": 2.8048862223948423, "learning_rate": 7.614175243301213e-09, "loss": 0.1322, "step": 20990 }, { "epoch": 2.9639932222536007, "grad_norm": 2.345982602760121, "learning_rate": 7.554813158873186e-09, "loss": 0.1028, "step": 20991 }, { "epoch": 2.9641344253035866, "grad_norm": 3.110268873537656, "learning_rate": 7.495683294556078e-09, "loss": 0.148, "step": 20992 }, { "epoch": 2.9642756283535725, "grad_norm": 2.8176698707425216, "learning_rate": 7.436785651724343e-09, "loss": 0.1146, "step": 20993 }, { "epoch": 2.9644168314035584, "grad_norm": 3.1147915172981606, "learning_rate": 7.378120231745778e-09, "loss": 0.1445, "step": 20994 }, { "epoch": 2.9645580344535443, "grad_norm": 4.279855369330541, "learning_rate": 7.319687035983735e-09, "loss": 0.1946, "step": 20995 }, { "epoch": 2.96469923750353, "grad_norm": 2.896818949598069, "learning_rate": 7.26148606579713e-09, "loss": 0.1172, "step": 20996 }, { "epoch": 2.964840440553516, "grad_norm": 2.9577200511844866, "learning_rate": 7.203517322538211e-09, "loss": 0.1302, "step": 20997 }, { "epoch": 2.964981643603502, "grad_norm": 3.2391628617275887, "learning_rate": 7.145780807553681e-09, "loss": 0.1411, "step": 20998 }, { "epoch": 2.965122846653488, "grad_norm": 3.1520440350106766, "learning_rate": 7.0882765221858e-09, "loss": 0.134, "step": 20999 }, { "epoch": 2.9652640497034737, "grad_norm": 3.390775876141292, "learning_rate": 7.031004467771274e-09, "loss": 0.1648, "step": 21000 }, { "epoch": 2.9654052527534596, "grad_norm": 3.0235267782748534, "learning_rate": 6.973964645640152e-09, "loss": 0.1264, "step": 21001 }, { "epoch": 2.9655464558034454, "grad_norm": 3.219361656633308, "learning_rate": 6.91715705711804e-09, "loss": 0.1436, "step": 21002 }, { "epoch": 2.9656876588534313, "grad_norm": 3.1221555150131217, "learning_rate": 6.860581703526104e-09, "loss": 0.1568, "step": 21003 }, { "epoch": 2.965828861903417, "grad_norm": 3.409120810950975, "learning_rate": 6.804238586177736e-09, "loss": 0.1383, "step": 21004 }, { "epoch": 2.965970064953403, "grad_norm": 3.0107659771206094, "learning_rate": 6.748127706384111e-09, "loss": 0.1291, "step": 21005 }, { "epoch": 2.966111268003389, "grad_norm": 3.3365191404253434, "learning_rate": 6.692249065447521e-09, "loss": 0.1471, "step": 21006 }, { "epoch": 2.966252471053375, "grad_norm": 3.191440889233095, "learning_rate": 6.636602664668035e-09, "loss": 0.1224, "step": 21007 }, { "epoch": 2.9663936741033607, "grad_norm": 3.0344741004497213, "learning_rate": 6.5811885053368444e-09, "loss": 0.1666, "step": 21008 }, { "epoch": 2.9665348771533466, "grad_norm": 2.6352788398214595, "learning_rate": 6.526006588744027e-09, "loss": 0.0776, "step": 21009 }, { "epoch": 2.9666760802033325, "grad_norm": 3.822208840567407, "learning_rate": 6.471056916170782e-09, "loss": 0.1409, "step": 21010 }, { "epoch": 2.9668172832533184, "grad_norm": 3.153845501046757, "learning_rate": 6.416339488893864e-09, "loss": 0.1121, "step": 21011 }, { "epoch": 2.9669584863033043, "grad_norm": 4.389716641536935, "learning_rate": 6.361854308185589e-09, "loss": 0.1773, "step": 21012 }, { "epoch": 2.96709968935329, "grad_norm": 3.0115845511978487, "learning_rate": 6.307601375312722e-09, "loss": 0.1454, "step": 21013 }, { "epoch": 2.967240892403276, "grad_norm": 2.8532419260155804, "learning_rate": 6.253580691534255e-09, "loss": 0.12, "step": 21014 }, { "epoch": 2.967382095453262, "grad_norm": 2.7633431529123023, "learning_rate": 6.199792258106962e-09, "loss": 0.1505, "step": 21015 }, { "epoch": 2.967523298503248, "grad_norm": 2.9669673198935524, "learning_rate": 6.146236076279843e-09, "loss": 0.1325, "step": 21016 }, { "epoch": 2.9676645015532337, "grad_norm": 3.4762966648418288, "learning_rate": 6.0929121472996785e-09, "loss": 0.1553, "step": 21017 }, { "epoch": 2.9678057046032196, "grad_norm": 3.1925919631151105, "learning_rate": 6.039820472403257e-09, "loss": 0.1671, "step": 21018 }, { "epoch": 2.9679469076532055, "grad_norm": 3.3063965822323897, "learning_rate": 5.986961052825146e-09, "loss": 0.1477, "step": 21019 }, { "epoch": 2.9680881107031913, "grad_norm": 3.851778152619592, "learning_rate": 5.934333889794364e-09, "loss": 0.1696, "step": 21020 }, { "epoch": 2.9682293137531772, "grad_norm": 3.0275365503824263, "learning_rate": 5.881938984533264e-09, "loss": 0.1249, "step": 21021 }, { "epoch": 2.968370516803163, "grad_norm": 3.9395511172534263, "learning_rate": 5.8297763382597625e-09, "loss": 0.1583, "step": 21022 }, { "epoch": 2.968511719853149, "grad_norm": 3.1316371878544444, "learning_rate": 5.777845952186223e-09, "loss": 0.143, "step": 21023 }, { "epoch": 2.968652922903135, "grad_norm": 2.7669193176657396, "learning_rate": 5.726147827519457e-09, "loss": 0.1267, "step": 21024 }, { "epoch": 2.9687941259531208, "grad_norm": 3.3651175504250945, "learning_rate": 5.674681965460727e-09, "loss": 0.1292, "step": 21025 }, { "epoch": 2.9689353290031066, "grad_norm": 2.954779052251237, "learning_rate": 5.623448367205741e-09, "loss": 0.1644, "step": 21026 }, { "epoch": 2.9690765320530925, "grad_norm": 3.915213771424255, "learning_rate": 5.5724470339468815e-09, "loss": 0.1481, "step": 21027 }, { "epoch": 2.9692177351030784, "grad_norm": 3.2810421365250226, "learning_rate": 5.521677966866534e-09, "loss": 0.1499, "step": 21028 }, { "epoch": 2.9693589381530643, "grad_norm": 3.562055691743463, "learning_rate": 5.471141167147087e-09, "loss": 0.1563, "step": 21029 }, { "epoch": 2.96950014120305, "grad_norm": 2.9352896283052177, "learning_rate": 5.4208366359620455e-09, "loss": 0.1299, "step": 21030 }, { "epoch": 2.969641344253036, "grad_norm": 2.567186035594047, "learning_rate": 5.370764374480475e-09, "loss": 0.0946, "step": 21031 }, { "epoch": 2.969782547303022, "grad_norm": 3.403408823164045, "learning_rate": 5.3209243838647784e-09, "loss": 0.1416, "step": 21032 }, { "epoch": 2.969923750353008, "grad_norm": 2.372018652193145, "learning_rate": 5.271316665275139e-09, "loss": 0.1044, "step": 21033 }, { "epoch": 2.9700649534029937, "grad_norm": 3.5854331070162604, "learning_rate": 5.221941219863969e-09, "loss": 0.1798, "step": 21034 }, { "epoch": 2.9702061564529796, "grad_norm": 3.0709747911060443, "learning_rate": 5.172798048779237e-09, "loss": 0.1476, "step": 21035 }, { "epoch": 2.9703473595029655, "grad_norm": 3.1385426535132357, "learning_rate": 5.123887153161145e-09, "loss": 0.1289, "step": 21036 }, { "epoch": 2.9704885625529514, "grad_norm": 2.78171843382014, "learning_rate": 5.075208534147669e-09, "loss": 0.1261, "step": 21037 }, { "epoch": 2.9706297656029372, "grad_norm": 2.8368198044646786, "learning_rate": 5.026762192870127e-09, "loss": 0.1267, "step": 21038 }, { "epoch": 2.970770968652923, "grad_norm": 2.7312542141403426, "learning_rate": 4.9785481304531755e-09, "loss": 0.1283, "step": 21039 }, { "epoch": 2.970912171702909, "grad_norm": 2.9421763362159306, "learning_rate": 4.93056634801925e-09, "loss": 0.1321, "step": 21040 }, { "epoch": 2.971053374752895, "grad_norm": 4.228816865880618, "learning_rate": 4.882816846681904e-09, "loss": 0.1799, "step": 21041 }, { "epoch": 2.9711945778028808, "grad_norm": 2.6729053331570425, "learning_rate": 4.83529962755247e-09, "loss": 0.1309, "step": 21042 }, { "epoch": 2.9713357808528666, "grad_norm": 2.75361075539676, "learning_rate": 4.78801469173229e-09, "loss": 0.1394, "step": 21043 }, { "epoch": 2.9714769839028525, "grad_norm": 3.14082367980495, "learning_rate": 4.740962040323815e-09, "loss": 0.1167, "step": 21044 }, { "epoch": 2.9716181869528384, "grad_norm": 3.2894315443992515, "learning_rate": 4.694141674417285e-09, "loss": 0.1402, "step": 21045 }, { "epoch": 2.9717593900028243, "grad_norm": 3.9349673646756753, "learning_rate": 4.647553595102938e-09, "loss": 0.1645, "step": 21046 }, { "epoch": 2.97190059305281, "grad_norm": 3.672937943481367, "learning_rate": 4.601197803463242e-09, "loss": 0.1462, "step": 21047 }, { "epoch": 2.972041796102796, "grad_norm": 3.296551551324094, "learning_rate": 4.555074300574003e-09, "loss": 0.1636, "step": 21048 }, { "epoch": 2.972182999152782, "grad_norm": 2.809555824825626, "learning_rate": 4.5091830875088065e-09, "loss": 0.1154, "step": 21049 }, { "epoch": 2.972324202202768, "grad_norm": 3.677021917473877, "learning_rate": 4.463524165333466e-09, "loss": 0.1592, "step": 21050 }, { "epoch": 2.9724654052527537, "grad_norm": 3.000358821077733, "learning_rate": 4.418097535108246e-09, "loss": 0.141, "step": 21051 }, { "epoch": 2.9726066083027396, "grad_norm": 4.976978817166408, "learning_rate": 4.372903197891188e-09, "loss": 0.1496, "step": 21052 }, { "epoch": 2.9727478113527255, "grad_norm": 3.225628744055724, "learning_rate": 4.327941154730342e-09, "loss": 0.1508, "step": 21053 }, { "epoch": 2.9728890144027114, "grad_norm": 3.2535841996822783, "learning_rate": 4.283211406670429e-09, "loss": 0.1521, "step": 21054 }, { "epoch": 2.9730302174526972, "grad_norm": 3.438221282418488, "learning_rate": 4.238713954752838e-09, "loss": 0.1422, "step": 21055 }, { "epoch": 2.973171420502683, "grad_norm": 2.9365113390660817, "learning_rate": 4.194448800011186e-09, "loss": 0.1253, "step": 21056 }, { "epoch": 2.973312623552669, "grad_norm": 3.24073373641248, "learning_rate": 4.15041594347243e-09, "loss": 0.1129, "step": 21057 }, { "epoch": 2.973453826602655, "grad_norm": 2.889166141985397, "learning_rate": 4.1066153861624155e-09, "loss": 0.1286, "step": 21058 }, { "epoch": 2.9735950296526403, "grad_norm": 2.878492829553914, "learning_rate": 4.063047129096998e-09, "loss": 0.1165, "step": 21059 }, { "epoch": 2.973736232702626, "grad_norm": 4.188067370799378, "learning_rate": 4.019711173289809e-09, "loss": 0.2055, "step": 21060 }, { "epoch": 2.973877435752612, "grad_norm": 3.5985988997527913, "learning_rate": 3.976607519746712e-09, "loss": 0.1289, "step": 21061 }, { "epoch": 2.974018638802598, "grad_norm": 3.7009330604687656, "learning_rate": 3.933736169471347e-09, "loss": 0.117, "step": 21062 }, { "epoch": 2.974159841852584, "grad_norm": 2.2304069444430707, "learning_rate": 3.891097123458476e-09, "loss": 0.0897, "step": 21063 }, { "epoch": 2.9743010449025697, "grad_norm": 4.053806122203301, "learning_rate": 3.8486903826995266e-09, "loss": 0.155, "step": 21064 }, { "epoch": 2.9744422479525556, "grad_norm": 3.3111002416821447, "learning_rate": 3.806515948180378e-09, "loss": 0.1769, "step": 21065 }, { "epoch": 2.9745834510025415, "grad_norm": 3.2506154676597427, "learning_rate": 3.764573820880246e-09, "loss": 0.1625, "step": 21066 }, { "epoch": 2.9747246540525274, "grad_norm": 2.5605092057957872, "learning_rate": 3.7228640017750172e-09, "loss": 0.0988, "step": 21067 }, { "epoch": 2.9748658571025133, "grad_norm": 2.888765786394192, "learning_rate": 3.6813864918328057e-09, "loss": 0.1058, "step": 21068 }, { "epoch": 2.975007060152499, "grad_norm": 3.32867913699476, "learning_rate": 3.6401412920183955e-09, "loss": 0.1416, "step": 21069 }, { "epoch": 2.975148263202485, "grad_norm": 3.118885428620935, "learning_rate": 3.5991284032899087e-09, "loss": 0.1346, "step": 21070 }, { "epoch": 2.975289466252471, "grad_norm": 3.1699936544855154, "learning_rate": 3.558347826599917e-09, "loss": 0.1228, "step": 21071 }, { "epoch": 2.975430669302457, "grad_norm": 3.1865535699609984, "learning_rate": 3.5177995628976613e-09, "loss": 0.1215, "step": 21072 }, { "epoch": 2.9755718723524427, "grad_norm": 3.105781313668679, "learning_rate": 3.4774836131246103e-09, "loss": 0.1481, "step": 21073 }, { "epoch": 2.9757130754024286, "grad_norm": 3.4706337869050174, "learning_rate": 3.437399978216682e-09, "loss": 0.1798, "step": 21074 }, { "epoch": 2.9758542784524145, "grad_norm": 2.740235095286872, "learning_rate": 3.3975486591075746e-09, "loss": 0.112, "step": 21075 }, { "epoch": 2.9759954815024003, "grad_norm": 3.1589986946627198, "learning_rate": 3.357929656722103e-09, "loss": 0.1235, "step": 21076 }, { "epoch": 2.9761366845523862, "grad_norm": 2.9260076655295553, "learning_rate": 3.318542971980643e-09, "loss": 0.1068, "step": 21077 }, { "epoch": 2.976277887602372, "grad_norm": 3.0146099607281975, "learning_rate": 3.2793886057991277e-09, "loss": 0.1215, "step": 21078 }, { "epoch": 2.976419090652358, "grad_norm": 2.5100639859106417, "learning_rate": 3.24046655908683e-09, "loss": 0.1068, "step": 21079 }, { "epoch": 2.976560293702344, "grad_norm": 2.6233960554489646, "learning_rate": 3.201776832749692e-09, "loss": 0.0988, "step": 21080 }, { "epoch": 2.9767014967523298, "grad_norm": 2.8405040234156966, "learning_rate": 3.163319427685885e-09, "loss": 0.094, "step": 21081 }, { "epoch": 2.9768426998023156, "grad_norm": 3.127971042032498, "learning_rate": 3.125094344789137e-09, "loss": 0.1662, "step": 21082 }, { "epoch": 2.9769839028523015, "grad_norm": 2.72878225235653, "learning_rate": 3.0871015849476272e-09, "loss": 0.0945, "step": 21083 }, { "epoch": 2.9771251059022874, "grad_norm": 2.224175064313002, "learning_rate": 3.049341149045093e-09, "loss": 0.0856, "step": 21084 }, { "epoch": 2.9772663089522733, "grad_norm": 3.0064869120432043, "learning_rate": 3.0118130379575005e-09, "loss": 0.1234, "step": 21085 }, { "epoch": 2.977407512002259, "grad_norm": 3.1951170089734093, "learning_rate": 2.974517252558595e-09, "loss": 0.1209, "step": 21086 }, { "epoch": 2.977548715052245, "grad_norm": 3.4891742898341462, "learning_rate": 2.937453793714351e-09, "loss": 0.1711, "step": 21087 }, { "epoch": 2.977689918102231, "grad_norm": 3.048334834554927, "learning_rate": 2.9006226622874114e-09, "loss": 0.1423, "step": 21088 }, { "epoch": 2.977831121152217, "grad_norm": 3.4221071258428037, "learning_rate": 2.8640238591315373e-09, "loss": 0.1402, "step": 21089 }, { "epoch": 2.9779723242022027, "grad_norm": 2.3014685420301157, "learning_rate": 2.8276573850982703e-09, "loss": 0.1177, "step": 21090 }, { "epoch": 2.9781135272521886, "grad_norm": 5.052485989789503, "learning_rate": 2.79152324103249e-09, "loss": 0.1685, "step": 21091 }, { "epoch": 2.9782547303021745, "grad_norm": 4.052973782969394, "learning_rate": 2.755621427774635e-09, "loss": 0.1787, "step": 21092 }, { "epoch": 2.9783959333521604, "grad_norm": 2.6202924982325873, "learning_rate": 2.7199519461595937e-09, "loss": 0.0968, "step": 21093 }, { "epoch": 2.9785371364021462, "grad_norm": 2.8629976831012844, "learning_rate": 2.6845147970144816e-09, "loss": 0.1305, "step": 21094 }, { "epoch": 2.978678339452132, "grad_norm": 3.6665539702549275, "learning_rate": 2.649309981163084e-09, "loss": 0.1674, "step": 21095 }, { "epoch": 2.978819542502118, "grad_norm": 3.8962937354647647, "learning_rate": 2.6143374994247463e-09, "loss": 0.1655, "step": 21096 }, { "epoch": 2.978960745552104, "grad_norm": 2.831690331922944, "learning_rate": 2.579597352612151e-09, "loss": 0.1089, "step": 21097 }, { "epoch": 2.9791019486020898, "grad_norm": 2.9452148502652142, "learning_rate": 2.5450895415324306e-09, "loss": 0.1272, "step": 21098 }, { "epoch": 2.9792431516520756, "grad_norm": 3.0433848312705774, "learning_rate": 2.510814066986056e-09, "loss": 0.1315, "step": 21099 }, { "epoch": 2.9793843547020615, "grad_norm": 3.5255456367200573, "learning_rate": 2.4767709297712772e-09, "loss": 0.1774, "step": 21100 }, { "epoch": 2.9795255577520474, "grad_norm": 3.436782858784968, "learning_rate": 2.4429601306785734e-09, "loss": 0.1577, "step": 21101 }, { "epoch": 2.9796667608020333, "grad_norm": 3.0814672461288004, "learning_rate": 2.4093816704950924e-09, "loss": 0.1356, "step": 21102 }, { "epoch": 2.979807963852019, "grad_norm": 4.383456488385386, "learning_rate": 2.37603554999799e-09, "loss": 0.1895, "step": 21103 }, { "epoch": 2.979949166902005, "grad_norm": 3.409497772401768, "learning_rate": 2.3429217699655337e-09, "loss": 0.1448, "step": 21104 }, { "epoch": 2.980090369951991, "grad_norm": 3.521574078287625, "learning_rate": 2.3100403311659967e-09, "loss": 0.1691, "step": 21105 }, { "epoch": 2.980231573001977, "grad_norm": 3.495531140943913, "learning_rate": 2.277391234363213e-09, "loss": 0.1715, "step": 21106 }, { "epoch": 2.9803727760519627, "grad_norm": 3.6714688749552957, "learning_rate": 2.244974480315465e-09, "loss": 0.1517, "step": 21107 }, { "epoch": 2.9805139791019486, "grad_norm": 3.338478865874934, "learning_rate": 2.2127900697777038e-09, "loss": 0.1509, "step": 21108 }, { "epoch": 2.9806551821519345, "grad_norm": 3.6371271216189123, "learning_rate": 2.1808380034959997e-09, "loss": 0.1448, "step": 21109 }, { "epoch": 2.9807963852019204, "grad_norm": 4.227345400460378, "learning_rate": 2.1491182822153124e-09, "loss": 0.1784, "step": 21110 }, { "epoch": 2.9809375882519062, "grad_norm": 3.397280636148819, "learning_rate": 2.117630906670609e-09, "loss": 0.1531, "step": 21111 }, { "epoch": 2.981078791301892, "grad_norm": 2.5294953311501436, "learning_rate": 2.086375877593527e-09, "loss": 0.1153, "step": 21112 }, { "epoch": 2.981219994351878, "grad_norm": 3.054876120908973, "learning_rate": 2.0553531957123727e-09, "loss": 0.1289, "step": 21113 }, { "epoch": 2.981361197401864, "grad_norm": 3.078202445487692, "learning_rate": 2.0245628617465706e-09, "loss": 0.1338, "step": 21114 }, { "epoch": 2.9815024004518498, "grad_norm": 2.1669573568182887, "learning_rate": 1.9940048764122143e-09, "loss": 0.0836, "step": 21115 }, { "epoch": 2.9816436035018357, "grad_norm": 3.7449990755583586, "learning_rate": 1.963679240419847e-09, "loss": 0.1636, "step": 21116 }, { "epoch": 2.9817848065518215, "grad_norm": 2.3625193269504403, "learning_rate": 1.9335859544733494e-09, "loss": 0.0925, "step": 21117 }, { "epoch": 2.9819260096018074, "grad_norm": 3.0549409637560037, "learning_rate": 1.9037250192732728e-09, "loss": 0.118, "step": 21118 }, { "epoch": 2.9820672126517933, "grad_norm": 3.1049678219054218, "learning_rate": 1.8740964355112856e-09, "loss": 0.1621, "step": 21119 }, { "epoch": 2.982208415701779, "grad_norm": 3.7924889764368954, "learning_rate": 1.8447002038779471e-09, "loss": 0.182, "step": 21120 }, { "epoch": 2.982349618751765, "grad_norm": 2.897342613312911, "learning_rate": 1.8155363250571544e-09, "loss": 0.1326, "step": 21121 }, { "epoch": 2.982490821801751, "grad_norm": 3.3585873024921993, "learning_rate": 1.7866047997239234e-09, "loss": 0.1578, "step": 21122 }, { "epoch": 2.982632024851737, "grad_norm": 3.120244303035706, "learning_rate": 1.757905628552159e-09, "loss": 0.1313, "step": 21123 }, { "epoch": 2.9827732279017227, "grad_norm": 2.772743037595081, "learning_rate": 1.7294388122102157e-09, "loss": 0.1247, "step": 21124 }, { "epoch": 2.9829144309517086, "grad_norm": 3.054165423606782, "learning_rate": 1.7012043513564559e-09, "loss": 0.1221, "step": 21125 }, { "epoch": 2.9830556340016945, "grad_norm": 2.9533676475735366, "learning_rate": 1.6732022466503516e-09, "loss": 0.1306, "step": 21126 }, { "epoch": 2.9831968370516804, "grad_norm": 3.1956215631648517, "learning_rate": 1.6454324987391635e-09, "loss": 0.1428, "step": 21127 }, { "epoch": 2.9833380401016663, "grad_norm": 2.622750148317922, "learning_rate": 1.6178951082712613e-09, "loss": 0.1104, "step": 21128 }, { "epoch": 2.983479243151652, "grad_norm": 3.4886743088415524, "learning_rate": 1.5905900758861336e-09, "loss": 0.1751, "step": 21129 }, { "epoch": 2.983620446201638, "grad_norm": 2.8352584820410742, "learning_rate": 1.5635174022166077e-09, "loss": 0.1304, "step": 21130 }, { "epoch": 2.983761649251624, "grad_norm": 3.0115357331675927, "learning_rate": 1.5366770878932903e-09, "loss": 0.1377, "step": 21131 }, { "epoch": 2.98390285230161, "grad_norm": 2.5115279881909482, "learning_rate": 1.5100691335401262e-09, "loss": 0.1036, "step": 21132 }, { "epoch": 2.9840440553515957, "grad_norm": 3.0314406693483025, "learning_rate": 1.4836935397744e-09, "loss": 0.0977, "step": 21133 }, { "epoch": 2.9841852584015816, "grad_norm": 3.652863479113, "learning_rate": 1.4575503072100649e-09, "loss": 0.1233, "step": 21134 }, { "epoch": 2.9843264614515674, "grad_norm": 3.3373424720308718, "learning_rate": 1.4316394364533027e-09, "loss": 0.1312, "step": 21135 }, { "epoch": 2.9844676645015533, "grad_norm": 3.304181899976067, "learning_rate": 1.4059609281080743e-09, "loss": 0.1677, "step": 21136 }, { "epoch": 2.984608867551539, "grad_norm": 2.9154801031986555, "learning_rate": 1.3805147827694599e-09, "loss": 0.1271, "step": 21137 }, { "epoch": 2.984750070601525, "grad_norm": 3.2550673438798507, "learning_rate": 1.355301001030318e-09, "loss": 0.1319, "step": 21138 }, { "epoch": 2.984891273651511, "grad_norm": 2.849257536520862, "learning_rate": 1.3303195834757366e-09, "loss": 0.1204, "step": 21139 }, { "epoch": 2.985032476701497, "grad_norm": 3.238168205683666, "learning_rate": 1.305570530686362e-09, "loss": 0.1294, "step": 21140 }, { "epoch": 2.9851736797514827, "grad_norm": 2.832533835269369, "learning_rate": 1.2810538432372898e-09, "loss": 0.1378, "step": 21141 }, { "epoch": 2.9853148828014686, "grad_norm": 3.540595304676716, "learning_rate": 1.256769521699175e-09, "loss": 0.1601, "step": 21142 }, { "epoch": 2.9854560858514545, "grad_norm": 2.6431349966950606, "learning_rate": 1.2327175666349e-09, "loss": 0.0831, "step": 21143 }, { "epoch": 2.9855972889014404, "grad_norm": 3.88829567403951, "learning_rate": 1.2088979786040179e-09, "loss": 0.1712, "step": 21144 }, { "epoch": 2.9857384919514263, "grad_norm": 3.4441098148055107, "learning_rate": 1.1853107581605294e-09, "loss": 0.1599, "step": 21145 }, { "epoch": 2.985879695001412, "grad_norm": 2.8017997825614303, "learning_rate": 1.1619559058517748e-09, "loss": 0.1425, "step": 21146 }, { "epoch": 2.986020898051398, "grad_norm": 4.011926025480318, "learning_rate": 1.1388334222217633e-09, "loss": 0.1702, "step": 21147 }, { "epoch": 2.986162101101384, "grad_norm": 3.018036377814512, "learning_rate": 1.1159433078067327e-09, "loss": 0.1316, "step": 21148 }, { "epoch": 2.98630330415137, "grad_norm": 3.216344570894812, "learning_rate": 1.0932855631384797e-09, "loss": 0.1772, "step": 21149 }, { "epoch": 2.9864445072013557, "grad_norm": 2.722246572315768, "learning_rate": 1.0708601887454706e-09, "loss": 0.1259, "step": 21150 }, { "epoch": 2.9865857102513416, "grad_norm": 3.7183543215382557, "learning_rate": 1.0486671851461794e-09, "loss": 0.1375, "step": 21151 }, { "epoch": 2.9867269133013274, "grad_norm": 2.870385681131172, "learning_rate": 1.02670655285797e-09, "loss": 0.1505, "step": 21152 }, { "epoch": 2.9868681163513133, "grad_norm": 3.3498521284143203, "learning_rate": 1.004978292390435e-09, "loss": 0.1224, "step": 21153 }, { "epoch": 2.987009319401299, "grad_norm": 3.217192814941655, "learning_rate": 9.83482404249836e-10, "loss": 0.1341, "step": 21154 }, { "epoch": 2.987150522451285, "grad_norm": 3.125034627936791, "learning_rate": 9.62218888934663e-10, "loss": 0.1407, "step": 21155 }, { "epoch": 2.987291725501271, "grad_norm": 2.759093585643996, "learning_rate": 9.411877469389652e-10, "loss": 0.1187, "step": 21156 }, { "epoch": 2.987432928551257, "grad_norm": 3.3029977592846644, "learning_rate": 9.20388978751241e-10, "loss": 0.1506, "step": 21157 }, { "epoch": 2.9875741316012427, "grad_norm": 2.944636840080118, "learning_rate": 8.998225848566577e-10, "loss": 0.162, "step": 21158 }, { "epoch": 2.9877153346512286, "grad_norm": 2.9440498603984704, "learning_rate": 8.794885657315011e-10, "loss": 0.1241, "step": 21159 }, { "epoch": 2.9878565377012145, "grad_norm": 3.358942362266349, "learning_rate": 8.593869218487261e-10, "loss": 0.1742, "step": 21160 }, { "epoch": 2.9879977407512, "grad_norm": 3.592256949866937, "learning_rate": 8.395176536746263e-10, "loss": 0.1676, "step": 21161 }, { "epoch": 2.988138943801186, "grad_norm": 3.363488430018918, "learning_rate": 8.198807616732752e-10, "loss": 0.1312, "step": 21162 }, { "epoch": 2.9882801468511717, "grad_norm": 3.9130253572060956, "learning_rate": 8.004762462987536e-10, "loss": 0.1834, "step": 21163 }, { "epoch": 2.9884213499011576, "grad_norm": 2.754768880174701, "learning_rate": 7.813041080029226e-10, "loss": 0.1216, "step": 21164 }, { "epoch": 2.9885625529511435, "grad_norm": 3.4143643785401747, "learning_rate": 7.623643472309817e-10, "loss": 0.1476, "step": 21165 }, { "epoch": 2.9887037560011294, "grad_norm": 3.545533543053335, "learning_rate": 7.43656964423689e-10, "loss": 0.1363, "step": 21166 }, { "epoch": 2.9888449590511152, "grad_norm": 2.739064658654357, "learning_rate": 7.251819600151422e-10, "loss": 0.1542, "step": 21167 }, { "epoch": 2.988986162101101, "grad_norm": 2.839532311737879, "learning_rate": 7.069393344361075e-10, "loss": 0.1165, "step": 21168 }, { "epoch": 2.989127365151087, "grad_norm": 3.527188385730754, "learning_rate": 6.889290881084699e-10, "loss": 0.1761, "step": 21169 }, { "epoch": 2.989268568201073, "grad_norm": 2.348909904801069, "learning_rate": 6.711512214518934e-10, "loss": 0.1035, "step": 21170 }, { "epoch": 2.989409771251059, "grad_norm": 2.6624329810704435, "learning_rate": 6.536057348793811e-10, "loss": 0.1141, "step": 21171 }, { "epoch": 2.9895509743010447, "grad_norm": 3.598588927650186, "learning_rate": 6.36292628798385e-10, "loss": 0.1782, "step": 21172 }, { "epoch": 2.9896921773510305, "grad_norm": 2.9683948139247054, "learning_rate": 6.19211903611916e-10, "loss": 0.1433, "step": 21173 }, { "epoch": 2.9898333804010164, "grad_norm": 3.2741962867233823, "learning_rate": 6.023635597163236e-10, "loss": 0.1528, "step": 21174 }, { "epoch": 2.9899745834510023, "grad_norm": 3.9889610858261255, "learning_rate": 5.857475975035165e-10, "loss": 0.1567, "step": 21175 }, { "epoch": 2.990115786500988, "grad_norm": 3.97499141797581, "learning_rate": 5.693640173598525e-10, "loss": 0.1725, "step": 21176 }, { "epoch": 2.990256989550974, "grad_norm": 2.8821019858930574, "learning_rate": 5.532128196650277e-10, "loss": 0.1447, "step": 21177 }, { "epoch": 2.99039819260096, "grad_norm": 3.347830996279111, "learning_rate": 5.372940047965181e-10, "loss": 0.1532, "step": 21178 }, { "epoch": 2.990539395650946, "grad_norm": 3.218164190064572, "learning_rate": 5.216075731218073e-10, "loss": 0.1461, "step": 21179 }, { "epoch": 2.9906805987009317, "grad_norm": 3.543413364851795, "learning_rate": 5.061535250061589e-10, "loss": 0.1603, "step": 21180 }, { "epoch": 2.9908218017509176, "grad_norm": 3.4391258838684697, "learning_rate": 4.90931860809285e-10, "loss": 0.158, "step": 21181 }, { "epoch": 2.9909630048009035, "grad_norm": 3.853763735772975, "learning_rate": 4.759425808853468e-10, "loss": 0.1836, "step": 21182 }, { "epoch": 2.9911042078508894, "grad_norm": 3.261740455148176, "learning_rate": 4.6118568558184416e-10, "loss": 0.1705, "step": 21183 }, { "epoch": 2.9912454109008753, "grad_norm": 2.524731214061247, "learning_rate": 4.466611752418359e-10, "loss": 0.1262, "step": 21184 }, { "epoch": 2.991386613950861, "grad_norm": 2.961045452925279, "learning_rate": 4.323690502017197e-10, "loss": 0.1162, "step": 21185 }, { "epoch": 2.991527817000847, "grad_norm": 3.608405115846408, "learning_rate": 4.183093107967828e-10, "loss": 0.1989, "step": 21186 }, { "epoch": 2.991669020050833, "grad_norm": 3.6698333364642752, "learning_rate": 4.044819573501002e-10, "loss": 0.1596, "step": 21187 }, { "epoch": 2.991810223100819, "grad_norm": 2.5951753431454163, "learning_rate": 3.9088699018585695e-10, "loss": 0.1159, "step": 21188 }, { "epoch": 2.9919514261508047, "grad_norm": 2.7999160175995588, "learning_rate": 3.7752440961935646e-10, "loss": 0.1189, "step": 21189 }, { "epoch": 2.9920926292007906, "grad_norm": 3.2784412296945376, "learning_rate": 3.6439421595924065e-10, "loss": 0.1602, "step": 21190 }, { "epoch": 2.9922338322507764, "grad_norm": 2.8073575839091536, "learning_rate": 3.514964095130413e-10, "loss": 0.1105, "step": 21191 }, { "epoch": 2.9923750353007623, "grad_norm": 3.1496331674521416, "learning_rate": 3.388309905794085e-10, "loss": 0.1398, "step": 21192 }, { "epoch": 2.992516238350748, "grad_norm": 2.886957132950518, "learning_rate": 3.263979594525513e-10, "loss": 0.1288, "step": 21193 }, { "epoch": 2.992657441400734, "grad_norm": 3.2426044611686424, "learning_rate": 3.1419731642223785e-10, "loss": 0.1463, "step": 21194 }, { "epoch": 2.99279864445072, "grad_norm": 3.4230329906680272, "learning_rate": 3.022290617715751e-10, "loss": 0.1411, "step": 21195 }, { "epoch": 2.992939847500706, "grad_norm": 3.9497303817317397, "learning_rate": 2.9049319577811873e-10, "loss": 0.1464, "step": 21196 }, { "epoch": 2.9930810505506917, "grad_norm": 2.872521890581678, "learning_rate": 2.789897187149837e-10, "loss": 0.1363, "step": 21197 }, { "epoch": 2.9932222536006776, "grad_norm": 3.342145491270167, "learning_rate": 2.677186308497337e-10, "loss": 0.106, "step": 21198 }, { "epoch": 2.9933634566506635, "grad_norm": 2.8513249741824014, "learning_rate": 2.5667993244327117e-10, "loss": 0.1162, "step": 21199 }, { "epoch": 2.9935046597006494, "grad_norm": 2.4461003914573967, "learning_rate": 2.45873623754278e-10, "loss": 0.1045, "step": 21200 }, { "epoch": 2.9936458627506353, "grad_norm": 2.6283048423563433, "learning_rate": 2.352997050325545e-10, "loss": 0.0858, "step": 21201 }, { "epoch": 2.993787065800621, "grad_norm": 3.5032680108407592, "learning_rate": 2.2495817652345987e-10, "loss": 0.17, "step": 21202 }, { "epoch": 2.993928268850607, "grad_norm": 2.8068712309916033, "learning_rate": 2.1484903846791249e-10, "loss": 0.1174, "step": 21203 }, { "epoch": 2.994069471900593, "grad_norm": 3.1647878159157123, "learning_rate": 2.0497229110016948e-10, "loss": 0.1225, "step": 21204 }, { "epoch": 2.994210674950579, "grad_norm": 3.6600308908665533, "learning_rate": 1.9532793465115717e-10, "loss": 0.1748, "step": 21205 }, { "epoch": 2.9943518780005647, "grad_norm": 4.068971957180614, "learning_rate": 1.8591596934292022e-10, "loss": 0.1282, "step": 21206 }, { "epoch": 2.9944930810505506, "grad_norm": 3.4939923260106984, "learning_rate": 1.7673639539639298e-10, "loss": 0.1379, "step": 21207 }, { "epoch": 2.9946342841005364, "grad_norm": 2.662586259248584, "learning_rate": 1.6778921302362805e-10, "loss": 0.1135, "step": 21208 }, { "epoch": 2.9947754871505223, "grad_norm": 2.7633733998962624, "learning_rate": 1.5907442243334737e-10, "loss": 0.1476, "step": 21209 }, { "epoch": 2.994916690200508, "grad_norm": 2.6972341571498624, "learning_rate": 1.505920238265013e-10, "loss": 0.115, "step": 21210 }, { "epoch": 2.995057893250494, "grad_norm": 4.002315734873148, "learning_rate": 1.4234201740292996e-10, "loss": 0.141, "step": 21211 }, { "epoch": 2.99519909630048, "grad_norm": 2.7485028377414737, "learning_rate": 1.343244033513713e-10, "loss": 0.1146, "step": 21212 }, { "epoch": 2.995340299350466, "grad_norm": 3.6629749286156157, "learning_rate": 1.26539181859453e-10, "loss": 0.1772, "step": 21213 }, { "epoch": 2.9954815024004517, "grad_norm": 3.8394837439769156, "learning_rate": 1.1898635310925167e-10, "loss": 0.1848, "step": 21214 }, { "epoch": 2.9956227054504376, "grad_norm": 2.9268400303010154, "learning_rate": 1.1166591727396203e-10, "loss": 0.1246, "step": 21215 }, { "epoch": 2.9957639085004235, "grad_norm": 2.983780546292802, "learning_rate": 1.045778745256687e-10, "loss": 0.1369, "step": 21216 }, { "epoch": 2.9959051115504094, "grad_norm": 2.863031170850118, "learning_rate": 9.772222502868467e-11, "loss": 0.1428, "step": 21217 }, { "epoch": 2.9960463146003953, "grad_norm": 3.441383046772891, "learning_rate": 9.109896894066161e-11, "loss": 0.1689, "step": 21218 }, { "epoch": 2.996187517650381, "grad_norm": 3.614117090739518, "learning_rate": 8.470810641814098e-11, "loss": 0.171, "step": 21219 }, { "epoch": 2.996328720700367, "grad_norm": 3.3628339118364683, "learning_rate": 7.85496376076722e-11, "loss": 0.1397, "step": 21220 }, { "epoch": 2.996469923750353, "grad_norm": 3.498444297002625, "learning_rate": 7.262356265358428e-11, "loss": 0.1429, "step": 21221 }, { "epoch": 2.996611126800339, "grad_norm": 2.7747380806840063, "learning_rate": 6.692988169243463e-11, "loss": 0.1371, "step": 21222 }, { "epoch": 2.9967523298503247, "grad_norm": 3.5198455762033953, "learning_rate": 6.146859485745004e-11, "loss": 0.1575, "step": 21223 }, { "epoch": 2.9968935329003106, "grad_norm": 3.8969432157436565, "learning_rate": 5.623970227630615e-11, "loss": 0.1183, "step": 21224 }, { "epoch": 2.9970347359502965, "grad_norm": 2.7465664965288337, "learning_rate": 5.124320406890704e-11, "loss": 0.1289, "step": 21225 }, { "epoch": 2.9971759390002823, "grad_norm": 3.7654282928742804, "learning_rate": 4.64791003507159e-11, "loss": 0.1436, "step": 21226 }, { "epoch": 2.9973171420502682, "grad_norm": 2.8271615052594785, "learning_rate": 4.1947391234975485e-11, "loss": 0.1164, "step": 21227 }, { "epoch": 2.997458345100254, "grad_norm": 3.694541271159491, "learning_rate": 3.764807682604676e-11, "loss": 0.156, "step": 21228 }, { "epoch": 2.99759954815024, "grad_norm": 3.0891591260523046, "learning_rate": 3.358115722273958e-11, "loss": 0.1155, "step": 21229 }, { "epoch": 2.997740751200226, "grad_norm": 3.5969610125661844, "learning_rate": 2.9746632520533116e-11, "loss": 0.1573, "step": 21230 }, { "epoch": 2.9978819542502118, "grad_norm": 3.635837060906462, "learning_rate": 2.6144502808245208e-11, "loss": 0.1391, "step": 21231 }, { "epoch": 2.9980231573001976, "grad_norm": 3.02853436563727, "learning_rate": 2.2774768170252813e-11, "loss": 0.1228, "step": 21232 }, { "epoch": 2.9981643603501835, "grad_norm": 3.281446540367409, "learning_rate": 1.963742868316132e-11, "loss": 0.1561, "step": 21233 }, { "epoch": 2.9983055634001694, "grad_norm": 2.7518352719091537, "learning_rate": 1.673248442246589e-11, "loss": 0.1302, "step": 21234 }, { "epoch": 2.9984467664501553, "grad_norm": 3.114354127384235, "learning_rate": 1.405993545255946e-11, "loss": 0.1409, "step": 21235 }, { "epoch": 2.998587969500141, "grad_norm": 2.9323082282173982, "learning_rate": 1.1619781838945188e-11, "loss": 0.1485, "step": 21236 }, { "epoch": 2.998729172550127, "grad_norm": 3.2013813218348774, "learning_rate": 9.412023636024003e-12, "loss": 0.1403, "step": 21237 }, { "epoch": 2.998870375600113, "grad_norm": 2.954438412189926, "learning_rate": 7.436660894866165e-12, "loss": 0.1135, "step": 21238 }, { "epoch": 2.999011578650099, "grad_norm": 3.3089000963642805, "learning_rate": 5.693693663211264e-12, "loss": 0.1359, "step": 21239 }, { "epoch": 2.9991527817000847, "grad_norm": 3.0678087293266443, "learning_rate": 4.1831219799171044e-12, "loss": 0.1393, "step": 21240 }, { "epoch": 2.9992939847500706, "grad_norm": 2.3132531390417244, "learning_rate": 2.904945881621046e-12, "loss": 0.1006, "step": 21241 }, { "epoch": 2.9994351878000565, "grad_norm": 2.7932270418459773, "learning_rate": 1.859165397188889e-12, "loss": 0.1659, "step": 21242 }, { "epoch": 2.9995763908500424, "grad_norm": 2.857614043883779, "learning_rate": 1.0457805499353157e-12, "loss": 0.1342, "step": 21243 }, { "epoch": 2.9997175939000282, "grad_norm": 2.8310409274034054, "learning_rate": 4.647913598443409e-13, "loss": 0.0953, "step": 21244 }, { "epoch": 2.999858796950014, "grad_norm": 3.174553736792835, "learning_rate": 1.16197840238641e-13, "loss": 0.1432, "step": 21245 }, { "epoch": 3.0, "grad_norm": 2.2926056070486545, "learning_rate": 0.0, "loss": 0.0818, "step": 21246 }, { "epoch": 3.0, "step": 21246, "total_flos": 190632695685120.0, "train_loss": 0.5977588871135013, "train_runtime": 93801.157, "train_samples_per_second": 1.812, "train_steps_per_second": 0.227 } ], "logging_steps": 1.0, "max_steps": 21246, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 190632695685120.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }