{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999197238500441, "eval_steps": 500, "global_step": 4671, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00021406973321559498, "grad_norm": 1.1166919279503258, "learning_rate": 8.510638297872341e-07, "loss": 1.1827, "step": 1 }, { "epoch": 0.00042813946643118997, "grad_norm": 1.1708289727451116, "learning_rate": 1.7021276595744682e-06, "loss": 1.1359, "step": 2 }, { "epoch": 0.0006422091996467849, "grad_norm": 1.0907211593333068, "learning_rate": 2.553191489361702e-06, "loss": 1.1557, "step": 3 }, { "epoch": 0.0008562789328623799, "grad_norm": 1.0197824487418166, "learning_rate": 3.4042553191489363e-06, "loss": 1.1925, "step": 4 }, { "epoch": 0.001070348666077975, "grad_norm": 0.7931135221390712, "learning_rate": 4.255319148936171e-06, "loss": 1.1818, "step": 5 }, { "epoch": 0.0012844183992935698, "grad_norm": 1.2062759929754334, "learning_rate": 5.106382978723404e-06, "loss": 1.1836, "step": 6 }, { "epoch": 0.001498488132509165, "grad_norm": 0.7236664543575566, "learning_rate": 5.957446808510638e-06, "loss": 1.1129, "step": 7 }, { "epoch": 0.0017125578657247599, "grad_norm": 0.876566120952773, "learning_rate": 6.808510638297873e-06, "loss": 1.1466, "step": 8 }, { "epoch": 0.0019266275989403548, "grad_norm": 0.9927832245845079, "learning_rate": 7.659574468085107e-06, "loss": 1.089, "step": 9 }, { "epoch": 0.00214069733215595, "grad_norm": 0.6601070620009425, "learning_rate": 8.510638297872341e-06, "loss": 1.0964, "step": 10 }, { "epoch": 0.0023547670653715448, "grad_norm": 1.2645696188115831, "learning_rate": 9.361702127659576e-06, "loss": 1.1111, "step": 11 }, { "epoch": 0.0025688367985871397, "grad_norm": 0.9038447023024733, "learning_rate": 1.0212765957446808e-05, "loss": 1.1165, "step": 12 }, { "epoch": 0.0027829065318027346, "grad_norm": 0.7339109817654351, "learning_rate": 1.1063829787234044e-05, "loss": 1.1966, "step": 13 }, { "epoch": 0.00299697626501833, "grad_norm": 0.9353478021948082, "learning_rate": 1.1914893617021277e-05, "loss": 1.1288, "step": 14 }, { "epoch": 0.003211045998233925, "grad_norm": 0.7288553042986659, "learning_rate": 1.2765957446808513e-05, "loss": 1.1335, "step": 15 }, { "epoch": 0.0034251157314495197, "grad_norm": 0.7356599947982664, "learning_rate": 1.3617021276595745e-05, "loss": 1.07, "step": 16 }, { "epoch": 0.0036391854646651146, "grad_norm": 0.8088448974520007, "learning_rate": 1.4468085106382981e-05, "loss": 1.0834, "step": 17 }, { "epoch": 0.0038532551978807095, "grad_norm": 0.6185232663513837, "learning_rate": 1.5319148936170214e-05, "loss": 1.0397, "step": 18 }, { "epoch": 0.004067324931096305, "grad_norm": 0.6953950133146246, "learning_rate": 1.6170212765957446e-05, "loss": 1.0825, "step": 19 }, { "epoch": 0.0042813946643119, "grad_norm": 0.6338356520863616, "learning_rate": 1.7021276595744682e-05, "loss": 1.0906, "step": 20 }, { "epoch": 0.004495464397527495, "grad_norm": 0.5541703704791683, "learning_rate": 1.7872340425531915e-05, "loss": 1.0918, "step": 21 }, { "epoch": 0.0047095341307430896, "grad_norm": 0.5579133715396074, "learning_rate": 1.872340425531915e-05, "loss": 1.0435, "step": 22 }, { "epoch": 0.0049236038639586845, "grad_norm": 0.5909042405991046, "learning_rate": 1.9574468085106384e-05, "loss": 1.1142, "step": 23 }, { "epoch": 0.005137673597174279, "grad_norm": 0.601096746808294, "learning_rate": 2.0425531914893616e-05, "loss": 1.0311, "step": 24 }, { "epoch": 0.005351743330389874, "grad_norm": 0.606153601431191, "learning_rate": 2.1276595744680852e-05, "loss": 1.0409, "step": 25 }, { "epoch": 0.005565813063605469, "grad_norm": 0.7133301692826983, "learning_rate": 2.2127659574468088e-05, "loss": 1.0529, "step": 26 }, { "epoch": 0.005779882796821064, "grad_norm": 0.9435346016766639, "learning_rate": 2.2978723404255324e-05, "loss": 1.0484, "step": 27 }, { "epoch": 0.00599395253003666, "grad_norm": 1.176958680484456, "learning_rate": 2.3829787234042553e-05, "loss": 1.057, "step": 28 }, { "epoch": 0.006208022263252255, "grad_norm": 0.7760780885243199, "learning_rate": 2.468085106382979e-05, "loss": 1.0051, "step": 29 }, { "epoch": 0.00642209199646785, "grad_norm": 0.6677333961706371, "learning_rate": 2.5531914893617025e-05, "loss": 0.9905, "step": 30 }, { "epoch": 0.0066361617296834445, "grad_norm": 0.834480954253743, "learning_rate": 2.6382978723404255e-05, "loss": 1.0342, "step": 31 }, { "epoch": 0.0068502314628990394, "grad_norm": 1.0974033292691274, "learning_rate": 2.723404255319149e-05, "loss": 1.0149, "step": 32 }, { "epoch": 0.007064301196114634, "grad_norm": 1.038976902577752, "learning_rate": 2.8085106382978727e-05, "loss": 1.0572, "step": 33 }, { "epoch": 0.007278370929330229, "grad_norm": 0.8852689841618762, "learning_rate": 2.8936170212765963e-05, "loss": 0.9999, "step": 34 }, { "epoch": 0.007492440662545824, "grad_norm": 0.9301997082176462, "learning_rate": 2.9787234042553192e-05, "loss": 1.0109, "step": 35 }, { "epoch": 0.007706510395761419, "grad_norm": 1.5082522529592066, "learning_rate": 3.063829787234043e-05, "loss": 1.0071, "step": 36 }, { "epoch": 0.007920580128977015, "grad_norm": 0.7195107242376084, "learning_rate": 3.1489361702127664e-05, "loss": 1.0669, "step": 37 }, { "epoch": 0.00813464986219261, "grad_norm": 0.9748082972789284, "learning_rate": 3.234042553191489e-05, "loss": 0.9628, "step": 38 }, { "epoch": 0.008348719595408205, "grad_norm": 1.1875078001762558, "learning_rate": 3.319148936170213e-05, "loss": 0.9952, "step": 39 }, { "epoch": 0.0085627893286238, "grad_norm": 1.0391692344066028, "learning_rate": 3.4042553191489365e-05, "loss": 1.0394, "step": 40 }, { "epoch": 0.008776859061839394, "grad_norm": 1.4756395878896853, "learning_rate": 3.48936170212766e-05, "loss": 1.0107, "step": 41 }, { "epoch": 0.00899092879505499, "grad_norm": 0.6635851734986676, "learning_rate": 3.574468085106383e-05, "loss": 0.9681, "step": 42 }, { "epoch": 0.009204998528270584, "grad_norm": 1.2729496957274005, "learning_rate": 3.6595744680851066e-05, "loss": 0.9411, "step": 43 }, { "epoch": 0.009419068261486179, "grad_norm": 0.7233378367122119, "learning_rate": 3.74468085106383e-05, "loss": 0.9916, "step": 44 }, { "epoch": 0.009633137994701774, "grad_norm": 1.1693159971090483, "learning_rate": 3.829787234042554e-05, "loss": 0.9817, "step": 45 }, { "epoch": 0.009847207727917369, "grad_norm": 0.7455612181912622, "learning_rate": 3.914893617021277e-05, "loss": 0.9939, "step": 46 }, { "epoch": 0.010061277461132964, "grad_norm": 1.290433563215881, "learning_rate": 4e-05, "loss": 0.9576, "step": 47 }, { "epoch": 0.010275347194348559, "grad_norm": 1.1054549329447891, "learning_rate": 3.999999538401831e-05, "loss": 0.9123, "step": 48 }, { "epoch": 0.010489416927564154, "grad_norm": 1.0856558491489532, "learning_rate": 3.999998153607536e-05, "loss": 0.9401, "step": 49 }, { "epoch": 0.010703486660779749, "grad_norm": 1.0090349725115968, "learning_rate": 3.9999958456177544e-05, "loss": 0.9271, "step": 50 }, { "epoch": 0.010917556393995343, "grad_norm": 1.2254714038725856, "learning_rate": 3.999992614433551e-05, "loss": 0.9731, "step": 51 }, { "epoch": 0.011131626127210938, "grad_norm": 0.7146351542113133, "learning_rate": 3.999988460056418e-05, "loss": 0.951, "step": 52 }, { "epoch": 0.011345695860426533, "grad_norm": 0.8766895624956862, "learning_rate": 3.999983382488274e-05, "loss": 0.9421, "step": 53 }, { "epoch": 0.011559765593642128, "grad_norm": 1.0455184491547953, "learning_rate": 3.99997738173146e-05, "loss": 0.9302, "step": 54 }, { "epoch": 0.011773835326857725, "grad_norm": 0.9023316909274607, "learning_rate": 3.9999704577887497e-05, "loss": 0.9737, "step": 55 }, { "epoch": 0.01198790506007332, "grad_norm": 0.8468310557786813, "learning_rate": 3.9999626106633364e-05, "loss": 0.9569, "step": 56 }, { "epoch": 0.012201974793288915, "grad_norm": 0.7796679102876239, "learning_rate": 3.9999538403588424e-05, "loss": 0.959, "step": 57 }, { "epoch": 0.01241604452650451, "grad_norm": 0.7611741736834936, "learning_rate": 3.999944146879317e-05, "loss": 0.9388, "step": 58 }, { "epoch": 0.012630114259720104, "grad_norm": 0.6692315664257814, "learning_rate": 3.999933530229235e-05, "loss": 1.0062, "step": 59 }, { "epoch": 0.0128441839929357, "grad_norm": 0.5812982258666269, "learning_rate": 3.999921990413496e-05, "loss": 0.9834, "step": 60 }, { "epoch": 0.013058253726151294, "grad_norm": 0.6197457280615808, "learning_rate": 3.9999095274374274e-05, "loss": 0.9347, "step": 61 }, { "epoch": 0.013272323459366889, "grad_norm": 0.5540448143621675, "learning_rate": 3.999896141306782e-05, "loss": 0.9421, "step": 62 }, { "epoch": 0.013486393192582484, "grad_norm": 0.5797201354380859, "learning_rate": 3.999881832027739e-05, "loss": 0.9639, "step": 63 }, { "epoch": 0.013700462925798079, "grad_norm": 0.5593714489515639, "learning_rate": 3.999866599606903e-05, "loss": 0.9042, "step": 64 }, { "epoch": 0.013914532659013674, "grad_norm": 0.4702809339742805, "learning_rate": 3.9998504440513055e-05, "loss": 0.921, "step": 65 }, { "epoch": 0.014128602392229269, "grad_norm": 0.89907196888218, "learning_rate": 3.999833365368403e-05, "loss": 0.9141, "step": 66 }, { "epoch": 0.014342672125444864, "grad_norm": 0.47718688760050365, "learning_rate": 3.999815363566081e-05, "loss": 0.9056, "step": 67 }, { "epoch": 0.014556741858660458, "grad_norm": 0.4821685450306697, "learning_rate": 3.999796438652648e-05, "loss": 0.9617, "step": 68 }, { "epoch": 0.014770811591876053, "grad_norm": 0.5711772498703427, "learning_rate": 3.9997765906368394e-05, "loss": 0.9217, "step": 69 }, { "epoch": 0.014984881325091648, "grad_norm": 0.5043638438184408, "learning_rate": 3.999755819527817e-05, "loss": 0.9546, "step": 70 }, { "epoch": 0.015198951058307243, "grad_norm": 0.51402390704286, "learning_rate": 3.999734125335169e-05, "loss": 0.9214, "step": 71 }, { "epoch": 0.015413020791522838, "grad_norm": 0.5094766948616595, "learning_rate": 3.99971150806891e-05, "loss": 0.9068, "step": 72 }, { "epoch": 0.015627090524738433, "grad_norm": 0.5203099086883713, "learning_rate": 3.99968796773948e-05, "loss": 0.9343, "step": 73 }, { "epoch": 0.01584116025795403, "grad_norm": 0.5036751618637034, "learning_rate": 3.999663504357743e-05, "loss": 0.9594, "step": 74 }, { "epoch": 0.016055229991169623, "grad_norm": 0.46537339509159126, "learning_rate": 3.999638117934994e-05, "loss": 0.9341, "step": 75 }, { "epoch": 0.01626929972438522, "grad_norm": 0.48878004609491565, "learning_rate": 3.99961180848295e-05, "loss": 0.9085, "step": 76 }, { "epoch": 0.016483369457600813, "grad_norm": 0.5050176828983611, "learning_rate": 3.9995845760137556e-05, "loss": 0.945, "step": 77 }, { "epoch": 0.01669743919081641, "grad_norm": 0.4990493792937551, "learning_rate": 3.999556420539981e-05, "loss": 0.9205, "step": 78 }, { "epoch": 0.016911508924032002, "grad_norm": 0.6134109651957985, "learning_rate": 3.9995273420746235e-05, "loss": 0.8763, "step": 79 }, { "epoch": 0.0171255786572476, "grad_norm": 0.7417954503922389, "learning_rate": 3.999497340631106e-05, "loss": 0.9216, "step": 80 }, { "epoch": 0.017339648390463192, "grad_norm": 0.8060670544765532, "learning_rate": 3.999466416223275e-05, "loss": 0.9099, "step": 81 }, { "epoch": 0.01755371812367879, "grad_norm": 0.8087969162506505, "learning_rate": 3.9994345688654063e-05, "loss": 0.9038, "step": 82 }, { "epoch": 0.017767787856894382, "grad_norm": 0.7004889255347019, "learning_rate": 3.999401798572201e-05, "loss": 0.9014, "step": 83 }, { "epoch": 0.01798185759010998, "grad_norm": 0.5802585102032293, "learning_rate": 3.999368105358786e-05, "loss": 0.9031, "step": 84 }, { "epoch": 0.018195927323325572, "grad_norm": 0.6408609753897807, "learning_rate": 3.9993334892407135e-05, "loss": 0.895, "step": 85 }, { "epoch": 0.01840999705654117, "grad_norm": 0.6980937286960113, "learning_rate": 3.999297950233962e-05, "loss": 0.905, "step": 86 }, { "epoch": 0.01862406678975676, "grad_norm": 0.7130072915591322, "learning_rate": 3.999261488354937e-05, "loss": 0.8795, "step": 87 }, { "epoch": 0.018838136522972358, "grad_norm": 0.6452008052964264, "learning_rate": 3.999224103620468e-05, "loss": 0.8989, "step": 88 }, { "epoch": 0.019052206256187955, "grad_norm": 0.5404873483636125, "learning_rate": 3.999185796047813e-05, "loss": 0.8825, "step": 89 }, { "epoch": 0.019266275989403548, "grad_norm": 0.6099564670308534, "learning_rate": 3.9991465656546536e-05, "loss": 0.892, "step": 90 }, { "epoch": 0.019480345722619145, "grad_norm": 0.6069250708476934, "learning_rate": 3.9991064124591e-05, "loss": 0.9067, "step": 91 }, { "epoch": 0.019694415455834738, "grad_norm": 0.5483146337131054, "learning_rate": 3.999065336479685e-05, "loss": 0.9025, "step": 92 }, { "epoch": 0.019908485189050334, "grad_norm": 0.44566955352426096, "learning_rate": 3.9990233377353706e-05, "loss": 0.9234, "step": 93 }, { "epoch": 0.020122554922265928, "grad_norm": 0.4936248836222646, "learning_rate": 3.998980416245543e-05, "loss": 0.9049, "step": 94 }, { "epoch": 0.020336624655481524, "grad_norm": 0.5566756740675656, "learning_rate": 3.998936572030015e-05, "loss": 0.9151, "step": 95 }, { "epoch": 0.020550694388697117, "grad_norm": 0.49376355752853496, "learning_rate": 3.998891805109024e-05, "loss": 0.904, "step": 96 }, { "epoch": 0.020764764121912714, "grad_norm": 0.4792881635842234, "learning_rate": 3.9988461155032344e-05, "loss": 0.8702, "step": 97 }, { "epoch": 0.020978833855128307, "grad_norm": 0.5267230099582725, "learning_rate": 3.998799503233738e-05, "loss": 0.8907, "step": 98 }, { "epoch": 0.021192903588343904, "grad_norm": 0.467618771139337, "learning_rate": 3.9987519683220483e-05, "loss": 0.8849, "step": 99 }, { "epoch": 0.021406973321559497, "grad_norm": 0.48943080169295844, "learning_rate": 3.99870351079011e-05, "loss": 0.8757, "step": 100 }, { "epoch": 0.021621043054775094, "grad_norm": 0.5455761297358167, "learning_rate": 3.9986541306602894e-05, "loss": 0.874, "step": 101 }, { "epoch": 0.021835112787990687, "grad_norm": 0.5544546890537473, "learning_rate": 3.998603827955381e-05, "loss": 0.8614, "step": 102 }, { "epoch": 0.022049182521206283, "grad_norm": 0.5223485214178217, "learning_rate": 3.9985526026986046e-05, "loss": 0.8871, "step": 103 }, { "epoch": 0.022263252254421877, "grad_norm": 0.562423681293549, "learning_rate": 3.998500454913605e-05, "loss": 0.9012, "step": 104 }, { "epoch": 0.022477321987637473, "grad_norm": 0.6315099710292131, "learning_rate": 3.998447384624454e-05, "loss": 0.8732, "step": 105 }, { "epoch": 0.022691391720853066, "grad_norm": 0.5791228632908744, "learning_rate": 3.9983933918556476e-05, "loss": 0.8617, "step": 106 }, { "epoch": 0.022905461454068663, "grad_norm": 0.5634022864648549, "learning_rate": 3.9983384766321106e-05, "loss": 0.853, "step": 107 }, { "epoch": 0.023119531187284256, "grad_norm": 0.5070564223042243, "learning_rate": 3.99828263897919e-05, "loss": 0.8726, "step": 108 }, { "epoch": 0.023333600920499853, "grad_norm": 0.3773709430324831, "learning_rate": 3.9982258789226625e-05, "loss": 0.9322, "step": 109 }, { "epoch": 0.02354767065371545, "grad_norm": 0.3609456838554747, "learning_rate": 3.998168196488727e-05, "loss": 0.8814, "step": 110 }, { "epoch": 0.023761740386931043, "grad_norm": 0.412295909527699, "learning_rate": 3.9981095917040094e-05, "loss": 0.8747, "step": 111 }, { "epoch": 0.02397581012014664, "grad_norm": 0.3762488153855239, "learning_rate": 3.998050064595562e-05, "loss": 0.8616, "step": 112 }, { "epoch": 0.024189879853362233, "grad_norm": 0.36866061675524436, "learning_rate": 3.997989615190862e-05, "loss": 0.8622, "step": 113 }, { "epoch": 0.02440394958657783, "grad_norm": 0.47042581407904815, "learning_rate": 3.9979282435178135e-05, "loss": 0.9049, "step": 114 }, { "epoch": 0.024618019319793422, "grad_norm": 0.37162201772872094, "learning_rate": 3.9978659496047456e-05, "loss": 0.8515, "step": 115 }, { "epoch": 0.02483208905300902, "grad_norm": 0.37771982910788005, "learning_rate": 3.997802733480412e-05, "loss": 0.8841, "step": 116 }, { "epoch": 0.025046158786224612, "grad_norm": 0.3798986424281923, "learning_rate": 3.9977385951739935e-05, "loss": 0.8686, "step": 117 }, { "epoch": 0.02526022851944021, "grad_norm": 0.3799809323160474, "learning_rate": 3.997673534715097e-05, "loss": 0.8673, "step": 118 }, { "epoch": 0.025474298252655802, "grad_norm": 0.4943355337856385, "learning_rate": 3.9976075521337534e-05, "loss": 0.8803, "step": 119 }, { "epoch": 0.0256883679858714, "grad_norm": 0.39176012267746063, "learning_rate": 3.997540647460421e-05, "loss": 0.8276, "step": 120 }, { "epoch": 0.02590243771908699, "grad_norm": 0.35847329416041274, "learning_rate": 3.997472820725982e-05, "loss": 0.8546, "step": 121 }, { "epoch": 0.02611650745230259, "grad_norm": 0.3973695987050465, "learning_rate": 3.997404071961745e-05, "loss": 0.8595, "step": 122 }, { "epoch": 0.02633057718551818, "grad_norm": 0.4179535298926474, "learning_rate": 3.9973344011994453e-05, "loss": 0.892, "step": 123 }, { "epoch": 0.026544646918733778, "grad_norm": 0.4482449207513205, "learning_rate": 3.9972638084712424e-05, "loss": 0.8723, "step": 124 }, { "epoch": 0.02675871665194937, "grad_norm": 0.47235961115937525, "learning_rate": 3.997192293809722e-05, "loss": 0.9035, "step": 125 }, { "epoch": 0.026972786385164968, "grad_norm": 0.49655813852674496, "learning_rate": 3.997119857247894e-05, "loss": 0.8758, "step": 126 }, { "epoch": 0.02718685611838056, "grad_norm": 0.48475706708204314, "learning_rate": 3.9970464988191965e-05, "loss": 0.8822, "step": 127 }, { "epoch": 0.027400925851596158, "grad_norm": 0.40880945016693543, "learning_rate": 3.99697221855749e-05, "loss": 0.8634, "step": 128 }, { "epoch": 0.02761499558481175, "grad_norm": 0.3655103522350681, "learning_rate": 3.996897016497063e-05, "loss": 0.9002, "step": 129 }, { "epoch": 0.027829065318027348, "grad_norm": 0.43172410026660996, "learning_rate": 3.9968208926726296e-05, "loss": 0.8895, "step": 130 }, { "epoch": 0.02804313505124294, "grad_norm": 0.46427343900301987, "learning_rate": 3.9967438471193265e-05, "loss": 0.8669, "step": 131 }, { "epoch": 0.028257204784458537, "grad_norm": 0.5504951668967951, "learning_rate": 3.99666587987272e-05, "loss": 0.8596, "step": 132 }, { "epoch": 0.028471274517674134, "grad_norm": 0.650575248512708, "learning_rate": 3.9965869909687966e-05, "loss": 0.8193, "step": 133 }, { "epoch": 0.028685344250889727, "grad_norm": 0.7382625130650217, "learning_rate": 3.996507180443975e-05, "loss": 0.8905, "step": 134 }, { "epoch": 0.028899413984105324, "grad_norm": 0.7854088207409329, "learning_rate": 3.996426448335092e-05, "loss": 0.8695, "step": 135 }, { "epoch": 0.029113483717320917, "grad_norm": 0.7567675030960987, "learning_rate": 3.996344794679416e-05, "loss": 0.8604, "step": 136 }, { "epoch": 0.029327553450536514, "grad_norm": 0.726503109872982, "learning_rate": 3.996262219514637e-05, "loss": 0.8397, "step": 137 }, { "epoch": 0.029541623183752107, "grad_norm": 0.6381158401869549, "learning_rate": 3.996178722878872e-05, "loss": 0.8965, "step": 138 }, { "epoch": 0.029755692916967703, "grad_norm": 0.48767806648620604, "learning_rate": 3.996094304810663e-05, "loss": 0.8345, "step": 139 }, { "epoch": 0.029969762650183297, "grad_norm": 0.45817659042886905, "learning_rate": 3.996008965348976e-05, "loss": 0.8845, "step": 140 }, { "epoch": 0.030183832383398893, "grad_norm": 0.47155499357081787, "learning_rate": 3.995922704533205e-05, "loss": 0.8762, "step": 141 }, { "epoch": 0.030397902116614486, "grad_norm": 0.5133652574437176, "learning_rate": 3.995835522403167e-05, "loss": 0.8772, "step": 142 }, { "epoch": 0.030611971849830083, "grad_norm": 0.47623028462087225, "learning_rate": 3.995747418999105e-05, "loss": 0.8216, "step": 143 }, { "epoch": 0.030826041583045676, "grad_norm": 0.43088130022426235, "learning_rate": 3.9956583943616885e-05, "loss": 0.8725, "step": 144 }, { "epoch": 0.031040111316261273, "grad_norm": 0.4990034504213856, "learning_rate": 3.9955684485320094e-05, "loss": 0.8879, "step": 145 }, { "epoch": 0.031254181049476866, "grad_norm": 0.509088423722999, "learning_rate": 3.9954775815515885e-05, "loss": 0.8806, "step": 146 }, { "epoch": 0.03146825078269246, "grad_norm": 0.4878196898279725, "learning_rate": 3.995385793462369e-05, "loss": 0.8159, "step": 147 }, { "epoch": 0.03168232051590806, "grad_norm": 0.5378457617051259, "learning_rate": 3.995293084306719e-05, "loss": 0.854, "step": 148 }, { "epoch": 0.03189639024912365, "grad_norm": 0.4847293019964765, "learning_rate": 3.9951994541274345e-05, "loss": 0.8999, "step": 149 }, { "epoch": 0.032110459982339246, "grad_norm": 0.45735207708928377, "learning_rate": 3.9951049029677336e-05, "loss": 0.8507, "step": 150 }, { "epoch": 0.03232452971555484, "grad_norm": 0.4015977411530065, "learning_rate": 3.995009430871262e-05, "loss": 0.8433, "step": 151 }, { "epoch": 0.03253859944877044, "grad_norm": 0.4659213769968778, "learning_rate": 3.994913037882089e-05, "loss": 0.8377, "step": 152 }, { "epoch": 0.03275266918198603, "grad_norm": 0.496543964665041, "learning_rate": 3.99481572404471e-05, "loss": 0.8754, "step": 153 }, { "epoch": 0.032966738915201625, "grad_norm": 0.8011256219814623, "learning_rate": 3.994717489404044e-05, "loss": 0.8792, "step": 154 }, { "epoch": 0.03318080864841722, "grad_norm": 0.3857661806297209, "learning_rate": 3.994618334005437e-05, "loss": 0.8511, "step": 155 }, { "epoch": 0.03339487838163282, "grad_norm": 0.4892529277103356, "learning_rate": 3.994518257894658e-05, "loss": 0.856, "step": 156 }, { "epoch": 0.033608948114848415, "grad_norm": 0.5032040745248906, "learning_rate": 3.994417261117902e-05, "loss": 0.8869, "step": 157 }, { "epoch": 0.033823017848064005, "grad_norm": 0.4514552548655316, "learning_rate": 3.9943153437217894e-05, "loss": 0.867, "step": 158 }, { "epoch": 0.0340370875812796, "grad_norm": 0.4536746086133719, "learning_rate": 3.994212505753365e-05, "loss": 0.8517, "step": 159 }, { "epoch": 0.0342511573144952, "grad_norm": 0.4066334374512129, "learning_rate": 3.994108747260098e-05, "loss": 0.864, "step": 160 }, { "epoch": 0.034465227047710795, "grad_norm": 0.4055298856419892, "learning_rate": 3.994004068289884e-05, "loss": 0.8737, "step": 161 }, { "epoch": 0.034679296780926384, "grad_norm": 2.104779557084848, "learning_rate": 3.9938984688910424e-05, "loss": 0.8407, "step": 162 }, { "epoch": 0.03489336651414198, "grad_norm": 0.6346506015412093, "learning_rate": 3.9937919491123175e-05, "loss": 0.828, "step": 163 }, { "epoch": 0.03510743624735758, "grad_norm": 0.5957786882455672, "learning_rate": 3.9936845090028784e-05, "loss": 0.8925, "step": 164 }, { "epoch": 0.035321505980573174, "grad_norm": 0.5329421128953273, "learning_rate": 3.9935761486123204e-05, "loss": 0.8558, "step": 165 }, { "epoch": 0.035535575713788764, "grad_norm": 0.5299129923584438, "learning_rate": 3.9934668679906606e-05, "loss": 0.8133, "step": 166 }, { "epoch": 0.03574964544700436, "grad_norm": 0.5218280672821705, "learning_rate": 3.9933566671883434e-05, "loss": 0.8681, "step": 167 }, { "epoch": 0.03596371518021996, "grad_norm": 0.550560457642017, "learning_rate": 3.993245546256239e-05, "loss": 0.8476, "step": 168 }, { "epoch": 0.036177784913435554, "grad_norm": 0.4621680061241431, "learning_rate": 3.993133505245638e-05, "loss": 0.8354, "step": 169 }, { "epoch": 0.036391854646651144, "grad_norm": 0.6373155217327401, "learning_rate": 3.9930205442082595e-05, "loss": 0.8599, "step": 170 }, { "epoch": 0.03660592437986674, "grad_norm": 0.5203859749008951, "learning_rate": 3.992906663196247e-05, "loss": 0.8332, "step": 171 }, { "epoch": 0.03681999411308234, "grad_norm": 0.4363094179995528, "learning_rate": 3.992791862262166e-05, "loss": 0.86, "step": 172 }, { "epoch": 0.037034063846297934, "grad_norm": 0.42070978411572774, "learning_rate": 3.992676141459011e-05, "loss": 0.8536, "step": 173 }, { "epoch": 0.03724813357951352, "grad_norm": 0.45121027670694946, "learning_rate": 3.992559500840195e-05, "loss": 0.8453, "step": 174 }, { "epoch": 0.03746220331272912, "grad_norm": 0.4341733950661765, "learning_rate": 3.992441940459561e-05, "loss": 0.8563, "step": 175 }, { "epoch": 0.037676273045944716, "grad_norm": 0.47227901314281506, "learning_rate": 3.992323460371376e-05, "loss": 0.8721, "step": 176 }, { "epoch": 0.03789034277916031, "grad_norm": 0.48029119556045824, "learning_rate": 3.992204060630328e-05, "loss": 0.8403, "step": 177 }, { "epoch": 0.03810441251237591, "grad_norm": 0.4303295879743548, "learning_rate": 3.992083741291533e-05, "loss": 0.8446, "step": 178 }, { "epoch": 0.0383184822455915, "grad_norm": 0.3874629842751901, "learning_rate": 3.991962502410529e-05, "loss": 0.904, "step": 179 }, { "epoch": 0.038532551978807096, "grad_norm": 0.3858328343883844, "learning_rate": 3.99184034404328e-05, "loss": 0.832, "step": 180 }, { "epoch": 0.03874662171202269, "grad_norm": 0.4411864662637025, "learning_rate": 3.991717266246175e-05, "loss": 0.841, "step": 181 }, { "epoch": 0.03896069144523829, "grad_norm": 0.4291021290425202, "learning_rate": 3.991593269076026e-05, "loss": 0.8698, "step": 182 }, { "epoch": 0.03917476117845388, "grad_norm": 0.3683048851246173, "learning_rate": 3.991468352590069e-05, "loss": 0.8542, "step": 183 }, { "epoch": 0.039388830911669476, "grad_norm": 0.3398453757458759, "learning_rate": 3.9913425168459666e-05, "loss": 0.8906, "step": 184 }, { "epoch": 0.03960290064488507, "grad_norm": 0.38735565990380716, "learning_rate": 3.991215761901804e-05, "loss": 0.8205, "step": 185 }, { "epoch": 0.03981697037810067, "grad_norm": 0.38136761019907073, "learning_rate": 3.99108808781609e-05, "loss": 0.8542, "step": 186 }, { "epoch": 0.04003104011131626, "grad_norm": 0.43905108617330535, "learning_rate": 3.99095949464776e-05, "loss": 0.8698, "step": 187 }, { "epoch": 0.040245109844531855, "grad_norm": 0.4061491303471741, "learning_rate": 3.990829982456172e-05, "loss": 0.8415, "step": 188 }, { "epoch": 0.04045917957774745, "grad_norm": 0.41124646823204997, "learning_rate": 3.9906995513011084e-05, "loss": 0.8895, "step": 189 }, { "epoch": 0.04067324931096305, "grad_norm": 0.3334094320198046, "learning_rate": 3.990568201242775e-05, "loss": 0.8292, "step": 190 }, { "epoch": 0.04088731904417864, "grad_norm": 0.3844590336608152, "learning_rate": 3.9904359323418055e-05, "loss": 0.8981, "step": 191 }, { "epoch": 0.041101388777394235, "grad_norm": 0.4031068929590705, "learning_rate": 3.990302744659252e-05, "loss": 0.8412, "step": 192 }, { "epoch": 0.04131545851060983, "grad_norm": 0.36692441593346126, "learning_rate": 3.9901686382565954e-05, "loss": 0.8415, "step": 193 }, { "epoch": 0.04152952824382543, "grad_norm": 0.2831303025455792, "learning_rate": 3.9900336131957386e-05, "loss": 0.8312, "step": 194 }, { "epoch": 0.04174359797704102, "grad_norm": 0.366415168679558, "learning_rate": 3.989897669539009e-05, "loss": 0.8522, "step": 195 }, { "epoch": 0.041957667710256615, "grad_norm": 0.3943598843733109, "learning_rate": 3.989760807349157e-05, "loss": 0.853, "step": 196 }, { "epoch": 0.04217173744347221, "grad_norm": 0.3474071769471279, "learning_rate": 3.989623026689359e-05, "loss": 0.8656, "step": 197 }, { "epoch": 0.04238580717668781, "grad_norm": 0.31608169403589165, "learning_rate": 3.989484327623215e-05, "loss": 0.8117, "step": 198 }, { "epoch": 0.042599876909903404, "grad_norm": 0.37963562176126103, "learning_rate": 3.9893447102147466e-05, "loss": 0.8231, "step": 199 }, { "epoch": 0.042813946643118994, "grad_norm": 0.40816728349425735, "learning_rate": 3.989204174528402e-05, "loss": 0.8681, "step": 200 }, { "epoch": 0.04302801637633459, "grad_norm": 0.4747247682443595, "learning_rate": 3.9890627206290505e-05, "loss": 0.836, "step": 201 }, { "epoch": 0.04324208610955019, "grad_norm": 0.49508739667588336, "learning_rate": 3.988920348581989e-05, "loss": 0.8707, "step": 202 }, { "epoch": 0.043456155842765784, "grad_norm": 0.49791535308207097, "learning_rate": 3.988777058452936e-05, "loss": 0.8198, "step": 203 }, { "epoch": 0.043670225575981374, "grad_norm": 0.34082252492079196, "learning_rate": 3.988632850308033e-05, "loss": 0.8037, "step": 204 }, { "epoch": 0.04388429530919697, "grad_norm": 0.3294186089388978, "learning_rate": 3.988487724213847e-05, "loss": 0.8362, "step": 205 }, { "epoch": 0.04409836504241257, "grad_norm": 0.35026743920813685, "learning_rate": 3.988341680237367e-05, "loss": 0.8548, "step": 206 }, { "epoch": 0.044312434775628164, "grad_norm": 0.3687532517516464, "learning_rate": 3.9881947184460076e-05, "loss": 0.8676, "step": 207 }, { "epoch": 0.04452650450884375, "grad_norm": 0.32475090803242124, "learning_rate": 3.988046838907606e-05, "loss": 0.8353, "step": 208 }, { "epoch": 0.04474057424205935, "grad_norm": 0.29199110503381154, "learning_rate": 3.9878980416904224e-05, "loss": 0.8643, "step": 209 }, { "epoch": 0.04495464397527495, "grad_norm": 0.4389870509714204, "learning_rate": 3.987748326863141e-05, "loss": 0.826, "step": 210 }, { "epoch": 0.04516871370849054, "grad_norm": 0.36760355389096555, "learning_rate": 3.987597694494872e-05, "loss": 0.8298, "step": 211 }, { "epoch": 0.04538278344170613, "grad_norm": 0.33074126382300445, "learning_rate": 3.9874461446551446e-05, "loss": 0.8178, "step": 212 }, { "epoch": 0.04559685317492173, "grad_norm": 0.3643273559583193, "learning_rate": 3.9872936774139156e-05, "loss": 0.8111, "step": 213 }, { "epoch": 0.045810922908137326, "grad_norm": 0.41777917894694583, "learning_rate": 3.987140292841563e-05, "loss": 0.8217, "step": 214 }, { "epoch": 0.04602499264135292, "grad_norm": 0.36907755400467723, "learning_rate": 3.986985991008888e-05, "loss": 0.821, "step": 215 }, { "epoch": 0.04623906237456851, "grad_norm": 0.4232893993657819, "learning_rate": 3.986830771987118e-05, "loss": 0.8158, "step": 216 }, { "epoch": 0.04645313210778411, "grad_norm": 0.4239041644027544, "learning_rate": 3.9866746358479e-05, "loss": 0.8421, "step": 217 }, { "epoch": 0.046667201840999706, "grad_norm": 0.40093565968948486, "learning_rate": 3.986517582663307e-05, "loss": 0.8159, "step": 218 }, { "epoch": 0.0468812715742153, "grad_norm": 0.4106319864596722, "learning_rate": 3.986359612505835e-05, "loss": 0.8368, "step": 219 }, { "epoch": 0.0470953413074309, "grad_norm": 0.4223283176069799, "learning_rate": 3.9862007254484006e-05, "loss": 0.8199, "step": 220 }, { "epoch": 0.04730941104064649, "grad_norm": 0.37936290326812794, "learning_rate": 3.986040921564349e-05, "loss": 0.838, "step": 221 }, { "epoch": 0.047523480773862085, "grad_norm": 0.35203034317166726, "learning_rate": 3.985880200927442e-05, "loss": 0.8538, "step": 222 }, { "epoch": 0.04773755050707768, "grad_norm": 0.3858112167023, "learning_rate": 3.98571856361187e-05, "loss": 0.8241, "step": 223 }, { "epoch": 0.04795162024029328, "grad_norm": 0.42734155787690564, "learning_rate": 3.9855560096922445e-05, "loss": 0.8149, "step": 224 }, { "epoch": 0.04816568997350887, "grad_norm": 0.4914044224155271, "learning_rate": 3.985392539243599e-05, "loss": 0.8224, "step": 225 }, { "epoch": 0.048379759706724465, "grad_norm": 0.5686722761141866, "learning_rate": 3.9852281523413926e-05, "loss": 0.8315, "step": 226 }, { "epoch": 0.04859382943994006, "grad_norm": 0.4801795715672078, "learning_rate": 3.9850628490615047e-05, "loss": 0.8342, "step": 227 }, { "epoch": 0.04880789917315566, "grad_norm": 0.39210217708523265, "learning_rate": 3.9848966294802395e-05, "loss": 0.8082, "step": 228 }, { "epoch": 0.04902196890637125, "grad_norm": 0.3469412230066025, "learning_rate": 3.9847294936743234e-05, "loss": 0.7959, "step": 229 }, { "epoch": 0.049236038639586845, "grad_norm": 0.3796726457058207, "learning_rate": 3.984561441720907e-05, "loss": 0.8481, "step": 230 }, { "epoch": 0.04945010837280244, "grad_norm": 0.49537905000215987, "learning_rate": 3.984392473697561e-05, "loss": 0.8591, "step": 231 }, { "epoch": 0.04966417810601804, "grad_norm": 0.4251626976992016, "learning_rate": 3.984222589682282e-05, "loss": 0.8062, "step": 232 }, { "epoch": 0.04987824783923363, "grad_norm": 0.4025171048830565, "learning_rate": 3.984051789753488e-05, "loss": 0.8282, "step": 233 }, { "epoch": 0.050092317572449224, "grad_norm": 0.36901048716118134, "learning_rate": 3.98388007399002e-05, "loss": 0.824, "step": 234 }, { "epoch": 0.05030638730566482, "grad_norm": 0.3534585695479168, "learning_rate": 3.983707442471141e-05, "loss": 0.9405, "step": 235 }, { "epoch": 0.05052045703888042, "grad_norm": 0.4027690525572557, "learning_rate": 3.983533895276538e-05, "loss": 0.8181, "step": 236 }, { "epoch": 0.05073452677209601, "grad_norm": 0.4408889835415928, "learning_rate": 3.98335943248632e-05, "loss": 0.8307, "step": 237 }, { "epoch": 0.050948596505311604, "grad_norm": 0.35131499744012107, "learning_rate": 3.983184054181019e-05, "loss": 0.8083, "step": 238 }, { "epoch": 0.0511626662385272, "grad_norm": 0.32597257746183034, "learning_rate": 3.983007760441589e-05, "loss": 0.8272, "step": 239 }, { "epoch": 0.0513767359717428, "grad_norm": 0.39655163014973094, "learning_rate": 3.9828305513494066e-05, "loss": 0.8326, "step": 240 }, { "epoch": 0.05159080570495839, "grad_norm": 0.4535156910719396, "learning_rate": 3.982652426986271e-05, "loss": 0.8536, "step": 241 }, { "epoch": 0.05180487543817398, "grad_norm": 0.3907601010611339, "learning_rate": 3.982473387434404e-05, "loss": 0.8414, "step": 242 }, { "epoch": 0.05201894517138958, "grad_norm": 0.39771764683531524, "learning_rate": 3.9822934327764516e-05, "loss": 0.8218, "step": 243 }, { "epoch": 0.05223301490460518, "grad_norm": 0.37328579168951637, "learning_rate": 3.98211256309548e-05, "loss": 0.8291, "step": 244 }, { "epoch": 0.05244708463782077, "grad_norm": 0.3633500648158138, "learning_rate": 3.981930778474976e-05, "loss": 0.8349, "step": 245 }, { "epoch": 0.05266115437103636, "grad_norm": 0.3702781367598446, "learning_rate": 3.981748078998854e-05, "loss": 0.8151, "step": 246 }, { "epoch": 0.05287522410425196, "grad_norm": 0.3602753970348582, "learning_rate": 3.981564464751445e-05, "loss": 0.8287, "step": 247 }, { "epoch": 0.053089293837467556, "grad_norm": 0.4071160336422372, "learning_rate": 3.981379935817508e-05, "loss": 0.82, "step": 248 }, { "epoch": 0.05330336357068315, "grad_norm": 0.35917590351006656, "learning_rate": 3.981194492282219e-05, "loss": 0.831, "step": 249 }, { "epoch": 0.05351743330389874, "grad_norm": 0.32204623832014, "learning_rate": 3.9810081342311786e-05, "loss": 0.8394, "step": 250 }, { "epoch": 0.05373150303711434, "grad_norm": 0.3483119661055623, "learning_rate": 3.9808208617504106e-05, "loss": 0.8674, "step": 251 }, { "epoch": 0.053945572770329936, "grad_norm": 0.4360437668310791, "learning_rate": 3.980632674926358e-05, "loss": 0.8223, "step": 252 }, { "epoch": 0.05415964250354553, "grad_norm": 0.4641704582383462, "learning_rate": 3.980443573845889e-05, "loss": 0.8015, "step": 253 }, { "epoch": 0.05437371223676112, "grad_norm": 0.4696957121239442, "learning_rate": 3.980253558596292e-05, "loss": 0.8346, "step": 254 }, { "epoch": 0.05458778196997672, "grad_norm": 0.3737882880960115, "learning_rate": 3.980062629265277e-05, "loss": 0.8209, "step": 255 }, { "epoch": 0.054801851703192316, "grad_norm": 0.2975144847988321, "learning_rate": 3.9798707859409774e-05, "loss": 0.8238, "step": 256 }, { "epoch": 0.05501592143640791, "grad_norm": 0.3667298421919832, "learning_rate": 3.9796780287119466e-05, "loss": 0.8354, "step": 257 }, { "epoch": 0.0552299911696235, "grad_norm": 0.40522416701915287, "learning_rate": 3.9794843576671616e-05, "loss": 0.8178, "step": 258 }, { "epoch": 0.0554440609028391, "grad_norm": 1.309590427072815, "learning_rate": 3.979289772896021e-05, "loss": 0.8378, "step": 259 }, { "epoch": 0.055658130636054695, "grad_norm": 0.42641461789020774, "learning_rate": 3.9790942744883444e-05, "loss": 0.811, "step": 260 }, { "epoch": 0.05587220036927029, "grad_norm": 0.563183288710883, "learning_rate": 3.978897862534374e-05, "loss": 0.8427, "step": 261 }, { "epoch": 0.05608627010248588, "grad_norm": 0.6529241510079115, "learning_rate": 3.978700537124772e-05, "loss": 0.8414, "step": 262 }, { "epoch": 0.05630033983570148, "grad_norm": 0.5458687208648185, "learning_rate": 3.978502298350625e-05, "loss": 0.8278, "step": 263 }, { "epoch": 0.056514409568917075, "grad_norm": 0.4534677439535358, "learning_rate": 3.978303146303438e-05, "loss": 0.8515, "step": 264 }, { "epoch": 0.05672847930213267, "grad_norm": 0.7640035334309163, "learning_rate": 3.978103081075141e-05, "loss": 0.7841, "step": 265 }, { "epoch": 0.05694254903534827, "grad_norm": 0.7697304707149756, "learning_rate": 3.9779021027580827e-05, "loss": 0.8562, "step": 266 }, { "epoch": 0.05715661876856386, "grad_norm": 0.5582322749059998, "learning_rate": 3.977700211445034e-05, "loss": 0.8212, "step": 267 }, { "epoch": 0.057370688501779454, "grad_norm": 0.5278221215123791, "learning_rate": 3.9774974072291884e-05, "loss": 0.8213, "step": 268 }, { "epoch": 0.05758475823499505, "grad_norm": 0.41272355095007723, "learning_rate": 3.977293690204159e-05, "loss": 0.7884, "step": 269 }, { "epoch": 0.05779882796821065, "grad_norm": 0.38048044038534395, "learning_rate": 3.977089060463982e-05, "loss": 0.8024, "step": 270 }, { "epoch": 0.05801289770142624, "grad_norm": 0.4995866625895206, "learning_rate": 3.976883518103115e-05, "loss": 0.7964, "step": 271 }, { "epoch": 0.058226967434641834, "grad_norm": 0.4089655275491415, "learning_rate": 3.9766770632164336e-05, "loss": 0.781, "step": 272 }, { "epoch": 0.05844103716785743, "grad_norm": 0.373373102001395, "learning_rate": 3.976469695899238e-05, "loss": 0.7916, "step": 273 }, { "epoch": 0.05865510690107303, "grad_norm": 0.3804694016639783, "learning_rate": 3.9762614162472496e-05, "loss": 0.7615, "step": 274 }, { "epoch": 0.05886917663428862, "grad_norm": 0.37938721740023695, "learning_rate": 3.976052224356609e-05, "loss": 0.8109, "step": 275 }, { "epoch": 0.059083246367504214, "grad_norm": 0.40528840545316336, "learning_rate": 3.975842120323879e-05, "loss": 0.8283, "step": 276 }, { "epoch": 0.05929731610071981, "grad_norm": 0.3859803919733042, "learning_rate": 3.9756311042460434e-05, "loss": 0.8038, "step": 277 }, { "epoch": 0.05951138583393541, "grad_norm": 0.3330269112958392, "learning_rate": 3.975419176220506e-05, "loss": 0.8686, "step": 278 }, { "epoch": 0.059725455567150997, "grad_norm": 0.30938332914596234, "learning_rate": 3.9752063363450935e-05, "loss": 0.8186, "step": 279 }, { "epoch": 0.05993952530036659, "grad_norm": 0.6301295625426127, "learning_rate": 3.974992584718051e-05, "loss": 0.8481, "step": 280 }, { "epoch": 0.06015359503358219, "grad_norm": 0.4359404599621847, "learning_rate": 3.974777921438048e-05, "loss": 0.8328, "step": 281 }, { "epoch": 0.060367664766797786, "grad_norm": 0.474513946124991, "learning_rate": 3.974562346604171e-05, "loss": 0.8206, "step": 282 }, { "epoch": 0.060581734500013376, "grad_norm": 0.5311517647733177, "learning_rate": 3.9743458603159295e-05, "loss": 0.8154, "step": 283 }, { "epoch": 0.06079580423322897, "grad_norm": 0.4714557871254846, "learning_rate": 3.974128462673253e-05, "loss": 0.8523, "step": 284 }, { "epoch": 0.06100987396644457, "grad_norm": 0.3535252356315764, "learning_rate": 3.973910153776492e-05, "loss": 0.84, "step": 285 }, { "epoch": 0.061223943699660166, "grad_norm": 0.39662502883369144, "learning_rate": 3.9736909337264166e-05, "loss": 0.8414, "step": 286 }, { "epoch": 0.06143801343287576, "grad_norm": 0.42934998708894967, "learning_rate": 3.97347080262422e-05, "loss": 0.8042, "step": 287 }, { "epoch": 0.06165208316609135, "grad_norm": 0.4601344074880732, "learning_rate": 3.9732497605715136e-05, "loss": 0.8316, "step": 288 }, { "epoch": 0.06186615289930695, "grad_norm": 0.4355363882508308, "learning_rate": 3.9730278076703293e-05, "loss": 0.8386, "step": 289 }, { "epoch": 0.062080222632522546, "grad_norm": 0.40532871292062256, "learning_rate": 3.9728049440231216e-05, "loss": 0.815, "step": 290 }, { "epoch": 0.06229429236573814, "grad_norm": 0.3613108221158915, "learning_rate": 3.972581169732762e-05, "loss": 0.7949, "step": 291 }, { "epoch": 0.06250836209895373, "grad_norm": 0.4526241087199315, "learning_rate": 3.972356484902546e-05, "loss": 0.8251, "step": 292 }, { "epoch": 0.06272243183216933, "grad_norm": 0.4780714113750351, "learning_rate": 3.972130889636187e-05, "loss": 0.8441, "step": 293 }, { "epoch": 0.06293650156538493, "grad_norm": 0.4004084776021443, "learning_rate": 3.97190438403782e-05, "loss": 0.8188, "step": 294 }, { "epoch": 0.06315057129860052, "grad_norm": 0.3604097941785592, "learning_rate": 3.971676968211998e-05, "loss": 0.8404, "step": 295 }, { "epoch": 0.06336464103181612, "grad_norm": 0.3982334974950628, "learning_rate": 3.971448642263697e-05, "loss": 0.8249, "step": 296 }, { "epoch": 0.06357871076503172, "grad_norm": 0.47301721418460696, "learning_rate": 3.971219406298312e-05, "loss": 0.8195, "step": 297 }, { "epoch": 0.0637927804982473, "grad_norm": 0.3805707811072661, "learning_rate": 3.9709892604216576e-05, "loss": 0.8268, "step": 298 }, { "epoch": 0.0640068502314629, "grad_norm": 0.4056773003140015, "learning_rate": 3.970758204739968e-05, "loss": 0.8127, "step": 299 }, { "epoch": 0.06422091996467849, "grad_norm": 0.4032926251226971, "learning_rate": 3.9705262393598996e-05, "loss": 0.8351, "step": 300 }, { "epoch": 0.06443498969789409, "grad_norm": 0.40841832469995953, "learning_rate": 3.970293364388526e-05, "loss": 0.7682, "step": 301 }, { "epoch": 0.06464905943110968, "grad_norm": 0.4415611740074898, "learning_rate": 3.970059579933342e-05, "loss": 0.801, "step": 302 }, { "epoch": 0.06486312916432528, "grad_norm": 0.3722992901576134, "learning_rate": 3.969824886102262e-05, "loss": 0.8077, "step": 303 }, { "epoch": 0.06507719889754088, "grad_norm": 0.3257654805382487, "learning_rate": 3.969589283003621e-05, "loss": 0.8045, "step": 304 }, { "epoch": 0.06529126863075647, "grad_norm": 0.34518855917033997, "learning_rate": 3.969352770746173e-05, "loss": 0.8056, "step": 305 }, { "epoch": 0.06550533836397206, "grad_norm": 0.3651226904229758, "learning_rate": 3.96911534943909e-05, "loss": 0.8487, "step": 306 }, { "epoch": 0.06571940809718765, "grad_norm": 0.39292873911020426, "learning_rate": 3.9688770191919665e-05, "loss": 0.8159, "step": 307 }, { "epoch": 0.06593347783040325, "grad_norm": 0.33420897271201755, "learning_rate": 3.968637780114815e-05, "loss": 0.8183, "step": 308 }, { "epoch": 0.06614754756361885, "grad_norm": 0.33910887310227855, "learning_rate": 3.968397632318068e-05, "loss": 0.8023, "step": 309 }, { "epoch": 0.06636161729683444, "grad_norm": 0.4242209288953011, "learning_rate": 3.9681565759125775e-05, "loss": 0.817, "step": 310 }, { "epoch": 0.06657568703005004, "grad_norm": 0.4205864487007387, "learning_rate": 3.967914611009614e-05, "loss": 0.8008, "step": 311 }, { "epoch": 0.06678975676326564, "grad_norm": 0.38211559167478176, "learning_rate": 3.967671737720869e-05, "loss": 0.8234, "step": 312 }, { "epoch": 0.06700382649648123, "grad_norm": 0.30498176756349277, "learning_rate": 3.9674279561584514e-05, "loss": 0.8099, "step": 313 }, { "epoch": 0.06721789622969683, "grad_norm": 0.3177472315752944, "learning_rate": 3.967183266434891e-05, "loss": 0.8241, "step": 314 }, { "epoch": 0.06743196596291241, "grad_norm": 0.3991469669718329, "learning_rate": 3.966937668663136e-05, "loss": 0.8269, "step": 315 }, { "epoch": 0.06764603569612801, "grad_norm": 0.3836837245424652, "learning_rate": 3.9666911629565534e-05, "loss": 0.8051, "step": 316 }, { "epoch": 0.0678601054293436, "grad_norm": 0.33706670379619297, "learning_rate": 3.966443749428931e-05, "loss": 0.8179, "step": 317 }, { "epoch": 0.0680741751625592, "grad_norm": 0.3381706480607538, "learning_rate": 3.966195428194472e-05, "loss": 0.8051, "step": 318 }, { "epoch": 0.0682882448957748, "grad_norm": 0.33121589164954485, "learning_rate": 3.965946199367804e-05, "loss": 0.8183, "step": 319 }, { "epoch": 0.0685023146289904, "grad_norm": 0.32660295615566726, "learning_rate": 3.9656960630639686e-05, "loss": 0.8168, "step": 320 }, { "epoch": 0.06871638436220599, "grad_norm": 0.331583354100291, "learning_rate": 3.965445019398429e-05, "loss": 0.8055, "step": 321 }, { "epoch": 0.06893045409542159, "grad_norm": 0.3266278746059167, "learning_rate": 3.9651930684870666e-05, "loss": 0.8269, "step": 322 }, { "epoch": 0.06914452382863717, "grad_norm": 0.47908107351770507, "learning_rate": 3.96494021044618e-05, "loss": 0.8003, "step": 323 }, { "epoch": 0.06935859356185277, "grad_norm": 0.3530060012652866, "learning_rate": 3.9646864453924905e-05, "loss": 0.8131, "step": 324 }, { "epoch": 0.06957266329506837, "grad_norm": 0.3295787877177966, "learning_rate": 3.9644317734431344e-05, "loss": 0.8097, "step": 325 }, { "epoch": 0.06978673302828396, "grad_norm": 0.3517571963617062, "learning_rate": 3.964176194715667e-05, "loss": 0.8061, "step": 326 }, { "epoch": 0.07000080276149956, "grad_norm": 0.3362094473757138, "learning_rate": 3.963919709328064e-05, "loss": 0.7805, "step": 327 }, { "epoch": 0.07021487249471516, "grad_norm": 0.32807044312133443, "learning_rate": 3.9636623173987176e-05, "loss": 0.8123, "step": 328 }, { "epoch": 0.07042894222793075, "grad_norm": 0.3444530460484444, "learning_rate": 3.963404019046441e-05, "loss": 0.8152, "step": 329 }, { "epoch": 0.07064301196114635, "grad_norm": 0.3442453072545327, "learning_rate": 3.963144814390463e-05, "loss": 0.8282, "step": 330 }, { "epoch": 0.07085708169436195, "grad_norm": 0.3128444035140549, "learning_rate": 3.9628847035504326e-05, "loss": 0.8065, "step": 331 }, { "epoch": 0.07107115142757753, "grad_norm": 0.31629960274605623, "learning_rate": 3.962623686646416e-05, "loss": 0.8214, "step": 332 }, { "epoch": 0.07128522116079312, "grad_norm": 0.3443646437494624, "learning_rate": 3.962361763798899e-05, "loss": 0.8394, "step": 333 }, { "epoch": 0.07149929089400872, "grad_norm": 0.4808659192640893, "learning_rate": 3.962098935128783e-05, "loss": 0.8375, "step": 334 }, { "epoch": 0.07171336062722432, "grad_norm": 0.32973106515477585, "learning_rate": 3.9618352007573906e-05, "loss": 0.7917, "step": 335 }, { "epoch": 0.07192743036043991, "grad_norm": 0.34663939104067665, "learning_rate": 3.961570560806461e-05, "loss": 0.7989, "step": 336 }, { "epoch": 0.07214150009365551, "grad_norm": 0.3151206600229396, "learning_rate": 3.9613050153981515e-05, "loss": 0.8217, "step": 337 }, { "epoch": 0.07235556982687111, "grad_norm": 0.3284998110054707, "learning_rate": 3.9610385646550374e-05, "loss": 0.8002, "step": 338 }, { "epoch": 0.0725696395600867, "grad_norm": 0.3316066108268066, "learning_rate": 3.960771208700111e-05, "loss": 0.8179, "step": 339 }, { "epoch": 0.07278370929330229, "grad_norm": 0.3107803293102406, "learning_rate": 3.9605029476567845e-05, "loss": 0.7983, "step": 340 }, { "epoch": 0.07299777902651788, "grad_norm": 0.3167494965705026, "learning_rate": 3.960233781648886e-05, "loss": 0.8023, "step": 341 }, { "epoch": 0.07321184875973348, "grad_norm": 0.28869946757029363, "learning_rate": 3.959963710800662e-05, "loss": 0.8063, "step": 342 }, { "epoch": 0.07342591849294908, "grad_norm": 0.3356343455698333, "learning_rate": 3.9596927352367774e-05, "loss": 0.8586, "step": 343 }, { "epoch": 0.07363998822616467, "grad_norm": 0.2698423586183263, "learning_rate": 3.959420855082314e-05, "loss": 0.7954, "step": 344 }, { "epoch": 0.07385405795938027, "grad_norm": 0.2797285230172307, "learning_rate": 3.9591480704627695e-05, "loss": 0.8249, "step": 345 }, { "epoch": 0.07406812769259587, "grad_norm": 0.2872525953074487, "learning_rate": 3.958874381504063e-05, "loss": 0.8093, "step": 346 }, { "epoch": 0.07428219742581146, "grad_norm": 0.2794424585889053, "learning_rate": 3.9585997883325275e-05, "loss": 0.8004, "step": 347 }, { "epoch": 0.07449626715902705, "grad_norm": 0.28923703879391893, "learning_rate": 3.958324291074915e-05, "loss": 0.8063, "step": 348 }, { "epoch": 0.07471033689224264, "grad_norm": 0.28718264474262556, "learning_rate": 3.9580478898583946e-05, "loss": 0.8338, "step": 349 }, { "epoch": 0.07492440662545824, "grad_norm": 0.3098163432395999, "learning_rate": 3.9577705848105534e-05, "loss": 0.809, "step": 350 }, { "epoch": 0.07513847635867384, "grad_norm": 0.31522990371085624, "learning_rate": 3.957492376059393e-05, "loss": 0.8154, "step": 351 }, { "epoch": 0.07535254609188943, "grad_norm": 0.28338087888379176, "learning_rate": 3.9572132637333354e-05, "loss": 0.818, "step": 352 }, { "epoch": 0.07556661582510503, "grad_norm": 0.27413238491020875, "learning_rate": 3.956933247961218e-05, "loss": 0.789, "step": 353 }, { "epoch": 0.07578068555832063, "grad_norm": 0.29472918038836227, "learning_rate": 3.956652328872296e-05, "loss": 0.8045, "step": 354 }, { "epoch": 0.07599475529153622, "grad_norm": 0.32822208650805884, "learning_rate": 3.956370506596241e-05, "loss": 0.7943, "step": 355 }, { "epoch": 0.07620882502475182, "grad_norm": 0.3654921111096867, "learning_rate": 3.956087781263141e-05, "loss": 0.8134, "step": 356 }, { "epoch": 0.0764228947579674, "grad_norm": 0.3305771615214981, "learning_rate": 3.955804153003502e-05, "loss": 0.7889, "step": 357 }, { "epoch": 0.076636964491183, "grad_norm": 0.3186337070746346, "learning_rate": 3.9555196219482465e-05, "loss": 0.7702, "step": 358 }, { "epoch": 0.0768510342243986, "grad_norm": 0.315898247232711, "learning_rate": 3.9552341882287126e-05, "loss": 0.7864, "step": 359 }, { "epoch": 0.07706510395761419, "grad_norm": 0.2836453847637815, "learning_rate": 3.9549478519766574e-05, "loss": 0.7744, "step": 360 }, { "epoch": 0.07727917369082979, "grad_norm": 0.30499627108597266, "learning_rate": 3.954660613324252e-05, "loss": 0.8501, "step": 361 }, { "epoch": 0.07749324342404539, "grad_norm": 0.3538275134350595, "learning_rate": 3.9543724724040854e-05, "loss": 0.8076, "step": 362 }, { "epoch": 0.07770731315726098, "grad_norm": 0.30000054335125104, "learning_rate": 3.9540834293491636e-05, "loss": 0.8131, "step": 363 }, { "epoch": 0.07792138289047658, "grad_norm": 0.2931599936516347, "learning_rate": 3.953793484292908e-05, "loss": 0.7891, "step": 364 }, { "epoch": 0.07813545262369216, "grad_norm": 0.29320389703691146, "learning_rate": 3.9535026373691554e-05, "loss": 0.8171, "step": 365 }, { "epoch": 0.07834952235690776, "grad_norm": 0.28709837757977674, "learning_rate": 3.953210888712162e-05, "loss": 0.8229, "step": 366 }, { "epoch": 0.07856359209012335, "grad_norm": 0.29183743422406655, "learning_rate": 3.952918238456599e-05, "loss": 0.785, "step": 367 }, { "epoch": 0.07877766182333895, "grad_norm": 0.3045123243298051, "learning_rate": 3.952624686737551e-05, "loss": 0.8198, "step": 368 }, { "epoch": 0.07899173155655455, "grad_norm": 0.30356376258153456, "learning_rate": 3.952330233690522e-05, "loss": 0.8174, "step": 369 }, { "epoch": 0.07920580128977014, "grad_norm": 0.3057393338323986, "learning_rate": 3.9520348794514316e-05, "loss": 0.8337, "step": 370 }, { "epoch": 0.07941987102298574, "grad_norm": 0.3136540585600542, "learning_rate": 3.951738624156614e-05, "loss": 0.772, "step": 371 }, { "epoch": 0.07963394075620134, "grad_norm": 0.308298984980087, "learning_rate": 3.95144146794282e-05, "loss": 0.8192, "step": 372 }, { "epoch": 0.07984801048941692, "grad_norm": 0.29718555367357496, "learning_rate": 3.9511434109472173e-05, "loss": 0.8334, "step": 373 }, { "epoch": 0.08006208022263252, "grad_norm": 0.2912289078920853, "learning_rate": 3.950844453307387e-05, "loss": 0.7954, "step": 374 }, { "epoch": 0.08027614995584811, "grad_norm": 0.3181402386904103, "learning_rate": 3.9505445951613286e-05, "loss": 0.7862, "step": 375 }, { "epoch": 0.08049021968906371, "grad_norm": 0.30604477014382697, "learning_rate": 3.950243836647456e-05, "loss": 0.8126, "step": 376 }, { "epoch": 0.08070428942227931, "grad_norm": 0.3360179805652376, "learning_rate": 3.949942177904598e-05, "loss": 0.7973, "step": 377 }, { "epoch": 0.0809183591554949, "grad_norm": 0.3686297699994102, "learning_rate": 3.9496396190720004e-05, "loss": 0.7621, "step": 378 }, { "epoch": 0.0811324288887105, "grad_norm": 0.4008325822424953, "learning_rate": 3.9493361602893234e-05, "loss": 0.7653, "step": 379 }, { "epoch": 0.0813464986219261, "grad_norm": 0.3386933604103968, "learning_rate": 3.9490318016966435e-05, "loss": 0.8287, "step": 380 }, { "epoch": 0.0815605683551417, "grad_norm": 0.3011982026024404, "learning_rate": 3.948726543434451e-05, "loss": 0.8307, "step": 381 }, { "epoch": 0.08177463808835728, "grad_norm": 0.28080627474451375, "learning_rate": 3.9484203856436536e-05, "loss": 0.8102, "step": 382 }, { "epoch": 0.08198870782157287, "grad_norm": 0.3080645597652293, "learning_rate": 3.9481133284655736e-05, "loss": 0.7848, "step": 383 }, { "epoch": 0.08220277755478847, "grad_norm": 0.3589399876731314, "learning_rate": 3.9478053720419474e-05, "loss": 0.7941, "step": 384 }, { "epoch": 0.08241684728800407, "grad_norm": 0.34417850338458056, "learning_rate": 3.947496516514926e-05, "loss": 0.8075, "step": 385 }, { "epoch": 0.08263091702121966, "grad_norm": 0.37642738265223674, "learning_rate": 3.947186762027078e-05, "loss": 0.7935, "step": 386 }, { "epoch": 0.08284498675443526, "grad_norm": 0.4211305728036991, "learning_rate": 3.9468761087213864e-05, "loss": 0.8258, "step": 387 }, { "epoch": 0.08305905648765086, "grad_norm": 0.3826173978469827, "learning_rate": 3.946564556741246e-05, "loss": 0.8389, "step": 388 }, { "epoch": 0.08327312622086645, "grad_norm": 0.35118327732229604, "learning_rate": 3.946252106230469e-05, "loss": 0.8192, "step": 389 }, { "epoch": 0.08348719595408204, "grad_norm": 0.35210458632394, "learning_rate": 3.9459387573332826e-05, "loss": 0.8237, "step": 390 }, { "epoch": 0.08370126568729763, "grad_norm": 0.3337749530435175, "learning_rate": 3.945624510194328e-05, "loss": 0.7743, "step": 391 }, { "epoch": 0.08391533542051323, "grad_norm": 0.33434494046784513, "learning_rate": 3.945309364958662e-05, "loss": 0.8695, "step": 392 }, { "epoch": 0.08412940515372883, "grad_norm": 0.3241230752049322, "learning_rate": 3.944993321771754e-05, "loss": 0.8008, "step": 393 }, { "epoch": 0.08434347488694442, "grad_norm": 0.30413418950235466, "learning_rate": 3.9446763807794887e-05, "loss": 0.7955, "step": 394 }, { "epoch": 0.08455754462016002, "grad_norm": 0.33284889602440043, "learning_rate": 3.944358542128166e-05, "loss": 0.7702, "step": 395 }, { "epoch": 0.08477161435337562, "grad_norm": 0.36554769164425394, "learning_rate": 3.944039805964499e-05, "loss": 0.8267, "step": 396 }, { "epoch": 0.08498568408659121, "grad_norm": 0.29715786433131725, "learning_rate": 3.943720172435617e-05, "loss": 0.7628, "step": 397 }, { "epoch": 0.08519975381980681, "grad_norm": 0.38192305128262505, "learning_rate": 3.943399641689061e-05, "loss": 0.8062, "step": 398 }, { "epoch": 0.08541382355302239, "grad_norm": 0.3844664213287207, "learning_rate": 3.943078213872788e-05, "loss": 0.7531, "step": 399 }, { "epoch": 0.08562789328623799, "grad_norm": 0.3358990433389961, "learning_rate": 3.942755889135169e-05, "loss": 0.8012, "step": 400 }, { "epoch": 0.08584196301945358, "grad_norm": 0.43718403319920773, "learning_rate": 3.9424326676249874e-05, "loss": 0.7862, "step": 401 }, { "epoch": 0.08605603275266918, "grad_norm": 0.437345292516579, "learning_rate": 3.942108549491442e-05, "loss": 0.766, "step": 402 }, { "epoch": 0.08627010248588478, "grad_norm": 0.30500226693756965, "learning_rate": 3.941783534884146e-05, "loss": 0.8088, "step": 403 }, { "epoch": 0.08648417221910037, "grad_norm": 0.43272229710292914, "learning_rate": 3.941457623953125e-05, "loss": 0.7802, "step": 404 }, { "epoch": 0.08669824195231597, "grad_norm": 0.48940603312831304, "learning_rate": 3.941130816848818e-05, "loss": 0.7547, "step": 405 }, { "epoch": 0.08691231168553157, "grad_norm": 0.35487060015388955, "learning_rate": 3.940803113722079e-05, "loss": 0.8284, "step": 406 }, { "epoch": 0.08712638141874715, "grad_norm": 0.4442303353198171, "learning_rate": 3.9404745147241765e-05, "loss": 0.8189, "step": 407 }, { "epoch": 0.08734045115196275, "grad_norm": 0.40338436474672607, "learning_rate": 3.94014502000679e-05, "loss": 0.7775, "step": 408 }, { "epoch": 0.08755452088517834, "grad_norm": 0.3570502308426536, "learning_rate": 3.939814629722014e-05, "loss": 0.7955, "step": 409 }, { "epoch": 0.08776859061839394, "grad_norm": 0.38292216978816046, "learning_rate": 3.939483344022355e-05, "loss": 0.7958, "step": 410 }, { "epoch": 0.08798266035160954, "grad_norm": 0.3070187493512314, "learning_rate": 3.9391511630607356e-05, "loss": 0.7875, "step": 411 }, { "epoch": 0.08819673008482513, "grad_norm": 0.3169319812921083, "learning_rate": 3.9388180869904885e-05, "loss": 0.7871, "step": 412 }, { "epoch": 0.08841079981804073, "grad_norm": 0.3646895828122296, "learning_rate": 3.9384841159653617e-05, "loss": 0.8015, "step": 413 }, { "epoch": 0.08862486955125633, "grad_norm": 0.39476208220879444, "learning_rate": 3.9381492501395157e-05, "loss": 0.7908, "step": 414 }, { "epoch": 0.08883893928447191, "grad_norm": 0.3223532335546537, "learning_rate": 3.937813489667524e-05, "loss": 0.7759, "step": 415 }, { "epoch": 0.0890530090176875, "grad_norm": 0.32244150295392837, "learning_rate": 3.9374768347043724e-05, "loss": 0.8035, "step": 416 }, { "epoch": 0.0892670787509031, "grad_norm": 0.27716414649444343, "learning_rate": 3.9371392854054605e-05, "loss": 0.8271, "step": 417 }, { "epoch": 0.0894811484841187, "grad_norm": 0.31640706444434497, "learning_rate": 3.936800841926601e-05, "loss": 0.8002, "step": 418 }, { "epoch": 0.0896952182173343, "grad_norm": 0.28070614377948816, "learning_rate": 3.936461504424018e-05, "loss": 0.7636, "step": 419 }, { "epoch": 0.0899092879505499, "grad_norm": 0.323197171382002, "learning_rate": 3.936121273054349e-05, "loss": 0.7975, "step": 420 }, { "epoch": 0.09012335768376549, "grad_norm": 0.3275896009129179, "learning_rate": 3.935780147974646e-05, "loss": 0.7978, "step": 421 }, { "epoch": 0.09033742741698109, "grad_norm": 0.31620123998333727, "learning_rate": 3.9354381293423684e-05, "loss": 0.8278, "step": 422 }, { "epoch": 0.09055149715019668, "grad_norm": 0.33135603565790595, "learning_rate": 3.935095217315394e-05, "loss": 0.8121, "step": 423 }, { "epoch": 0.09076556688341227, "grad_norm": 0.2709838730320346, "learning_rate": 3.9347514120520104e-05, "loss": 0.7872, "step": 424 }, { "epoch": 0.09097963661662786, "grad_norm": 0.3081730667884663, "learning_rate": 3.934406713710915e-05, "loss": 0.7798, "step": 425 }, { "epoch": 0.09119370634984346, "grad_norm": 0.2936912909537816, "learning_rate": 3.934061122451223e-05, "loss": 0.7912, "step": 426 }, { "epoch": 0.09140777608305906, "grad_norm": 0.29386606384320585, "learning_rate": 3.933714638432458e-05, "loss": 0.7724, "step": 427 }, { "epoch": 0.09162184581627465, "grad_norm": 0.35283616412668806, "learning_rate": 3.9333672618145545e-05, "loss": 0.8262, "step": 428 }, { "epoch": 0.09183591554949025, "grad_norm": 0.33350158381331013, "learning_rate": 3.933018992757862e-05, "loss": 0.8252, "step": 429 }, { "epoch": 0.09204998528270585, "grad_norm": 0.3679928914569241, "learning_rate": 3.9326698314231414e-05, "loss": 0.7915, "step": 430 }, { "epoch": 0.09226405501592144, "grad_norm": 0.3278069512216059, "learning_rate": 3.932319777971564e-05, "loss": 0.782, "step": 431 }, { "epoch": 0.09247812474913703, "grad_norm": 0.30042433795201073, "learning_rate": 3.931968832564716e-05, "loss": 0.7707, "step": 432 }, { "epoch": 0.09269219448235262, "grad_norm": 0.32775465415944327, "learning_rate": 3.931616995364589e-05, "loss": 0.8191, "step": 433 }, { "epoch": 0.09290626421556822, "grad_norm": 0.3317510455541998, "learning_rate": 3.9312642665335946e-05, "loss": 0.774, "step": 434 }, { "epoch": 0.09312033394878381, "grad_norm": 0.3815492582776924, "learning_rate": 3.9309106462345496e-05, "loss": 0.7965, "step": 435 }, { "epoch": 0.09333440368199941, "grad_norm": 0.3181651283506857, "learning_rate": 3.930556134630685e-05, "loss": 0.8283, "step": 436 }, { "epoch": 0.09354847341521501, "grad_norm": 0.30445611297616393, "learning_rate": 3.930200731885643e-05, "loss": 0.7769, "step": 437 }, { "epoch": 0.0937625431484306, "grad_norm": 0.4112724368184057, "learning_rate": 3.9298444381634764e-05, "loss": 0.8069, "step": 438 }, { "epoch": 0.0939766128816462, "grad_norm": 0.26329988235448815, "learning_rate": 3.9294872536286495e-05, "loss": 0.7896, "step": 439 }, { "epoch": 0.0941906826148618, "grad_norm": 0.3415432326279578, "learning_rate": 3.9291291784460384e-05, "loss": 0.7944, "step": 440 }, { "epoch": 0.09440475234807738, "grad_norm": 0.36580755245009844, "learning_rate": 3.92877021278093e-05, "loss": 0.7967, "step": 441 }, { "epoch": 0.09461882208129298, "grad_norm": 0.3063955546059932, "learning_rate": 3.928410356799022e-05, "loss": 0.7832, "step": 442 }, { "epoch": 0.09483289181450857, "grad_norm": 0.2846537103112679, "learning_rate": 3.9280496106664244e-05, "loss": 0.8257, "step": 443 }, { "epoch": 0.09504696154772417, "grad_norm": 0.3025145656909554, "learning_rate": 3.9276879745496546e-05, "loss": 0.7949, "step": 444 }, { "epoch": 0.09526103128093977, "grad_norm": 0.36490858653861336, "learning_rate": 3.9273254486156454e-05, "loss": 0.7866, "step": 445 }, { "epoch": 0.09547510101415536, "grad_norm": 0.3284436117692305, "learning_rate": 3.9269620330317366e-05, "loss": 0.801, "step": 446 }, { "epoch": 0.09568917074737096, "grad_norm": 0.28334521928060885, "learning_rate": 3.9265977279656815e-05, "loss": 0.7989, "step": 447 }, { "epoch": 0.09590324048058656, "grad_norm": 0.2810576285736024, "learning_rate": 3.926232533585642e-05, "loss": 0.7866, "step": 448 }, { "epoch": 0.09611731021380214, "grad_norm": 0.3622475624519783, "learning_rate": 3.9258664500601905e-05, "loss": 0.7673, "step": 449 }, { "epoch": 0.09633137994701774, "grad_norm": 0.3731888046724523, "learning_rate": 3.925499477558311e-05, "loss": 0.8017, "step": 450 }, { "epoch": 0.09654544968023333, "grad_norm": 0.4283601076692715, "learning_rate": 3.925131616249398e-05, "loss": 0.8008, "step": 451 }, { "epoch": 0.09675951941344893, "grad_norm": 0.38686603375095646, "learning_rate": 3.9247628663032546e-05, "loss": 0.7818, "step": 452 }, { "epoch": 0.09697358914666453, "grad_norm": 0.335515955694342, "learning_rate": 3.924393227890096e-05, "loss": 0.7737, "step": 453 }, { "epoch": 0.09718765887988012, "grad_norm": 0.41277152616792745, "learning_rate": 3.9240227011805455e-05, "loss": 0.803, "step": 454 }, { "epoch": 0.09740172861309572, "grad_norm": 0.3704392992755366, "learning_rate": 3.923651286345638e-05, "loss": 0.7652, "step": 455 }, { "epoch": 0.09761579834631132, "grad_norm": 0.348136323262541, "learning_rate": 3.923278983556819e-05, "loss": 0.8068, "step": 456 }, { "epoch": 0.0978298680795269, "grad_norm": 0.2743148395487614, "learning_rate": 3.9229057929859416e-05, "loss": 0.8237, "step": 457 }, { "epoch": 0.0980439378127425, "grad_norm": 0.3201814348066745, "learning_rate": 3.9225317148052704e-05, "loss": 0.7556, "step": 458 }, { "epoch": 0.09825800754595809, "grad_norm": 0.3080337682779766, "learning_rate": 3.9221567491874784e-05, "loss": 0.7774, "step": 459 }, { "epoch": 0.09847207727917369, "grad_norm": 0.33474051456448056, "learning_rate": 3.9217808963056496e-05, "loss": 0.7763, "step": 460 }, { "epoch": 0.09868614701238929, "grad_norm": 0.3233316417468542, "learning_rate": 3.921404156333277e-05, "loss": 0.7565, "step": 461 }, { "epoch": 0.09890021674560488, "grad_norm": 0.35917382285440097, "learning_rate": 3.921026529444264e-05, "loss": 0.8205, "step": 462 }, { "epoch": 0.09911428647882048, "grad_norm": 0.3720804896494926, "learning_rate": 3.920648015812921e-05, "loss": 0.7671, "step": 463 }, { "epoch": 0.09932835621203608, "grad_norm": 0.3605724635094279, "learning_rate": 3.92026861561397e-05, "loss": 0.7583, "step": 464 }, { "epoch": 0.09954242594525167, "grad_norm": 0.33828132604779365, "learning_rate": 3.9198883290225406e-05, "loss": 0.7565, "step": 465 }, { "epoch": 0.09975649567846726, "grad_norm": 0.2869927458519343, "learning_rate": 3.919507156214174e-05, "loss": 0.8346, "step": 466 }, { "epoch": 0.09997056541168285, "grad_norm": 0.29979997671769304, "learning_rate": 3.919125097364817e-05, "loss": 0.7978, "step": 467 }, { "epoch": 0.10018463514489845, "grad_norm": 0.3189819781882113, "learning_rate": 3.918742152650829e-05, "loss": 0.7558, "step": 468 }, { "epoch": 0.10039870487811405, "grad_norm": 0.33446164378667337, "learning_rate": 3.918358322248975e-05, "loss": 0.7986, "step": 469 }, { "epoch": 0.10061277461132964, "grad_norm": 0.3875277277756248, "learning_rate": 3.917973606336431e-05, "loss": 0.7844, "step": 470 }, { "epoch": 0.10082684434454524, "grad_norm": 0.3811898107366712, "learning_rate": 3.9175880050907816e-05, "loss": 0.7778, "step": 471 }, { "epoch": 0.10104091407776083, "grad_norm": 0.3249475455448723, "learning_rate": 3.9172015186900196e-05, "loss": 0.7966, "step": 472 }, { "epoch": 0.10125498381097643, "grad_norm": 0.2814721827325935, "learning_rate": 3.916814147312546e-05, "loss": 0.8198, "step": 473 }, { "epoch": 0.10146905354419201, "grad_norm": 0.29531901355457474, "learning_rate": 3.9164258911371705e-05, "loss": 0.7657, "step": 474 }, { "epoch": 0.10168312327740761, "grad_norm": 0.2911491443407701, "learning_rate": 3.916036750343113e-05, "loss": 0.7798, "step": 475 }, { "epoch": 0.10189719301062321, "grad_norm": 0.3041760892145798, "learning_rate": 3.9156467251099976e-05, "loss": 0.7501, "step": 476 }, { "epoch": 0.1021112627438388, "grad_norm": 0.31156761935336486, "learning_rate": 3.915255815617861e-05, "loss": 0.7758, "step": 477 }, { "epoch": 0.1023253324770544, "grad_norm": 0.2790026848999721, "learning_rate": 3.9148640220471464e-05, "loss": 0.7929, "step": 478 }, { "epoch": 0.10253940221027, "grad_norm": 0.3435996309758551, "learning_rate": 3.914471344578704e-05, "loss": 0.8117, "step": 479 }, { "epoch": 0.1027534719434856, "grad_norm": 0.2956753281942712, "learning_rate": 3.914077783393793e-05, "loss": 0.8041, "step": 480 }, { "epoch": 0.10296754167670119, "grad_norm": 0.3204234448008445, "learning_rate": 3.913683338674083e-05, "loss": 0.8116, "step": 481 }, { "epoch": 0.10318161140991677, "grad_norm": 0.30766385532600793, "learning_rate": 3.913288010601645e-05, "loss": 0.7494, "step": 482 }, { "epoch": 0.10339568114313237, "grad_norm": 0.2922689705524498, "learning_rate": 3.912891799358964e-05, "loss": 0.7799, "step": 483 }, { "epoch": 0.10360975087634797, "grad_norm": 0.3731373712577605, "learning_rate": 3.912494705128931e-05, "loss": 0.7722, "step": 484 }, { "epoch": 0.10382382060956356, "grad_norm": 0.32253916018762024, "learning_rate": 3.912096728094843e-05, "loss": 0.7778, "step": 485 }, { "epoch": 0.10403789034277916, "grad_norm": 0.33121589091006576, "learning_rate": 3.911697868440405e-05, "loss": 0.7791, "step": 486 }, { "epoch": 0.10425196007599476, "grad_norm": 0.36523733636259026, "learning_rate": 3.9112981263497304e-05, "loss": 0.7893, "step": 487 }, { "epoch": 0.10446602980921035, "grad_norm": 0.3451596974843037, "learning_rate": 3.91089750200734e-05, "loss": 0.7679, "step": 488 }, { "epoch": 0.10468009954242595, "grad_norm": 0.3386287377778956, "learning_rate": 3.9104959955981605e-05, "loss": 0.7524, "step": 489 }, { "epoch": 0.10489416927564155, "grad_norm": 0.2688495289135925, "learning_rate": 3.910093607307526e-05, "loss": 0.771, "step": 490 }, { "epoch": 0.10510823900885713, "grad_norm": 0.3143137665297875, "learning_rate": 3.90969033732118e-05, "loss": 0.7944, "step": 491 }, { "epoch": 0.10532230874207273, "grad_norm": 0.3189166100146866, "learning_rate": 3.90928618582527e-05, "loss": 0.7977, "step": 492 }, { "epoch": 0.10553637847528832, "grad_norm": 0.2977536874353114, "learning_rate": 3.908881153006351e-05, "loss": 0.7924, "step": 493 }, { "epoch": 0.10575044820850392, "grad_norm": 0.3359457616162425, "learning_rate": 3.9084752390513865e-05, "loss": 0.7522, "step": 494 }, { "epoch": 0.10596451794171952, "grad_norm": 0.3281465702772829, "learning_rate": 3.908068444147745e-05, "loss": 0.8004, "step": 495 }, { "epoch": 0.10617858767493511, "grad_norm": 0.28487561432075975, "learning_rate": 3.907660768483203e-05, "loss": 0.7744, "step": 496 }, { "epoch": 0.10639265740815071, "grad_norm": 0.31522568771373916, "learning_rate": 3.9072522122459425e-05, "loss": 0.785, "step": 497 }, { "epoch": 0.1066067271413663, "grad_norm": 0.3223603592445368, "learning_rate": 3.906842775624552e-05, "loss": 0.7704, "step": 498 }, { "epoch": 0.10682079687458189, "grad_norm": 0.2932291793889263, "learning_rate": 3.906432458808026e-05, "loss": 0.8022, "step": 499 }, { "epoch": 0.10703486660779749, "grad_norm": 0.3066773167676235, "learning_rate": 3.9060212619857676e-05, "loss": 0.7992, "step": 500 }, { "epoch": 0.10724893634101308, "grad_norm": 0.33935414574001344, "learning_rate": 3.905609185347584e-05, "loss": 0.7982, "step": 501 }, { "epoch": 0.10746300607422868, "grad_norm": 0.3532180778210077, "learning_rate": 3.905196229083688e-05, "loss": 0.7967, "step": 502 }, { "epoch": 0.10767707580744428, "grad_norm": 0.3537033113035032, "learning_rate": 3.904782393384701e-05, "loss": 0.8083, "step": 503 }, { "epoch": 0.10789114554065987, "grad_norm": 0.334513713879041, "learning_rate": 3.9043676784416485e-05, "loss": 0.7814, "step": 504 }, { "epoch": 0.10810521527387547, "grad_norm": 0.3505105241291179, "learning_rate": 3.903952084445961e-05, "loss": 0.7858, "step": 505 }, { "epoch": 0.10831928500709107, "grad_norm": 0.33701530321242656, "learning_rate": 3.903535611589477e-05, "loss": 0.8028, "step": 506 }, { "epoch": 0.10853335474030666, "grad_norm": 0.3526054369532639, "learning_rate": 3.903118260064439e-05, "loss": 0.7879, "step": 507 }, { "epoch": 0.10874742447352224, "grad_norm": 0.33714070211530794, "learning_rate": 3.9027000300634955e-05, "loss": 0.7776, "step": 508 }, { "epoch": 0.10896149420673784, "grad_norm": 0.2949543568958641, "learning_rate": 3.902280921779702e-05, "loss": 0.7644, "step": 509 }, { "epoch": 0.10917556393995344, "grad_norm": 0.32674430466929394, "learning_rate": 3.901860935406517e-05, "loss": 0.8075, "step": 510 }, { "epoch": 0.10938963367316903, "grad_norm": 0.31174220707856237, "learning_rate": 3.9014400711378056e-05, "loss": 0.7646, "step": 511 }, { "epoch": 0.10960370340638463, "grad_norm": 0.3198392027013188, "learning_rate": 3.901018329167838e-05, "loss": 0.7711, "step": 512 }, { "epoch": 0.10981777313960023, "grad_norm": 0.31092469844898774, "learning_rate": 3.9005957096912896e-05, "loss": 0.7827, "step": 513 }, { "epoch": 0.11003184287281582, "grad_norm": 0.3444589142435659, "learning_rate": 3.900172212903241e-05, "loss": 0.7863, "step": 514 }, { "epoch": 0.11024591260603142, "grad_norm": 0.3693579186196287, "learning_rate": 3.899747838999177e-05, "loss": 0.7873, "step": 515 }, { "epoch": 0.110459982339247, "grad_norm": 0.29783179523689074, "learning_rate": 3.8993225881749887e-05, "loss": 0.7613, "step": 516 }, { "epoch": 0.1106740520724626, "grad_norm": 0.3039123293818144, "learning_rate": 3.89889646062697e-05, "loss": 0.7836, "step": 517 }, { "epoch": 0.1108881218056782, "grad_norm": 0.30480384331152066, "learning_rate": 3.898469456551821e-05, "loss": 0.7664, "step": 518 }, { "epoch": 0.1111021915388938, "grad_norm": 0.3121062544761374, "learning_rate": 3.898041576146647e-05, "loss": 0.7764, "step": 519 }, { "epoch": 0.11131626127210939, "grad_norm": 0.3060990055629138, "learning_rate": 3.897612819608955e-05, "loss": 0.7597, "step": 520 }, { "epoch": 0.11153033100532499, "grad_norm": 0.276573895942155, "learning_rate": 3.8971831871366594e-05, "loss": 0.7822, "step": 521 }, { "epoch": 0.11174440073854058, "grad_norm": 0.25312672886850446, "learning_rate": 3.896752678928078e-05, "loss": 0.7776, "step": 522 }, { "epoch": 0.11195847047175618, "grad_norm": 0.28272209005982685, "learning_rate": 3.896321295181932e-05, "loss": 0.7552, "step": 523 }, { "epoch": 0.11217254020497176, "grad_norm": 0.2712780694142558, "learning_rate": 3.895889036097347e-05, "loss": 0.7588, "step": 524 }, { "epoch": 0.11238660993818736, "grad_norm": 0.25201874224866017, "learning_rate": 3.895455901873854e-05, "loss": 0.7869, "step": 525 }, { "epoch": 0.11260067967140296, "grad_norm": 0.30452270733697234, "learning_rate": 3.895021892711387e-05, "loss": 0.7842, "step": 526 }, { "epoch": 0.11281474940461855, "grad_norm": 0.28683093116647973, "learning_rate": 3.8945870088102825e-05, "loss": 0.7906, "step": 527 }, { "epoch": 0.11302881913783415, "grad_norm": 0.25289372547757544, "learning_rate": 3.894151250371283e-05, "loss": 0.7592, "step": 528 }, { "epoch": 0.11324288887104975, "grad_norm": 0.26538901709259677, "learning_rate": 3.8937146175955336e-05, "loss": 0.7851, "step": 529 }, { "epoch": 0.11345695860426534, "grad_norm": 0.29445448102282995, "learning_rate": 3.893277110684584e-05, "loss": 0.7793, "step": 530 }, { "epoch": 0.11367102833748094, "grad_norm": 0.32573007017731953, "learning_rate": 3.892838729840385e-05, "loss": 0.7473, "step": 531 }, { "epoch": 0.11388509807069654, "grad_norm": 0.3089739158432779, "learning_rate": 3.892399475265294e-05, "loss": 0.7649, "step": 532 }, { "epoch": 0.11409916780391212, "grad_norm": 0.2784745964411335, "learning_rate": 3.8919593471620694e-05, "loss": 0.786, "step": 533 }, { "epoch": 0.11431323753712772, "grad_norm": 0.2858572786891086, "learning_rate": 3.8915183457338726e-05, "loss": 0.7361, "step": 534 }, { "epoch": 0.11452730727034331, "grad_norm": 0.2954335937171979, "learning_rate": 3.89107647118427e-05, "loss": 0.7796, "step": 535 }, { "epoch": 0.11474137700355891, "grad_norm": 0.33879091012521695, "learning_rate": 3.8906337237172314e-05, "loss": 0.7837, "step": 536 }, { "epoch": 0.1149554467367745, "grad_norm": 0.31576256981083695, "learning_rate": 3.890190103537126e-05, "loss": 0.7721, "step": 537 }, { "epoch": 0.1151695164699901, "grad_norm": 0.27847471034641175, "learning_rate": 3.8897456108487286e-05, "loss": 0.7754, "step": 538 }, { "epoch": 0.1153835862032057, "grad_norm": 0.29099685803387215, "learning_rate": 3.889300245857217e-05, "loss": 0.7616, "step": 539 }, { "epoch": 0.1155976559364213, "grad_norm": 0.32514233887056426, "learning_rate": 3.888854008768171e-05, "loss": 0.8162, "step": 540 }, { "epoch": 0.11581172566963688, "grad_norm": 0.27227832221242765, "learning_rate": 3.8884068997875714e-05, "loss": 0.7667, "step": 541 }, { "epoch": 0.11602579540285247, "grad_norm": 0.28965360783910543, "learning_rate": 3.887958919121804e-05, "loss": 0.8139, "step": 542 }, { "epoch": 0.11623986513606807, "grad_norm": 0.27492062128513606, "learning_rate": 3.8875100669776554e-05, "loss": 0.7859, "step": 543 }, { "epoch": 0.11645393486928367, "grad_norm": 0.30950320325187036, "learning_rate": 3.887060343562315e-05, "loss": 0.7632, "step": 544 }, { "epoch": 0.11666800460249926, "grad_norm": 0.34115044122556104, "learning_rate": 3.886609749083375e-05, "loss": 0.801, "step": 545 }, { "epoch": 0.11688207433571486, "grad_norm": 0.28438131757602064, "learning_rate": 3.886158283748828e-05, "loss": 0.8003, "step": 546 }, { "epoch": 0.11709614406893046, "grad_norm": 0.2974028330364232, "learning_rate": 3.88570594776707e-05, "loss": 0.7559, "step": 547 }, { "epoch": 0.11731021380214605, "grad_norm": 0.3398375178841318, "learning_rate": 3.8852527413468984e-05, "loss": 0.7841, "step": 548 }, { "epoch": 0.11752428353536164, "grad_norm": 0.35188531441607085, "learning_rate": 3.884798664697512e-05, "loss": 0.7945, "step": 549 }, { "epoch": 0.11773835326857723, "grad_norm": 0.31114165104756186, "learning_rate": 3.884343718028513e-05, "loss": 0.7922, "step": 550 }, { "epoch": 0.11795242300179283, "grad_norm": 0.28450168959567035, "learning_rate": 3.883887901549903e-05, "loss": 0.8044, "step": 551 }, { "epoch": 0.11816649273500843, "grad_norm": 0.2740499206171291, "learning_rate": 3.883431215472086e-05, "loss": 0.7721, "step": 552 }, { "epoch": 0.11838056246822402, "grad_norm": 0.3228937059359105, "learning_rate": 3.882973660005868e-05, "loss": 0.7691, "step": 553 }, { "epoch": 0.11859463220143962, "grad_norm": 0.2534849908445428, "learning_rate": 3.882515235362456e-05, "loss": 0.7707, "step": 554 }, { "epoch": 0.11880870193465522, "grad_norm": 0.29382509647086186, "learning_rate": 3.8820559417534564e-05, "loss": 0.7436, "step": 555 }, { "epoch": 0.11902277166787081, "grad_norm": 0.36869485323895296, "learning_rate": 3.8815957793908794e-05, "loss": 0.7651, "step": 556 }, { "epoch": 0.11923684140108641, "grad_norm": 0.2686317456497916, "learning_rate": 3.8811347484871353e-05, "loss": 0.7963, "step": 557 }, { "epoch": 0.11945091113430199, "grad_norm": 0.28997605967274664, "learning_rate": 3.880672849255035e-05, "loss": 0.726, "step": 558 }, { "epoch": 0.11966498086751759, "grad_norm": 0.25217042243669957, "learning_rate": 3.8802100819077905e-05, "loss": 0.7435, "step": 559 }, { "epoch": 0.11987905060073319, "grad_norm": 0.3093551373710632, "learning_rate": 3.879746446659013e-05, "loss": 0.8133, "step": 560 }, { "epoch": 0.12009312033394878, "grad_norm": 0.32250094376072785, "learning_rate": 3.879281943722718e-05, "loss": 0.814, "step": 561 }, { "epoch": 0.12030719006716438, "grad_norm": 0.3009295548092589, "learning_rate": 3.878816573313317e-05, "loss": 0.7727, "step": 562 }, { "epoch": 0.12052125980037998, "grad_norm": 0.2725509094693102, "learning_rate": 3.878350335645626e-05, "loss": 0.7591, "step": 563 }, { "epoch": 0.12073532953359557, "grad_norm": 0.255873272518967, "learning_rate": 3.877883230934858e-05, "loss": 0.7694, "step": 564 }, { "epoch": 0.12094939926681117, "grad_norm": 0.29434175814592056, "learning_rate": 3.8774152593966277e-05, "loss": 0.7658, "step": 565 }, { "epoch": 0.12116346900002675, "grad_norm": 0.35007367150480406, "learning_rate": 3.8769464212469504e-05, "loss": 0.7668, "step": 566 }, { "epoch": 0.12137753873324235, "grad_norm": 0.3665899152199687, "learning_rate": 3.876476716702242e-05, "loss": 0.7646, "step": 567 }, { "epoch": 0.12159160846645795, "grad_norm": 0.31901208253235064, "learning_rate": 3.8760061459793155e-05, "loss": 0.7801, "step": 568 }, { "epoch": 0.12180567819967354, "grad_norm": 0.2682094020590244, "learning_rate": 3.8755347092953856e-05, "loss": 0.7663, "step": 569 }, { "epoch": 0.12201974793288914, "grad_norm": 0.24301063250734176, "learning_rate": 3.8750624068680684e-05, "loss": 0.7821, "step": 570 }, { "epoch": 0.12223381766610474, "grad_norm": 0.2927237881643003, "learning_rate": 3.874589238915376e-05, "loss": 0.7836, "step": 571 }, { "epoch": 0.12244788739932033, "grad_norm": 0.30694527478392386, "learning_rate": 3.874115205655722e-05, "loss": 0.7646, "step": 572 }, { "epoch": 0.12266195713253593, "grad_norm": 0.2541192562387383, "learning_rate": 3.873640307307921e-05, "loss": 0.8211, "step": 573 }, { "epoch": 0.12287602686575153, "grad_norm": 0.2810086501685054, "learning_rate": 3.873164544091183e-05, "loss": 0.7733, "step": 574 }, { "epoch": 0.12309009659896711, "grad_norm": 0.39706157769586736, "learning_rate": 3.872687916225121e-05, "loss": 0.8116, "step": 575 }, { "epoch": 0.1233041663321827, "grad_norm": 0.3540089083723581, "learning_rate": 3.872210423929744e-05, "loss": 0.7714, "step": 576 }, { "epoch": 0.1235182360653983, "grad_norm": 0.4016793229760035, "learning_rate": 3.8717320674254636e-05, "loss": 0.804, "step": 577 }, { "epoch": 0.1237323057986139, "grad_norm": 0.30846391176315663, "learning_rate": 3.871252846933087e-05, "loss": 0.7642, "step": 578 }, { "epoch": 0.1239463755318295, "grad_norm": 0.29856262927914995, "learning_rate": 3.870772762673821e-05, "loss": 0.7869, "step": 579 }, { "epoch": 0.12416044526504509, "grad_norm": 0.33404088281171423, "learning_rate": 3.8702918148692725e-05, "loss": 0.7793, "step": 580 }, { "epoch": 0.12437451499826069, "grad_norm": 0.3555240010502193, "learning_rate": 3.869810003741447e-05, "loss": 0.7814, "step": 581 }, { "epoch": 0.12458858473147628, "grad_norm": 0.36175837427588975, "learning_rate": 3.869327329512746e-05, "loss": 0.7654, "step": 582 }, { "epoch": 0.12480265446469187, "grad_norm": 0.31043731011159237, "learning_rate": 3.868843792405971e-05, "loss": 0.7798, "step": 583 }, { "epoch": 0.12501672419790746, "grad_norm": 0.26537385873434644, "learning_rate": 3.868359392644323e-05, "loss": 0.8154, "step": 584 }, { "epoch": 0.12523079393112307, "grad_norm": 0.3260081381676428, "learning_rate": 3.8678741304514e-05, "loss": 0.7503, "step": 585 }, { "epoch": 0.12544486366433866, "grad_norm": 0.3328831596964062, "learning_rate": 3.8673880060511974e-05, "loss": 0.7944, "step": 586 }, { "epoch": 0.12565893339755424, "grad_norm": 0.302523997477463, "learning_rate": 3.86690101966811e-05, "loss": 0.7289, "step": 587 }, { "epoch": 0.12587300313076985, "grad_norm": 0.31239351228194273, "learning_rate": 3.866413171526928e-05, "loss": 0.7573, "step": 588 }, { "epoch": 0.12608707286398543, "grad_norm": 0.28106346332874094, "learning_rate": 3.865924461852843e-05, "loss": 0.7687, "step": 589 }, { "epoch": 0.12630114259720104, "grad_norm": 0.25788660211306436, "learning_rate": 3.8654348908714434e-05, "loss": 0.8018, "step": 590 }, { "epoch": 0.12651521233041663, "grad_norm": 0.25831164826273156, "learning_rate": 3.864944458808712e-05, "loss": 0.7682, "step": 591 }, { "epoch": 0.12672928206363224, "grad_norm": 0.2509999919005387, "learning_rate": 3.864453165891032e-05, "loss": 0.7984, "step": 592 }, { "epoch": 0.12694335179684782, "grad_norm": 0.2951328187917488, "learning_rate": 3.863961012345184e-05, "loss": 0.785, "step": 593 }, { "epoch": 0.12715742153006343, "grad_norm": 0.26397863417223993, "learning_rate": 3.863467998398346e-05, "loss": 0.8013, "step": 594 }, { "epoch": 0.127371491263279, "grad_norm": 0.2583119588714624, "learning_rate": 3.86297412427809e-05, "loss": 0.7822, "step": 595 }, { "epoch": 0.1275855609964946, "grad_norm": 0.27064259133436563, "learning_rate": 3.8624793902123886e-05, "loss": 0.7944, "step": 596 }, { "epoch": 0.1277996307297102, "grad_norm": 0.29108215935884013, "learning_rate": 3.86198379642961e-05, "loss": 0.796, "step": 597 }, { "epoch": 0.1280137004629258, "grad_norm": 0.29671321773104437, "learning_rate": 3.8614873431585196e-05, "loss": 0.781, "step": 598 }, { "epoch": 0.1282277701961414, "grad_norm": 0.4195978479010054, "learning_rate": 3.860990030628279e-05, "loss": 0.7478, "step": 599 }, { "epoch": 0.12844183992935698, "grad_norm": 0.26840119966490283, "learning_rate": 3.860491859068447e-05, "loss": 0.7836, "step": 600 }, { "epoch": 0.1286559096625726, "grad_norm": 0.4129907291581528, "learning_rate": 3.859992828708979e-05, "loss": 0.7618, "step": 601 }, { "epoch": 0.12886997939578818, "grad_norm": 0.3238934678575172, "learning_rate": 3.859492939780226e-05, "loss": 0.7619, "step": 602 }, { "epoch": 0.1290840491290038, "grad_norm": 0.27898878787820014, "learning_rate": 3.8589921925129357e-05, "loss": 0.7629, "step": 603 }, { "epoch": 0.12929811886221937, "grad_norm": 0.2604284751693988, "learning_rate": 3.8584905871382526e-05, "loss": 0.787, "step": 604 }, { "epoch": 0.12951218859543495, "grad_norm": 0.2619205407332, "learning_rate": 3.857988123887716e-05, "loss": 0.7558, "step": 605 }, { "epoch": 0.12972625832865056, "grad_norm": 0.29313059763695404, "learning_rate": 3.857484802993263e-05, "loss": 0.758, "step": 606 }, { "epoch": 0.12994032806186614, "grad_norm": 2.1930454149061887, "learning_rate": 3.856980624687225e-05, "loss": 0.7746, "step": 607 }, { "epoch": 0.13015439779508176, "grad_norm": 0.35963399420098147, "learning_rate": 3.85647558920233e-05, "loss": 0.7798, "step": 608 }, { "epoch": 0.13036846752829734, "grad_norm": 0.5345692799132874, "learning_rate": 3.855969696771702e-05, "loss": 0.8404, "step": 609 }, { "epoch": 0.13058253726151295, "grad_norm": 0.4341539959883955, "learning_rate": 3.8554629476288596e-05, "loss": 0.7688, "step": 610 }, { "epoch": 0.13079660699472853, "grad_norm": 0.4512232747327198, "learning_rate": 3.8549553420077167e-05, "loss": 0.7458, "step": 611 }, { "epoch": 0.13101067672794411, "grad_norm": 0.337253619260505, "learning_rate": 3.8544468801425836e-05, "loss": 0.7954, "step": 612 }, { "epoch": 0.13122474646115972, "grad_norm": 0.32416015765255696, "learning_rate": 3.853937562268165e-05, "loss": 0.8084, "step": 613 }, { "epoch": 0.1314388161943753, "grad_norm": 0.38068934309719155, "learning_rate": 3.853427388619562e-05, "loss": 0.7465, "step": 614 }, { "epoch": 0.13165288592759092, "grad_norm": 0.3619894703448777, "learning_rate": 3.852916359432269e-05, "loss": 0.7527, "step": 615 }, { "epoch": 0.1318669556608065, "grad_norm": 0.35645238503047305, "learning_rate": 3.852404474942176e-05, "loss": 0.7819, "step": 616 }, { "epoch": 0.1320810253940221, "grad_norm": 0.35511855267868303, "learning_rate": 3.8518917353855686e-05, "loss": 0.7465, "step": 617 }, { "epoch": 0.1322950951272377, "grad_norm": 0.37180162261348115, "learning_rate": 3.851378140999126e-05, "loss": 0.7459, "step": 618 }, { "epoch": 0.1325091648604533, "grad_norm": 0.3396390822262118, "learning_rate": 3.850863692019923e-05, "loss": 0.7681, "step": 619 }, { "epoch": 0.1327232345936689, "grad_norm": 0.35510271781924185, "learning_rate": 3.850348388685428e-05, "loss": 0.7635, "step": 620 }, { "epoch": 0.13293730432688447, "grad_norm": 0.40441804257077346, "learning_rate": 3.849832231233503e-05, "loss": 0.7771, "step": 621 }, { "epoch": 0.13315137406010008, "grad_norm": 0.34293311293913303, "learning_rate": 3.8493152199024074e-05, "loss": 0.7421, "step": 622 }, { "epoch": 0.13336544379331566, "grad_norm": 0.2912600178622258, "learning_rate": 3.848797354930791e-05, "loss": 0.8171, "step": 623 }, { "epoch": 0.13357951352653127, "grad_norm": 0.29306478713974593, "learning_rate": 3.8482786365577e-05, "loss": 0.7717, "step": 624 }, { "epoch": 0.13379358325974686, "grad_norm": 0.3326609157566286, "learning_rate": 3.8477590650225735e-05, "loss": 0.7854, "step": 625 }, { "epoch": 0.13400765299296247, "grad_norm": 0.26537216605654773, "learning_rate": 3.847238640565246e-05, "loss": 0.7672, "step": 626 }, { "epoch": 0.13422172272617805, "grad_norm": 0.2850266903516824, "learning_rate": 3.846717363425943e-05, "loss": 0.7699, "step": 627 }, { "epoch": 0.13443579245939366, "grad_norm": 0.32199229672531654, "learning_rate": 3.846195233845285e-05, "loss": 0.7673, "step": 628 }, { "epoch": 0.13464986219260924, "grad_norm": 0.3262432652902493, "learning_rate": 3.8456722520642876e-05, "loss": 0.7767, "step": 629 }, { "epoch": 0.13486393192582483, "grad_norm": 0.38010201931639875, "learning_rate": 3.845148418324357e-05, "loss": 0.7755, "step": 630 }, { "epoch": 0.13507800165904044, "grad_norm": 0.2774173275910418, "learning_rate": 3.844623732867294e-05, "loss": 0.7397, "step": 631 }, { "epoch": 0.13529207139225602, "grad_norm": 0.2659211241771369, "learning_rate": 3.844098195935292e-05, "loss": 0.7704, "step": 632 }, { "epoch": 0.13550614112547163, "grad_norm": 0.25464525623727563, "learning_rate": 3.843571807770939e-05, "loss": 0.7834, "step": 633 }, { "epoch": 0.1357202108586872, "grad_norm": 0.26193574683038773, "learning_rate": 3.843044568617215e-05, "loss": 0.7672, "step": 634 }, { "epoch": 0.13593428059190282, "grad_norm": 0.26387639271750185, "learning_rate": 3.842516478717492e-05, "loss": 0.788, "step": 635 }, { "epoch": 0.1361483503251184, "grad_norm": 0.29187272474834036, "learning_rate": 3.841987538315534e-05, "loss": 0.7655, "step": 636 }, { "epoch": 0.136362420058334, "grad_norm": 0.31168977117009056, "learning_rate": 3.8414577476555014e-05, "loss": 0.7487, "step": 637 }, { "epoch": 0.1365764897915496, "grad_norm": 0.2621584434554687, "learning_rate": 3.840927106981943e-05, "loss": 0.7553, "step": 638 }, { "epoch": 0.13679055952476518, "grad_norm": 0.2327188756460318, "learning_rate": 3.8403956165398016e-05, "loss": 0.7856, "step": 639 }, { "epoch": 0.1370046292579808, "grad_norm": 0.2562349108395864, "learning_rate": 3.8398632765744127e-05, "loss": 0.7989, "step": 640 }, { "epoch": 0.13721869899119638, "grad_norm": 0.2610415362265219, "learning_rate": 3.8393300873315035e-05, "loss": 0.7759, "step": 641 }, { "epoch": 0.13743276872441199, "grad_norm": 0.31247290427911734, "learning_rate": 3.8387960490571935e-05, "loss": 0.7618, "step": 642 }, { "epoch": 0.13764683845762757, "grad_norm": 0.2611576449147401, "learning_rate": 3.838261161997992e-05, "loss": 0.7746, "step": 643 }, { "epoch": 0.13786090819084318, "grad_norm": 0.2890329268622078, "learning_rate": 3.8377254264008044e-05, "loss": 0.7783, "step": 644 }, { "epoch": 0.13807497792405876, "grad_norm": 0.27299705023008836, "learning_rate": 3.837188842512924e-05, "loss": 0.7665, "step": 645 }, { "epoch": 0.13828904765727434, "grad_norm": 0.28975886281296026, "learning_rate": 3.836651410582037e-05, "loss": 0.7842, "step": 646 }, { "epoch": 0.13850311739048995, "grad_norm": 0.255916883922782, "learning_rate": 3.83611313085622e-05, "loss": 0.7308, "step": 647 }, { "epoch": 0.13871718712370554, "grad_norm": 0.2623143223523937, "learning_rate": 3.835574003583945e-05, "loss": 0.7612, "step": 648 }, { "epoch": 0.13893125685692115, "grad_norm": 0.2908807278221207, "learning_rate": 3.835034029014068e-05, "loss": 0.7591, "step": 649 }, { "epoch": 0.13914532659013673, "grad_norm": 0.2792529186802475, "learning_rate": 3.834493207395843e-05, "loss": 0.7694, "step": 650 }, { "epoch": 0.13935939632335234, "grad_norm": 0.2623369924104658, "learning_rate": 3.8339515389789115e-05, "loss": 0.744, "step": 651 }, { "epoch": 0.13957346605656792, "grad_norm": 0.2820722762027879, "learning_rate": 3.833409024013307e-05, "loss": 0.7556, "step": 652 }, { "epoch": 0.13978753578978353, "grad_norm": 0.27711755248733916, "learning_rate": 3.8328656627494534e-05, "loss": 0.7709, "step": 653 }, { "epoch": 0.14000160552299912, "grad_norm": 0.3811917986398551, "learning_rate": 3.832321455438165e-05, "loss": 0.7617, "step": 654 }, { "epoch": 0.1402156752562147, "grad_norm": 0.3028791758140864, "learning_rate": 3.8317764023306466e-05, "loss": 0.805, "step": 655 }, { "epoch": 0.1404297449894303, "grad_norm": 0.25141344995387904, "learning_rate": 3.831230503678494e-05, "loss": 0.7785, "step": 656 }, { "epoch": 0.1406438147226459, "grad_norm": 0.26261303365336286, "learning_rate": 3.8306837597336943e-05, "loss": 0.7856, "step": 657 }, { "epoch": 0.1408578844558615, "grad_norm": 0.2609065168861405, "learning_rate": 3.830136170748621e-05, "loss": 0.781, "step": 658 }, { "epoch": 0.1410719541890771, "grad_norm": 0.27034827983831866, "learning_rate": 3.8295877369760426e-05, "loss": 0.7581, "step": 659 }, { "epoch": 0.1412860239222927, "grad_norm": 0.2701528651235907, "learning_rate": 3.829038458669113e-05, "loss": 0.7654, "step": 660 }, { "epoch": 0.14150009365550828, "grad_norm": 0.2563557451157304, "learning_rate": 3.828488336081379e-05, "loss": 0.7903, "step": 661 }, { "epoch": 0.1417141633887239, "grad_norm": 0.2764534665072802, "learning_rate": 3.827937369466777e-05, "loss": 0.7526, "step": 662 }, { "epoch": 0.14192823312193947, "grad_norm": 0.24673773612316036, "learning_rate": 3.8273855590796316e-05, "loss": 0.7556, "step": 663 }, { "epoch": 0.14214230285515506, "grad_norm": 0.24728120211043053, "learning_rate": 3.8268329051746564e-05, "loss": 0.7916, "step": 664 }, { "epoch": 0.14235637258837067, "grad_norm": 0.28711329804957375, "learning_rate": 3.826279408006957e-05, "loss": 0.7699, "step": 665 }, { "epoch": 0.14257044232158625, "grad_norm": 0.6682267334163176, "learning_rate": 3.8257250678320254e-05, "loss": 0.7595, "step": 666 }, { "epoch": 0.14278451205480186, "grad_norm": 0.29826111975164143, "learning_rate": 3.825169884905745e-05, "loss": 0.789, "step": 667 }, { "epoch": 0.14299858178801744, "grad_norm": 0.26844129791093646, "learning_rate": 3.8246138594843866e-05, "loss": 0.7858, "step": 668 }, { "epoch": 0.14321265152123305, "grad_norm": 0.25460203600355735, "learning_rate": 3.824056991824611e-05, "loss": 0.7768, "step": 669 }, { "epoch": 0.14342672125444864, "grad_norm": 0.25700886657543587, "learning_rate": 3.823499282183467e-05, "loss": 0.7903, "step": 670 }, { "epoch": 0.14364079098766422, "grad_norm": 0.27604014778042324, "learning_rate": 3.822940730818392e-05, "loss": 0.7751, "step": 671 }, { "epoch": 0.14385486072087983, "grad_norm": 0.2545147079115621, "learning_rate": 3.822381337987213e-05, "loss": 0.7561, "step": 672 }, { "epoch": 0.1440689304540954, "grad_norm": 0.23984722698315658, "learning_rate": 3.821821103948145e-05, "loss": 0.7623, "step": 673 }, { "epoch": 0.14428300018731102, "grad_norm": 0.867169222204874, "learning_rate": 3.821260028959789e-05, "loss": 0.7989, "step": 674 }, { "epoch": 0.1444970699205266, "grad_norm": 0.25654537168602387, "learning_rate": 3.820698113281139e-05, "loss": 0.78, "step": 675 }, { "epoch": 0.14471113965374222, "grad_norm": 3.2058329497567497, "learning_rate": 3.8201353571715724e-05, "loss": 0.7841, "step": 676 }, { "epoch": 0.1449252093869578, "grad_norm": 0.3697316952651418, "learning_rate": 3.8195717608908564e-05, "loss": 0.7375, "step": 677 }, { "epoch": 0.1451392791201734, "grad_norm": 0.5280543469774536, "learning_rate": 3.8190073246991465e-05, "loss": 0.7873, "step": 678 }, { "epoch": 0.145353348853389, "grad_norm": 0.465557351184121, "learning_rate": 3.818442048856986e-05, "loss": 0.7555, "step": 679 }, { "epoch": 0.14556741858660457, "grad_norm": 0.4186516823478108, "learning_rate": 3.8178759336253034e-05, "loss": 0.7612, "step": 680 }, { "epoch": 0.14578148831982018, "grad_norm": 0.3626499032628553, "learning_rate": 3.817308979265418e-05, "loss": 0.7677, "step": 681 }, { "epoch": 0.14599555805303577, "grad_norm": 0.4557368296382944, "learning_rate": 3.816741186039035e-05, "loss": 0.803, "step": 682 }, { "epoch": 0.14620962778625138, "grad_norm": 0.36389305704256725, "learning_rate": 3.8161725542082464e-05, "loss": 0.7697, "step": 683 }, { "epoch": 0.14642369751946696, "grad_norm": 0.40408928941984346, "learning_rate": 3.8156030840355306e-05, "loss": 0.7939, "step": 684 }, { "epoch": 0.14663776725268257, "grad_norm": 0.34759856848217446, "learning_rate": 3.815032775783755e-05, "loss": 0.7569, "step": 685 }, { "epoch": 0.14685183698589815, "grad_norm": 0.3213468951285394, "learning_rate": 3.814461629716173e-05, "loss": 0.789, "step": 686 }, { "epoch": 0.14706590671911376, "grad_norm": 0.28784011586337543, "learning_rate": 3.813889646096424e-05, "loss": 0.7645, "step": 687 }, { "epoch": 0.14727997645232935, "grad_norm": 0.34078877386979906, "learning_rate": 3.8133168251885354e-05, "loss": 0.7932, "step": 688 }, { "epoch": 0.14749404618554493, "grad_norm": 0.303131014259725, "learning_rate": 3.8127431672569187e-05, "loss": 0.7682, "step": 689 }, { "epoch": 0.14770811591876054, "grad_norm": 0.2684582694020625, "learning_rate": 3.8121686725663744e-05, "loss": 0.7852, "step": 690 }, { "epoch": 0.14792218565197612, "grad_norm": 0.2837038618704381, "learning_rate": 3.811593341382088e-05, "loss": 0.7673, "step": 691 }, { "epoch": 0.14813625538519173, "grad_norm": 0.2844560648737273, "learning_rate": 3.811017173969632e-05, "loss": 0.7982, "step": 692 }, { "epoch": 0.14835032511840732, "grad_norm": 0.3119909822976725, "learning_rate": 3.810440170594964e-05, "loss": 0.7565, "step": 693 }, { "epoch": 0.14856439485162293, "grad_norm": 0.2690330292340514, "learning_rate": 3.8098623315244275e-05, "loss": 0.7689, "step": 694 }, { "epoch": 0.1487784645848385, "grad_norm": 0.2783152561701639, "learning_rate": 3.809283657024751e-05, "loss": 0.7855, "step": 695 }, { "epoch": 0.1489925343180541, "grad_norm": 0.2765407147198869, "learning_rate": 3.8087041473630516e-05, "loss": 0.7149, "step": 696 }, { "epoch": 0.1492066040512697, "grad_norm": 0.262225712133104, "learning_rate": 3.8081238028068274e-05, "loss": 0.7828, "step": 697 }, { "epoch": 0.1494206737844853, "grad_norm": 0.2705245628148132, "learning_rate": 3.807542623623967e-05, "loss": 0.7608, "step": 698 }, { "epoch": 0.1496347435177009, "grad_norm": 0.28967389099332874, "learning_rate": 3.8069606100827396e-05, "loss": 0.7707, "step": 699 }, { "epoch": 0.14984881325091648, "grad_norm": 0.27952000191705045, "learning_rate": 3.8063777624518026e-05, "loss": 0.7777, "step": 700 }, { "epoch": 0.1500628829841321, "grad_norm": 0.5827073160411023, "learning_rate": 3.805794081000197e-05, "loss": 0.7687, "step": 701 }, { "epoch": 0.15027695271734767, "grad_norm": 0.27978095776641804, "learning_rate": 3.8052095659973494e-05, "loss": 0.7673, "step": 702 }, { "epoch": 0.15049102245056328, "grad_norm": 0.24177725367845768, "learning_rate": 3.8046242177130707e-05, "loss": 0.7961, "step": 703 }, { "epoch": 0.15070509218377887, "grad_norm": 0.2719754688841993, "learning_rate": 3.8040380364175556e-05, "loss": 0.7848, "step": 704 }, { "epoch": 0.15091916191699445, "grad_norm": 0.28065407377805734, "learning_rate": 3.8034510223813864e-05, "loss": 0.7799, "step": 705 }, { "epoch": 0.15113323165021006, "grad_norm": 0.2653947429929618, "learning_rate": 3.8028631758755264e-05, "loss": 0.7605, "step": 706 }, { "epoch": 0.15134730138342564, "grad_norm": 0.26084282961218674, "learning_rate": 3.802274497171325e-05, "loss": 0.7776, "step": 707 }, { "epoch": 0.15156137111664125, "grad_norm": 0.25782589841291353, "learning_rate": 3.8016849865405145e-05, "loss": 0.7772, "step": 708 }, { "epoch": 0.15177544084985684, "grad_norm": 0.28312640843468234, "learning_rate": 3.801094644255213e-05, "loss": 0.7663, "step": 709 }, { "epoch": 0.15198951058307245, "grad_norm": 0.27255746129312775, "learning_rate": 3.80050347058792e-05, "loss": 0.7677, "step": 710 }, { "epoch": 0.15220358031628803, "grad_norm": 0.24608802340719124, "learning_rate": 3.799911465811521e-05, "loss": 0.7639, "step": 711 }, { "epoch": 0.15241765004950364, "grad_norm": 0.2646709801661402, "learning_rate": 3.799318630199284e-05, "loss": 0.7451, "step": 712 }, { "epoch": 0.15263171978271922, "grad_norm": 0.26130611058084147, "learning_rate": 3.798724964024862e-05, "loss": 0.775, "step": 713 }, { "epoch": 0.1528457895159348, "grad_norm": 0.25417406786538366, "learning_rate": 3.798130467562288e-05, "loss": 0.7661, "step": 714 }, { "epoch": 0.15305985924915042, "grad_norm": 0.2860437453192641, "learning_rate": 3.797535141085983e-05, "loss": 0.7742, "step": 715 }, { "epoch": 0.153273928982366, "grad_norm": 0.2915108791493047, "learning_rate": 3.796938984870747e-05, "loss": 0.7698, "step": 716 }, { "epoch": 0.1534879987155816, "grad_norm": 0.4095923416537272, "learning_rate": 3.796341999191765e-05, "loss": 0.7801, "step": 717 }, { "epoch": 0.1537020684487972, "grad_norm": 0.262364819657621, "learning_rate": 3.795744184324604e-05, "loss": 0.7525, "step": 718 }, { "epoch": 0.1539161381820128, "grad_norm": 0.28695421488433925, "learning_rate": 3.7951455405452155e-05, "loss": 0.776, "step": 719 }, { "epoch": 0.15413020791522838, "grad_norm": 0.3137504716311156, "learning_rate": 3.794546068129931e-05, "loss": 0.7682, "step": 720 }, { "epoch": 0.15434427764844397, "grad_norm": 0.3260775023769069, "learning_rate": 3.793945767355467e-05, "loss": 0.76, "step": 721 }, { "epoch": 0.15455834738165958, "grad_norm": 0.3143029699594306, "learning_rate": 3.7933446384989205e-05, "loss": 0.728, "step": 722 }, { "epoch": 0.15477241711487516, "grad_norm": 0.3432677260109923, "learning_rate": 3.792742681837772e-05, "loss": 0.7451, "step": 723 }, { "epoch": 0.15498648684809077, "grad_norm": 0.34307535233147257, "learning_rate": 3.792139897649883e-05, "loss": 0.7683, "step": 724 }, { "epoch": 0.15520055658130635, "grad_norm": 0.26039722324676695, "learning_rate": 3.791536286213498e-05, "loss": 0.7588, "step": 725 }, { "epoch": 0.15541462631452196, "grad_norm": 0.3180245927328958, "learning_rate": 3.790931847807243e-05, "loss": 0.7579, "step": 726 }, { "epoch": 0.15562869604773755, "grad_norm": 0.3656361806954744, "learning_rate": 3.790326582710125e-05, "loss": 0.7689, "step": 727 }, { "epoch": 0.15584276578095316, "grad_norm": 0.34451469974838755, "learning_rate": 3.789720491201534e-05, "loss": 0.7482, "step": 728 }, { "epoch": 0.15605683551416874, "grad_norm": 0.29215948365756916, "learning_rate": 3.789113573561241e-05, "loss": 0.7763, "step": 729 }, { "epoch": 0.15627090524738432, "grad_norm": 0.25004022512355967, "learning_rate": 3.7885058300693965e-05, "loss": 0.7807, "step": 730 }, { "epoch": 0.15648497498059993, "grad_norm": 0.3220605418818703, "learning_rate": 3.7878972610065354e-05, "loss": 0.8252, "step": 731 }, { "epoch": 0.15669904471381552, "grad_norm": 0.30932527158683015, "learning_rate": 3.7872878666535716e-05, "loss": 0.7371, "step": 732 }, { "epoch": 0.15691311444703113, "grad_norm": 0.25967906503546573, "learning_rate": 3.7866776472918e-05, "loss": 0.7797, "step": 733 }, { "epoch": 0.1571271841802467, "grad_norm": 0.2551191958568535, "learning_rate": 3.7860666032028974e-05, "loss": 0.746, "step": 734 }, { "epoch": 0.15734125391346232, "grad_norm": 0.2651953004886809, "learning_rate": 3.78545473466892e-05, "loss": 0.7725, "step": 735 }, { "epoch": 0.1575553236466779, "grad_norm": 0.2766358558914324, "learning_rate": 3.784842041972305e-05, "loss": 0.7683, "step": 736 }, { "epoch": 0.1577693933798935, "grad_norm": 0.2735619396043064, "learning_rate": 3.784228525395872e-05, "loss": 0.7533, "step": 737 }, { "epoch": 0.1579834631131091, "grad_norm": 0.253304508339416, "learning_rate": 3.783614185222817e-05, "loss": 0.7608, "step": 738 }, { "epoch": 0.15819753284632468, "grad_norm": 0.247978568649734, "learning_rate": 3.7829990217367195e-05, "loss": 0.7703, "step": 739 }, { "epoch": 0.1584116025795403, "grad_norm": 0.21606299875376397, "learning_rate": 3.782383035221537e-05, "loss": 0.7611, "step": 740 }, { "epoch": 0.15862567231275587, "grad_norm": 0.2587787141580186, "learning_rate": 3.7817662259616084e-05, "loss": 0.7562, "step": 741 }, { "epoch": 0.15883974204597148, "grad_norm": 0.25957712222095314, "learning_rate": 3.7811485942416515e-05, "loss": 0.7725, "step": 742 }, { "epoch": 0.15905381177918707, "grad_norm": 0.23629133910620595, "learning_rate": 3.780530140346764e-05, "loss": 0.7791, "step": 743 }, { "epoch": 0.15926788151240268, "grad_norm": 0.2768203978908302, "learning_rate": 3.779910864562424e-05, "loss": 0.747, "step": 744 }, { "epoch": 0.15948195124561826, "grad_norm": 0.259458847699784, "learning_rate": 3.779290767174486e-05, "loss": 0.772, "step": 745 }, { "epoch": 0.15969602097883384, "grad_norm": 0.26038873795570955, "learning_rate": 3.778669848469187e-05, "loss": 0.7919, "step": 746 }, { "epoch": 0.15991009071204945, "grad_norm": 0.25802087574967486, "learning_rate": 3.778048108733143e-05, "loss": 0.7675, "step": 747 }, { "epoch": 0.16012416044526503, "grad_norm": 0.263636350635592, "learning_rate": 3.777425548253346e-05, "loss": 0.7389, "step": 748 }, { "epoch": 0.16033823017848065, "grad_norm": 0.27426089074668625, "learning_rate": 3.77680216731717e-05, "loss": 0.7807, "step": 749 }, { "epoch": 0.16055229991169623, "grad_norm": 0.27602553631119603, "learning_rate": 3.776177966212366e-05, "loss": 0.7399, "step": 750 }, { "epoch": 0.16076636964491184, "grad_norm": 0.27755950091141984, "learning_rate": 3.775552945227064e-05, "loss": 0.7958, "step": 751 }, { "epoch": 0.16098043937812742, "grad_norm": 0.2712475858518582, "learning_rate": 3.774927104649773e-05, "loss": 0.7511, "step": 752 }, { "epoch": 0.16119450911134303, "grad_norm": 0.27952538499117413, "learning_rate": 3.7743004447693794e-05, "loss": 0.7607, "step": 753 }, { "epoch": 0.16140857884455861, "grad_norm": 0.309206124797287, "learning_rate": 3.773672965875148e-05, "loss": 0.7811, "step": 754 }, { "epoch": 0.1616226485777742, "grad_norm": 0.28451036271115715, "learning_rate": 3.7730446682567236e-05, "loss": 0.7821, "step": 755 }, { "epoch": 0.1618367183109898, "grad_norm": 0.2825866984308074, "learning_rate": 3.7724155522041256e-05, "loss": 0.7633, "step": 756 }, { "epoch": 0.1620507880442054, "grad_norm": 0.3074401298667024, "learning_rate": 3.771785618007753e-05, "loss": 0.7747, "step": 757 }, { "epoch": 0.162264857777421, "grad_norm": 0.4009164697772463, "learning_rate": 3.771154865958383e-05, "loss": 0.7744, "step": 758 }, { "epoch": 0.16247892751063658, "grad_norm": 0.3245826957847806, "learning_rate": 3.770523296347168e-05, "loss": 0.7595, "step": 759 }, { "epoch": 0.1626929972438522, "grad_norm": 0.2652441909792531, "learning_rate": 3.769890909465642e-05, "loss": 0.7741, "step": 760 }, { "epoch": 0.16290706697706778, "grad_norm": 0.3165367792575318, "learning_rate": 3.769257705605711e-05, "loss": 0.784, "step": 761 }, { "epoch": 0.1631211367102834, "grad_norm": 0.37822687851375253, "learning_rate": 3.768623685059662e-05, "loss": 0.767, "step": 762 }, { "epoch": 0.16333520644349897, "grad_norm": 0.3575859732243416, "learning_rate": 3.767988848120158e-05, "loss": 0.7734, "step": 763 }, { "epoch": 0.16354927617671455, "grad_norm": 0.24818763877214692, "learning_rate": 3.7673531950802373e-05, "loss": 0.8094, "step": 764 }, { "epoch": 0.16376334590993016, "grad_norm": 0.28354400368932975, "learning_rate": 3.766716726233318e-05, "loss": 0.7576, "step": 765 }, { "epoch": 0.16397741564314575, "grad_norm": 0.39658861942795515, "learning_rate": 3.766079441873192e-05, "loss": 0.7668, "step": 766 }, { "epoch": 0.16419148537636136, "grad_norm": 0.37765885576146235, "learning_rate": 3.765441342294028e-05, "loss": 0.8061, "step": 767 }, { "epoch": 0.16440555510957694, "grad_norm": 0.5915025678108166, "learning_rate": 3.764802427790372e-05, "loss": 0.759, "step": 768 }, { "epoch": 0.16461962484279255, "grad_norm": 0.27850811874246983, "learning_rate": 3.764162698657147e-05, "loss": 0.7699, "step": 769 }, { "epoch": 0.16483369457600813, "grad_norm": 0.30960138552127586, "learning_rate": 3.763522155189648e-05, "loss": 0.8017, "step": 770 }, { "epoch": 0.16504776430922374, "grad_norm": 0.40696548208878003, "learning_rate": 3.7628807976835516e-05, "loss": 0.7622, "step": 771 }, { "epoch": 0.16526183404243933, "grad_norm": 0.2886451062321135, "learning_rate": 3.762238626434906e-05, "loss": 0.7763, "step": 772 }, { "epoch": 0.1654759037756549, "grad_norm": 0.2592910622465004, "learning_rate": 3.7615956417401357e-05, "loss": 0.7401, "step": 773 }, { "epoch": 0.16568997350887052, "grad_norm": 0.2896541504226147, "learning_rate": 3.760951843896043e-05, "loss": 0.7524, "step": 774 }, { "epoch": 0.1659040432420861, "grad_norm": 0.28207888316976604, "learning_rate": 3.7603072331998015e-05, "loss": 0.8057, "step": 775 }, { "epoch": 0.1661181129753017, "grad_norm": 0.27093980342361823, "learning_rate": 3.7596618099489645e-05, "loss": 0.7722, "step": 776 }, { "epoch": 0.1663321827085173, "grad_norm": 0.24124228740960998, "learning_rate": 3.759015574441456e-05, "loss": 0.766, "step": 777 }, { "epoch": 0.1665462524417329, "grad_norm": 0.2633646596925862, "learning_rate": 3.75836852697558e-05, "loss": 0.7534, "step": 778 }, { "epoch": 0.1667603221749485, "grad_norm": 0.2912341873523871, "learning_rate": 3.7577206678500096e-05, "loss": 0.7741, "step": 779 }, { "epoch": 0.16697439190816407, "grad_norm": 0.2687135282669201, "learning_rate": 3.757071997363797e-05, "loss": 0.7641, "step": 780 }, { "epoch": 0.16718846164137968, "grad_norm": 0.23712231013906176, "learning_rate": 3.756422515816367e-05, "loss": 0.7386, "step": 781 }, { "epoch": 0.16740253137459526, "grad_norm": 0.2783664207712955, "learning_rate": 3.7557722235075185e-05, "loss": 0.7641, "step": 782 }, { "epoch": 0.16761660110781088, "grad_norm": 0.25178668122834624, "learning_rate": 3.7551211207374256e-05, "loss": 0.7674, "step": 783 }, { "epoch": 0.16783067084102646, "grad_norm": 0.24464602048954365, "learning_rate": 3.754469207806636e-05, "loss": 0.7471, "step": 784 }, { "epoch": 0.16804474057424207, "grad_norm": 0.25491223478402747, "learning_rate": 3.753816485016073e-05, "loss": 0.782, "step": 785 }, { "epoch": 0.16825881030745765, "grad_norm": 0.24290481349085874, "learning_rate": 3.7531629526670305e-05, "loss": 0.7449, "step": 786 }, { "epoch": 0.16847288004067326, "grad_norm": 0.25907191218936754, "learning_rate": 3.7525086110611775e-05, "loss": 0.7425, "step": 787 }, { "epoch": 0.16868694977388884, "grad_norm": 0.27513812843783825, "learning_rate": 3.751853460500559e-05, "loss": 0.7472, "step": 788 }, { "epoch": 0.16890101950710443, "grad_norm": 0.26492744335289636, "learning_rate": 3.751197501287589e-05, "loss": 0.7498, "step": 789 }, { "epoch": 0.16911508924032004, "grad_norm": 0.26303828189678313, "learning_rate": 3.750540733725059e-05, "loss": 0.7315, "step": 790 }, { "epoch": 0.16932915897353562, "grad_norm": 0.2214058090704647, "learning_rate": 3.74988315811613e-05, "loss": 0.7383, "step": 791 }, { "epoch": 0.16954322870675123, "grad_norm": 0.30065919368053895, "learning_rate": 3.749224774764339e-05, "loss": 0.7745, "step": 792 }, { "epoch": 0.16975729843996681, "grad_norm": 0.27094054827229336, "learning_rate": 3.748565583973594e-05, "loss": 0.7352, "step": 793 }, { "epoch": 0.16997136817318242, "grad_norm": 0.2542348275920184, "learning_rate": 3.747905586048176e-05, "loss": 0.7535, "step": 794 }, { "epoch": 0.170185437906398, "grad_norm": 0.24558353992266338, "learning_rate": 3.7472447812927395e-05, "loss": 0.7327, "step": 795 }, { "epoch": 0.17039950763961362, "grad_norm": 0.2351506672864148, "learning_rate": 3.74658317001231e-05, "loss": 0.7715, "step": 796 }, { "epoch": 0.1706135773728292, "grad_norm": 0.2758704299920849, "learning_rate": 3.745920752512287e-05, "loss": 0.7744, "step": 797 }, { "epoch": 0.17082764710604478, "grad_norm": 0.28828204667769974, "learning_rate": 3.7452575290984406e-05, "loss": 0.7693, "step": 798 }, { "epoch": 0.1710417168392604, "grad_norm": 0.2955914891504317, "learning_rate": 3.744593500076913e-05, "loss": 0.7772, "step": 799 }, { "epoch": 0.17125578657247598, "grad_norm": 0.2737615699555731, "learning_rate": 3.74392866575422e-05, "loss": 0.7657, "step": 800 }, { "epoch": 0.1714698563056916, "grad_norm": 0.26983392116272886, "learning_rate": 3.743263026437247e-05, "loss": 0.7412, "step": 801 }, { "epoch": 0.17168392603890717, "grad_norm": 0.29284983297353145, "learning_rate": 3.742596582433252e-05, "loss": 0.7595, "step": 802 }, { "epoch": 0.17189799577212278, "grad_norm": 0.26663836921476725, "learning_rate": 3.741929334049864e-05, "loss": 0.7386, "step": 803 }, { "epoch": 0.17211206550533836, "grad_norm": 0.2850204051932672, "learning_rate": 3.741261281595086e-05, "loss": 0.7635, "step": 804 }, { "epoch": 0.17232613523855395, "grad_norm": 0.27818601095540874, "learning_rate": 3.740592425377286e-05, "loss": 0.7637, "step": 805 }, { "epoch": 0.17254020497176956, "grad_norm": 0.24672279606204295, "learning_rate": 3.73992276570521e-05, "loss": 0.7389, "step": 806 }, { "epoch": 0.17275427470498514, "grad_norm": 0.25423393775313885, "learning_rate": 3.73925230288797e-05, "loss": 0.7545, "step": 807 }, { "epoch": 0.17296834443820075, "grad_norm": 0.3097737609761657, "learning_rate": 3.73858103723505e-05, "loss": 0.7623, "step": 808 }, { "epoch": 0.17318241417141633, "grad_norm": 0.2914051914001749, "learning_rate": 3.7379089690563064e-05, "loss": 0.7292, "step": 809 }, { "epoch": 0.17339648390463194, "grad_norm": 0.2569206855026681, "learning_rate": 3.7372360986619646e-05, "loss": 0.7872, "step": 810 }, { "epoch": 0.17361055363784753, "grad_norm": 0.2469593360745501, "learning_rate": 3.73656242636262e-05, "loss": 0.7776, "step": 811 }, { "epoch": 0.17382462337106314, "grad_norm": 0.24572005389612414, "learning_rate": 3.735887952469237e-05, "loss": 0.7504, "step": 812 }, { "epoch": 0.17403869310427872, "grad_norm": 0.2458790506465563, "learning_rate": 3.735212677293153e-05, "loss": 0.7499, "step": 813 }, { "epoch": 0.1742527628374943, "grad_norm": 0.23899682983295484, "learning_rate": 3.7345366011460746e-05, "loss": 0.7511, "step": 814 }, { "epoch": 0.1744668325707099, "grad_norm": 0.3699231564097912, "learning_rate": 3.733859724340076e-05, "loss": 0.7564, "step": 815 }, { "epoch": 0.1746809023039255, "grad_norm": 0.2875645194686169, "learning_rate": 3.733182047187602e-05, "loss": 0.782, "step": 816 }, { "epoch": 0.1748949720371411, "grad_norm": 0.2627392316870406, "learning_rate": 3.732503570001468e-05, "loss": 0.7841, "step": 817 }, { "epoch": 0.1751090417703567, "grad_norm": 0.23716566552296248, "learning_rate": 3.7318242930948575e-05, "loss": 0.755, "step": 818 }, { "epoch": 0.1753231115035723, "grad_norm": 0.28114635821103856, "learning_rate": 3.731144216781324e-05, "loss": 0.8051, "step": 819 }, { "epoch": 0.17553718123678788, "grad_norm": 0.3054026391376041, "learning_rate": 3.7304633413747885e-05, "loss": 0.7843, "step": 820 }, { "epoch": 0.1757512509700035, "grad_norm": 0.25992981601725024, "learning_rate": 3.7297816671895425e-05, "loss": 0.747, "step": 821 }, { "epoch": 0.17596532070321907, "grad_norm": 0.23811026243010816, "learning_rate": 3.7290991945402456e-05, "loss": 0.7748, "step": 822 }, { "epoch": 0.17617939043643466, "grad_norm": 0.2806740728248504, "learning_rate": 3.7284159237419255e-05, "loss": 0.7625, "step": 823 }, { "epoch": 0.17639346016965027, "grad_norm": 0.2676841422875386, "learning_rate": 3.727731855109979e-05, "loss": 0.7743, "step": 824 }, { "epoch": 0.17660752990286585, "grad_norm": 0.23284766500209506, "learning_rate": 3.7270469889601716e-05, "loss": 0.7365, "step": 825 }, { "epoch": 0.17682159963608146, "grad_norm": 0.2692311864758718, "learning_rate": 3.7263613256086346e-05, "loss": 0.753, "step": 826 }, { "epoch": 0.17703566936929704, "grad_norm": 0.2735512134769299, "learning_rate": 3.72567486537187e-05, "loss": 0.7305, "step": 827 }, { "epoch": 0.17724973910251265, "grad_norm": 0.26244627961838785, "learning_rate": 3.7249876085667474e-05, "loss": 0.7603, "step": 828 }, { "epoch": 0.17746380883572824, "grad_norm": 0.22792906030022086, "learning_rate": 3.7242995555105016e-05, "loss": 0.7482, "step": 829 }, { "epoch": 0.17767787856894382, "grad_norm": 0.2530232897181774, "learning_rate": 3.723610706520738e-05, "loss": 0.7588, "step": 830 }, { "epoch": 0.17789194830215943, "grad_norm": 0.2426994810142526, "learning_rate": 3.722921061915427e-05, "loss": 0.7429, "step": 831 }, { "epoch": 0.178106018035375, "grad_norm": 0.2891343066654247, "learning_rate": 3.722230622012908e-05, "loss": 0.7669, "step": 832 }, { "epoch": 0.17832008776859062, "grad_norm": 0.2659092479939417, "learning_rate": 3.721539387131886e-05, "loss": 0.7449, "step": 833 }, { "epoch": 0.1785341575018062, "grad_norm": 0.299275820855458, "learning_rate": 3.720847357591435e-05, "loss": 0.7485, "step": 834 }, { "epoch": 0.17874822723502182, "grad_norm": 0.31678348023318653, "learning_rate": 3.720154533710994e-05, "loss": 0.8065, "step": 835 }, { "epoch": 0.1789622969682374, "grad_norm": 0.274238580197539, "learning_rate": 3.719460915810368e-05, "loss": 0.7499, "step": 836 }, { "epoch": 0.179176366701453, "grad_norm": 0.2645299103700658, "learning_rate": 3.718766504209732e-05, "loss": 0.748, "step": 837 }, { "epoch": 0.1793904364346686, "grad_norm": 0.33810732073784494, "learning_rate": 3.718071299229624e-05, "loss": 0.749, "step": 838 }, { "epoch": 0.17960450616788418, "grad_norm": 0.2707034275854758, "learning_rate": 3.7173753011909484e-05, "loss": 0.7428, "step": 839 }, { "epoch": 0.1798185759010998, "grad_norm": 0.278007294454195, "learning_rate": 3.716678510414978e-05, "loss": 0.7931, "step": 840 }, { "epoch": 0.18003264563431537, "grad_norm": 0.2873841867788181, "learning_rate": 3.7159809272233503e-05, "loss": 0.7483, "step": 841 }, { "epoch": 0.18024671536753098, "grad_norm": 0.25165887340624554, "learning_rate": 3.715282551938067e-05, "loss": 0.7667, "step": 842 }, { "epoch": 0.18046078510074656, "grad_norm": 0.23882700215230773, "learning_rate": 3.714583384881498e-05, "loss": 0.7666, "step": 843 }, { "epoch": 0.18067485483396217, "grad_norm": 0.3095243256273488, "learning_rate": 3.713883426376377e-05, "loss": 0.773, "step": 844 }, { "epoch": 0.18088892456717776, "grad_norm": 0.24101944011500306, "learning_rate": 3.713182676745804e-05, "loss": 0.7478, "step": 845 }, { "epoch": 0.18110299430039337, "grad_norm": 0.2609131242953271, "learning_rate": 3.7124811363132434e-05, "loss": 0.7338, "step": 846 }, { "epoch": 0.18131706403360895, "grad_norm": 0.23794528277671756, "learning_rate": 3.711778805402525e-05, "loss": 0.7341, "step": 847 }, { "epoch": 0.18153113376682453, "grad_norm": 0.2563717398283204, "learning_rate": 3.711075684337844e-05, "loss": 0.794, "step": 848 }, { "epoch": 0.18174520350004014, "grad_norm": 0.24247069564237045, "learning_rate": 3.710371773443759e-05, "loss": 0.746, "step": 849 }, { "epoch": 0.18195927323325572, "grad_norm": 0.26129577521893677, "learning_rate": 3.7096670730451945e-05, "loss": 0.7789, "step": 850 }, { "epoch": 0.18217334296647134, "grad_norm": 0.2316369539031604, "learning_rate": 3.708961583467438e-05, "loss": 0.7647, "step": 851 }, { "epoch": 0.18238741269968692, "grad_norm": 0.2421032365965948, "learning_rate": 3.708255305036144e-05, "loss": 0.7452, "step": 852 }, { "epoch": 0.18260148243290253, "grad_norm": 0.24479496782932084, "learning_rate": 3.707548238077328e-05, "loss": 0.7607, "step": 853 }, { "epoch": 0.1828155521661181, "grad_norm": 0.26513015104086035, "learning_rate": 3.7068403829173705e-05, "loss": 0.7811, "step": 854 }, { "epoch": 0.1830296218993337, "grad_norm": 0.22550303237471955, "learning_rate": 3.7061317398830176e-05, "loss": 0.7651, "step": 855 }, { "epoch": 0.1832436916325493, "grad_norm": 0.24071132285626767, "learning_rate": 3.705422309301376e-05, "loss": 0.7447, "step": 856 }, { "epoch": 0.1834577613657649, "grad_norm": 0.24485261299010438, "learning_rate": 3.704712091499919e-05, "loss": 0.7489, "step": 857 }, { "epoch": 0.1836718310989805, "grad_norm": 0.21599402692306946, "learning_rate": 3.7040010868064814e-05, "loss": 0.7692, "step": 858 }, { "epoch": 0.18388590083219608, "grad_norm": 0.3574968477206158, "learning_rate": 3.703289295549261e-05, "loss": 0.7802, "step": 859 }, { "epoch": 0.1840999705654117, "grad_norm": 0.2416963853432307, "learning_rate": 3.702576718056819e-05, "loss": 0.751, "step": 860 }, { "epoch": 0.18431404029862727, "grad_norm": 0.263650749855753, "learning_rate": 3.7018633546580815e-05, "loss": 0.7514, "step": 861 }, { "epoch": 0.18452811003184288, "grad_norm": 0.2416012709858466, "learning_rate": 3.701149205682335e-05, "loss": 0.7518, "step": 862 }, { "epoch": 0.18474217976505847, "grad_norm": 0.2637897868977282, "learning_rate": 3.700434271459229e-05, "loss": 0.7673, "step": 863 }, { "epoch": 0.18495624949827405, "grad_norm": 0.2591986973480914, "learning_rate": 3.699718552318776e-05, "loss": 0.758, "step": 864 }, { "epoch": 0.18517031923148966, "grad_norm": 0.28373412976738566, "learning_rate": 3.69900204859135e-05, "loss": 0.7556, "step": 865 }, { "epoch": 0.18538438896470524, "grad_norm": 0.26644833684543134, "learning_rate": 3.698284760607689e-05, "loss": 0.733, "step": 866 }, { "epoch": 0.18559845869792085, "grad_norm": 0.2557223244454845, "learning_rate": 3.697566688698892e-05, "loss": 0.7916, "step": 867 }, { "epoch": 0.18581252843113644, "grad_norm": 0.25936331823578024, "learning_rate": 3.696847833196419e-05, "loss": 0.7466, "step": 868 }, { "epoch": 0.18602659816435205, "grad_norm": 0.24833805790191535, "learning_rate": 3.696128194432092e-05, "loss": 0.7475, "step": 869 }, { "epoch": 0.18624066789756763, "grad_norm": 0.24301701958171398, "learning_rate": 3.695407772738095e-05, "loss": 0.75, "step": 870 }, { "epoch": 0.18645473763078324, "grad_norm": 0.2547341313806687, "learning_rate": 3.6946865684469735e-05, "loss": 0.7487, "step": 871 }, { "epoch": 0.18666880736399882, "grad_norm": 0.3514402089251288, "learning_rate": 3.693964581891635e-05, "loss": 0.7556, "step": 872 }, { "epoch": 0.1868828770972144, "grad_norm": 0.26073534877205573, "learning_rate": 3.693241813405346e-05, "loss": 0.7769, "step": 873 }, { "epoch": 0.18709694683043002, "grad_norm": 0.2531253720240625, "learning_rate": 3.692518263321736e-05, "loss": 0.7515, "step": 874 }, { "epoch": 0.1873110165636456, "grad_norm": 0.253007191898049, "learning_rate": 3.691793931974793e-05, "loss": 0.762, "step": 875 }, { "epoch": 0.1875250862968612, "grad_norm": 0.23301058060597665, "learning_rate": 3.6910688196988685e-05, "loss": 0.7485, "step": 876 }, { "epoch": 0.1877391560300768, "grad_norm": 0.2451215601454093, "learning_rate": 3.690342926828673e-05, "loss": 0.758, "step": 877 }, { "epoch": 0.1879532257632924, "grad_norm": 0.2540061937078841, "learning_rate": 3.689616253699276e-05, "loss": 0.7562, "step": 878 }, { "epoch": 0.18816729549650799, "grad_norm": 0.2379510276033454, "learning_rate": 3.68888880064611e-05, "loss": 0.7295, "step": 879 }, { "epoch": 0.1883813652297236, "grad_norm": 0.2733224328561649, "learning_rate": 3.688160568004965e-05, "loss": 0.7238, "step": 880 }, { "epoch": 0.18859543496293918, "grad_norm": 0.2766041481780387, "learning_rate": 3.687431556111992e-05, "loss": 0.7542, "step": 881 }, { "epoch": 0.18880950469615476, "grad_norm": 0.295314325954768, "learning_rate": 3.686701765303701e-05, "loss": 0.8054, "step": 882 }, { "epoch": 0.18902357442937037, "grad_norm": 0.29212465129928555, "learning_rate": 3.685971195916963e-05, "loss": 0.7635, "step": 883 }, { "epoch": 0.18923764416258596, "grad_norm": 0.2773384531911246, "learning_rate": 3.685239848289008e-05, "loss": 0.747, "step": 884 }, { "epoch": 0.18945171389580157, "grad_norm": 0.3046770818327443, "learning_rate": 3.6845077227574234e-05, "loss": 0.7635, "step": 885 }, { "epoch": 0.18966578362901715, "grad_norm": 0.7223002240752847, "learning_rate": 3.683774819660158e-05, "loss": 0.7754, "step": 886 }, { "epoch": 0.18987985336223276, "grad_norm": 0.28505220492776684, "learning_rate": 3.683041139335518e-05, "loss": 0.7566, "step": 887 }, { "epoch": 0.19009392309544834, "grad_norm": 0.27583794145550233, "learning_rate": 3.682306682122168e-05, "loss": 0.7517, "step": 888 }, { "epoch": 0.19030799282866392, "grad_norm": 0.2555012836872675, "learning_rate": 3.681571448359135e-05, "loss": 0.782, "step": 889 }, { "epoch": 0.19052206256187953, "grad_norm": 0.30910545710081916, "learning_rate": 3.6808354383857983e-05, "loss": 0.7581, "step": 890 }, { "epoch": 0.19073613229509512, "grad_norm": 0.2651034955880483, "learning_rate": 3.680098652541901e-05, "loss": 0.7493, "step": 891 }, { "epoch": 0.19095020202831073, "grad_norm": 0.2513642505801275, "learning_rate": 3.6793610911675405e-05, "loss": 0.7579, "step": 892 }, { "epoch": 0.1911642717615263, "grad_norm": 0.27887751715847525, "learning_rate": 3.678622754603175e-05, "loss": 0.7508, "step": 893 }, { "epoch": 0.19137834149474192, "grad_norm": 0.27339848683232354, "learning_rate": 3.6778836431896184e-05, "loss": 0.7504, "step": 894 }, { "epoch": 0.1915924112279575, "grad_norm": 0.27590508160509225, "learning_rate": 3.677143757268043e-05, "loss": 0.7813, "step": 895 }, { "epoch": 0.19180648096117311, "grad_norm": 0.27287031621349384, "learning_rate": 3.676403097179981e-05, "loss": 0.7654, "step": 896 }, { "epoch": 0.1920205506943887, "grad_norm": 0.2731546157033016, "learning_rate": 3.675661663267317e-05, "loss": 0.7602, "step": 897 }, { "epoch": 0.19223462042760428, "grad_norm": 0.4328428167070174, "learning_rate": 3.674919455872297e-05, "loss": 0.7489, "step": 898 }, { "epoch": 0.1924486901608199, "grad_norm": 0.26609179205860484, "learning_rate": 3.6741764753375216e-05, "loss": 0.7878, "step": 899 }, { "epoch": 0.19266275989403547, "grad_norm": 0.2560585322095053, "learning_rate": 3.673432722005951e-05, "loss": 0.7692, "step": 900 }, { "epoch": 0.19287682962725108, "grad_norm": 0.27375377498463116, "learning_rate": 3.672688196220899e-05, "loss": 0.7435, "step": 901 }, { "epoch": 0.19309089936046667, "grad_norm": 0.24204721417261835, "learning_rate": 3.6719428983260364e-05, "loss": 0.7619, "step": 902 }, { "epoch": 0.19330496909368228, "grad_norm": 0.27963563350801673, "learning_rate": 3.6711968286653936e-05, "loss": 0.7871, "step": 903 }, { "epoch": 0.19351903882689786, "grad_norm": 0.2642109920435115, "learning_rate": 3.6704499875833536e-05, "loss": 0.7571, "step": 904 }, { "epoch": 0.19373310856011347, "grad_norm": 0.3544081292478254, "learning_rate": 3.669702375424658e-05, "loss": 0.7406, "step": 905 }, { "epoch": 0.19394717829332905, "grad_norm": 0.3042454254833041, "learning_rate": 3.668953992534402e-05, "loss": 0.7371, "step": 906 }, { "epoch": 0.19416124802654464, "grad_norm": 0.25046822229158755, "learning_rate": 3.668204839258038e-05, "loss": 0.7471, "step": 907 }, { "epoch": 0.19437531775976025, "grad_norm": 0.2870686541223231, "learning_rate": 3.667454915941373e-05, "loss": 0.7685, "step": 908 }, { "epoch": 0.19458938749297583, "grad_norm": 0.2508346151097107, "learning_rate": 3.6667042229305725e-05, "loss": 0.7228, "step": 909 }, { "epoch": 0.19480345722619144, "grad_norm": 0.5901600838533766, "learning_rate": 3.665952760572154e-05, "loss": 0.7617, "step": 910 }, { "epoch": 0.19501752695940702, "grad_norm": 0.29949620098684454, "learning_rate": 3.6652005292129894e-05, "loss": 0.7458, "step": 911 }, { "epoch": 0.19523159669262263, "grad_norm": 1.255403538904062, "learning_rate": 3.66444752920031e-05, "loss": 0.7635, "step": 912 }, { "epoch": 0.19544566642583822, "grad_norm": 0.2671125999944408, "learning_rate": 3.6636937608816975e-05, "loss": 0.7467, "step": 913 }, { "epoch": 0.1956597361590538, "grad_norm": 0.3126725141894936, "learning_rate": 3.662939224605091e-05, "loss": 0.7595, "step": 914 }, { "epoch": 0.1958738058922694, "grad_norm": 0.8565454077615011, "learning_rate": 3.662183920718782e-05, "loss": 0.8323, "step": 915 }, { "epoch": 0.196087875625485, "grad_norm": 0.3999468363425481, "learning_rate": 3.661427849571418e-05, "loss": 0.7466, "step": 916 }, { "epoch": 0.1963019453587006, "grad_norm": 0.30723734570549177, "learning_rate": 3.660671011512e-05, "loss": 0.7205, "step": 917 }, { "epoch": 0.19651601509191619, "grad_norm": 0.25734326947114644, "learning_rate": 3.659913406889883e-05, "loss": 0.7595, "step": 918 }, { "epoch": 0.1967300848251318, "grad_norm": 0.29978089982244505, "learning_rate": 3.659155036054777e-05, "loss": 0.7536, "step": 919 }, { "epoch": 0.19694415455834738, "grad_norm": 0.279094319424851, "learning_rate": 3.6583958993567424e-05, "loss": 0.7958, "step": 920 }, { "epoch": 0.197158224291563, "grad_norm": 0.24255895338189498, "learning_rate": 3.657635997146197e-05, "loss": 0.7548, "step": 921 }, { "epoch": 0.19737229402477857, "grad_norm": 0.2701795076574947, "learning_rate": 3.6568753297739094e-05, "loss": 0.7678, "step": 922 }, { "epoch": 0.19758636375799415, "grad_norm": 0.962851927144585, "learning_rate": 3.656113897591003e-05, "loss": 0.7494, "step": 923 }, { "epoch": 0.19780043349120977, "grad_norm": 0.44179348600186596, "learning_rate": 3.655351700948953e-05, "loss": 0.7625, "step": 924 }, { "epoch": 0.19801450322442535, "grad_norm": 0.2589381596936857, "learning_rate": 3.654588740199588e-05, "loss": 0.7768, "step": 925 }, { "epoch": 0.19822857295764096, "grad_norm": 0.25911085272735385, "learning_rate": 3.653825015695089e-05, "loss": 0.7321, "step": 926 }, { "epoch": 0.19844264269085654, "grad_norm": 0.24885041775075306, "learning_rate": 3.65306052778799e-05, "loss": 0.7487, "step": 927 }, { "epoch": 0.19865671242407215, "grad_norm": 0.2556786113182652, "learning_rate": 3.652295276831178e-05, "loss": 0.7801, "step": 928 }, { "epoch": 0.19887078215728773, "grad_norm": 0.26634249596644843, "learning_rate": 3.651529263177891e-05, "loss": 0.7329, "step": 929 }, { "epoch": 0.19908485189050334, "grad_norm": 0.2732515080303684, "learning_rate": 3.6507624871817194e-05, "loss": 0.7481, "step": 930 }, { "epoch": 0.19929892162371893, "grad_norm": 0.2672716464049646, "learning_rate": 3.6499949491966046e-05, "loss": 0.7448, "step": 931 }, { "epoch": 0.1995129913569345, "grad_norm": 0.2568881179297147, "learning_rate": 3.649226649576843e-05, "loss": 0.77, "step": 932 }, { "epoch": 0.19972706109015012, "grad_norm": 0.23810961527881044, "learning_rate": 3.6484575886770784e-05, "loss": 0.749, "step": 933 }, { "epoch": 0.1999411308233657, "grad_norm": 0.25038064244934716, "learning_rate": 3.647687766852308e-05, "loss": 0.7666, "step": 934 }, { "epoch": 0.20015520055658131, "grad_norm": 0.2283222081414046, "learning_rate": 3.6469171844578815e-05, "loss": 0.7702, "step": 935 }, { "epoch": 0.2003692702897969, "grad_norm": 0.25814551947403014, "learning_rate": 3.6461458418494966e-05, "loss": 0.7512, "step": 936 }, { "epoch": 0.2005833400230125, "grad_norm": 0.2821156323407736, "learning_rate": 3.645373739383205e-05, "loss": 0.7567, "step": 937 }, { "epoch": 0.2007974097562281, "grad_norm": 0.29502512710151657, "learning_rate": 3.6446008774154075e-05, "loss": 0.7529, "step": 938 }, { "epoch": 0.20101147948944367, "grad_norm": 0.2371006541241456, "learning_rate": 3.643827256302855e-05, "loss": 0.7348, "step": 939 }, { "epoch": 0.20122554922265928, "grad_norm": 0.28220834460938493, "learning_rate": 3.64305287640265e-05, "loss": 0.7491, "step": 940 }, { "epoch": 0.20143961895587487, "grad_norm": 0.2743393530532695, "learning_rate": 3.642277738072246e-05, "loss": 0.7659, "step": 941 }, { "epoch": 0.20165368868909048, "grad_norm": 0.26341136279409577, "learning_rate": 3.6415018416694435e-05, "loss": 0.7735, "step": 942 }, { "epoch": 0.20186775842230606, "grad_norm": 0.23857262810112115, "learning_rate": 3.640725187552396e-05, "loss": 0.7051, "step": 943 }, { "epoch": 0.20208182815552167, "grad_norm": 0.2588822104766973, "learning_rate": 3.6399477760796055e-05, "loss": 0.7353, "step": 944 }, { "epoch": 0.20229589788873725, "grad_norm": 0.25409577229517644, "learning_rate": 3.639169607609924e-05, "loss": 0.7626, "step": 945 }, { "epoch": 0.20250996762195286, "grad_norm": 0.22614850090788918, "learning_rate": 3.638390682502552e-05, "loss": 0.7693, "step": 946 }, { "epoch": 0.20272403735516845, "grad_norm": 0.2219331573913887, "learning_rate": 3.63761100111704e-05, "loss": 0.7504, "step": 947 }, { "epoch": 0.20293810708838403, "grad_norm": 0.2527122225830614, "learning_rate": 3.636830563813287e-05, "loss": 0.7292, "step": 948 }, { "epoch": 0.20315217682159964, "grad_norm": 0.25888609447841554, "learning_rate": 3.6360493709515427e-05, "loss": 0.7933, "step": 949 }, { "epoch": 0.20336624655481522, "grad_norm": 0.22355108902056006, "learning_rate": 3.635267422892404e-05, "loss": 0.7555, "step": 950 }, { "epoch": 0.20358031628803083, "grad_norm": 0.2477643513389135, "learning_rate": 3.634484719996816e-05, "loss": 0.724, "step": 951 }, { "epoch": 0.20379438602124642, "grad_norm": 0.26711413869833667, "learning_rate": 3.6337012626260736e-05, "loss": 0.7214, "step": 952 }, { "epoch": 0.20400845575446203, "grad_norm": 0.2518356078233925, "learning_rate": 3.632917051141818e-05, "loss": 0.7631, "step": 953 }, { "epoch": 0.2042225254876776, "grad_norm": 0.2727448789347231, "learning_rate": 3.632132085906042e-05, "loss": 0.736, "step": 954 }, { "epoch": 0.20443659522089322, "grad_norm": 0.2564131594299474, "learning_rate": 3.631346367281082e-05, "loss": 0.7667, "step": 955 }, { "epoch": 0.2046506649541088, "grad_norm": 0.2456704570702374, "learning_rate": 3.6305598956296255e-05, "loss": 0.7582, "step": 956 }, { "epoch": 0.20486473468732438, "grad_norm": 0.24376627673782, "learning_rate": 3.6297726713147065e-05, "loss": 0.759, "step": 957 }, { "epoch": 0.20507880442054, "grad_norm": 0.21743130219262322, "learning_rate": 3.628984694699705e-05, "loss": 0.7407, "step": 958 }, { "epoch": 0.20529287415375558, "grad_norm": 0.2554600431871466, "learning_rate": 3.6281959661483506e-05, "loss": 0.7333, "step": 959 }, { "epoch": 0.2055069438869712, "grad_norm": 0.23795030565121542, "learning_rate": 3.627406486024719e-05, "loss": 0.7686, "step": 960 }, { "epoch": 0.20572101362018677, "grad_norm": 0.23460618633053193, "learning_rate": 3.626616254693233e-05, "loss": 0.7608, "step": 961 }, { "epoch": 0.20593508335340238, "grad_norm": 0.36219050008528314, "learning_rate": 3.6258252725186614e-05, "loss": 0.7727, "step": 962 }, { "epoch": 0.20614915308661796, "grad_norm": 0.28802229226357035, "learning_rate": 3.6250335398661196e-05, "loss": 0.754, "step": 963 }, { "epoch": 0.20636322281983355, "grad_norm": 0.22913800174835353, "learning_rate": 3.6242410571010705e-05, "loss": 0.741, "step": 964 }, { "epoch": 0.20657729255304916, "grad_norm": 0.2162083975992117, "learning_rate": 3.623447824589323e-05, "loss": 0.7301, "step": 965 }, { "epoch": 0.20679136228626474, "grad_norm": 0.24966914276568036, "learning_rate": 3.6226538426970315e-05, "loss": 0.7288, "step": 966 }, { "epoch": 0.20700543201948035, "grad_norm": 0.26811401307725996, "learning_rate": 3.621859111790696e-05, "loss": 0.7704, "step": 967 }, { "epoch": 0.20721950175269593, "grad_norm": 0.8554616315407051, "learning_rate": 3.621063632237164e-05, "loss": 0.7557, "step": 968 }, { "epoch": 0.20743357148591154, "grad_norm": 0.26537345378866023, "learning_rate": 3.620267404403627e-05, "loss": 0.7481, "step": 969 }, { "epoch": 0.20764764121912713, "grad_norm": 0.265563365564281, "learning_rate": 3.619470428657622e-05, "loss": 0.7624, "step": 970 }, { "epoch": 0.20786171095234274, "grad_norm": 0.25337546459776217, "learning_rate": 3.6186727053670316e-05, "loss": 0.7434, "step": 971 }, { "epoch": 0.20807578068555832, "grad_norm": 0.28627342771657804, "learning_rate": 3.617874234900083e-05, "loss": 0.7776, "step": 972 }, { "epoch": 0.2082898504187739, "grad_norm": 0.290219753909199, "learning_rate": 3.61707501762535e-05, "loss": 0.7703, "step": 973 }, { "epoch": 0.2085039201519895, "grad_norm": 0.28497728133674854, "learning_rate": 3.616275053911749e-05, "loss": 0.7801, "step": 974 }, { "epoch": 0.2087179898852051, "grad_norm": 0.2767530667311987, "learning_rate": 3.615474344128542e-05, "loss": 0.7442, "step": 975 }, { "epoch": 0.2089320596184207, "grad_norm": 0.26315175959980436, "learning_rate": 3.614672888645334e-05, "loss": 0.7675, "step": 976 }, { "epoch": 0.2091461293516363, "grad_norm": 0.26937821442955023, "learning_rate": 3.6138706878320775e-05, "loss": 0.7707, "step": 977 }, { "epoch": 0.2093601990848519, "grad_norm": 0.3039189057189529, "learning_rate": 3.613067742059065e-05, "loss": 0.7409, "step": 978 }, { "epoch": 0.20957426881806748, "grad_norm": 0.3009097585087098, "learning_rate": 3.6122640516969356e-05, "loss": 0.7627, "step": 979 }, { "epoch": 0.2097883385512831, "grad_norm": 0.2544442419583323, "learning_rate": 3.611459617116672e-05, "loss": 0.7447, "step": 980 }, { "epoch": 0.21000240828449868, "grad_norm": 0.23778116952129275, "learning_rate": 3.610654438689598e-05, "loss": 0.7272, "step": 981 }, { "epoch": 0.21021647801771426, "grad_norm": 0.2613464568658064, "learning_rate": 3.6098485167873845e-05, "loss": 0.7364, "step": 982 }, { "epoch": 0.21043054775092987, "grad_norm": 0.30803820816106064, "learning_rate": 3.609041851782042e-05, "loss": 0.7228, "step": 983 }, { "epoch": 0.21064461748414545, "grad_norm": 0.3056767252526709, "learning_rate": 3.608234444045927e-05, "loss": 0.7369, "step": 984 }, { "epoch": 0.21085868721736106, "grad_norm": 0.23492053334567353, "learning_rate": 3.6074262939517355e-05, "loss": 0.7333, "step": 985 }, { "epoch": 0.21107275695057665, "grad_norm": 0.2689515273251663, "learning_rate": 3.60661740187251e-05, "loss": 0.7374, "step": 986 }, { "epoch": 0.21128682668379226, "grad_norm": 0.2895333948928181, "learning_rate": 3.605807768181633e-05, "loss": 0.743, "step": 987 }, { "epoch": 0.21150089641700784, "grad_norm": 0.2549851894277088, "learning_rate": 3.604997393252829e-05, "loss": 0.7273, "step": 988 }, { "epoch": 0.21171496615022342, "grad_norm": 0.22971103325219708, "learning_rate": 3.604186277460166e-05, "loss": 0.743, "step": 989 }, { "epoch": 0.21192903588343903, "grad_norm": 0.2710269586461271, "learning_rate": 3.603374421178055e-05, "loss": 0.7235, "step": 990 }, { "epoch": 0.21214310561665461, "grad_norm": 0.29461367252677395, "learning_rate": 3.602561824781246e-05, "loss": 0.7739, "step": 991 }, { "epoch": 0.21235717534987023, "grad_norm": 0.23895229336665744, "learning_rate": 3.601748488644832e-05, "loss": 0.7634, "step": 992 }, { "epoch": 0.2125712450830858, "grad_norm": 0.2593252880595089, "learning_rate": 3.600934413144248e-05, "loss": 0.7561, "step": 993 }, { "epoch": 0.21278531481630142, "grad_norm": 0.2849973035192068, "learning_rate": 3.6001195986552694e-05, "loss": 0.7429, "step": 994 }, { "epoch": 0.212999384549517, "grad_norm": 0.24028559386334664, "learning_rate": 3.5993040455540135e-05, "loss": 0.7512, "step": 995 }, { "epoch": 0.2132134542827326, "grad_norm": 0.26559823564831764, "learning_rate": 3.5984877542169376e-05, "loss": 0.7224, "step": 996 }, { "epoch": 0.2134275240159482, "grad_norm": 0.2745250932675374, "learning_rate": 3.59767072502084e-05, "loss": 0.7631, "step": 997 }, { "epoch": 0.21364159374916378, "grad_norm": 0.24741598704199386, "learning_rate": 3.596852958342861e-05, "loss": 0.7256, "step": 998 }, { "epoch": 0.2138556634823794, "grad_norm": 0.24734191368592298, "learning_rate": 3.5960344545604796e-05, "loss": 0.7596, "step": 999 }, { "epoch": 0.21406973321559497, "grad_norm": 10.092858210617589, "learning_rate": 3.595215214051515e-05, "loss": 0.746, "step": 1000 }, { "epoch": 0.21428380294881058, "grad_norm": 0.4171261524859099, "learning_rate": 3.594395237194128e-05, "loss": 0.7935, "step": 1001 }, { "epoch": 0.21449787268202616, "grad_norm": 0.46178238096671537, "learning_rate": 3.593574524366819e-05, "loss": 0.7595, "step": 1002 }, { "epoch": 0.21471194241524177, "grad_norm": 0.35196861929128975, "learning_rate": 3.592753075948426e-05, "loss": 0.7435, "step": 1003 }, { "epoch": 0.21492601214845736, "grad_norm": 0.36167115218197843, "learning_rate": 3.5919308923181286e-05, "loss": 0.7605, "step": 1004 }, { "epoch": 0.21514008188167297, "grad_norm": 0.3696739643849057, "learning_rate": 3.591107973855445e-05, "loss": 0.7451, "step": 1005 }, { "epoch": 0.21535415161488855, "grad_norm": 0.37838772822659933, "learning_rate": 3.590284320940235e-05, "loss": 0.748, "step": 1006 }, { "epoch": 0.21556822134810413, "grad_norm": 0.31589052796646483, "learning_rate": 3.589459933952692e-05, "loss": 0.7552, "step": 1007 }, { "epoch": 0.21578229108131974, "grad_norm": 0.385105987314194, "learning_rate": 3.588634813273354e-05, "loss": 0.741, "step": 1008 }, { "epoch": 0.21599636081453533, "grad_norm": 0.38260827645461054, "learning_rate": 3.587808959283094e-05, "loss": 0.7506, "step": 1009 }, { "epoch": 0.21621043054775094, "grad_norm": 0.33440889188332873, "learning_rate": 3.586982372363125e-05, "loss": 0.7327, "step": 1010 }, { "epoch": 0.21642450028096652, "grad_norm": 0.29313029392688494, "learning_rate": 3.586155052894998e-05, "loss": 0.7469, "step": 1011 }, { "epoch": 0.21663857001418213, "grad_norm": 0.340638229924971, "learning_rate": 3.585327001260602e-05, "loss": 0.7532, "step": 1012 }, { "epoch": 0.2168526397473977, "grad_norm": 0.3544562477711959, "learning_rate": 3.5844982178421646e-05, "loss": 0.7754, "step": 1013 }, { "epoch": 0.21706670948061332, "grad_norm": 0.3006330074376759, "learning_rate": 3.58366870302225e-05, "loss": 0.7742, "step": 1014 }, { "epoch": 0.2172807792138289, "grad_norm": 0.28236810688192665, "learning_rate": 3.5828384571837615e-05, "loss": 0.7257, "step": 1015 }, { "epoch": 0.2174948489470445, "grad_norm": 0.3736260433358128, "learning_rate": 3.582007480709939e-05, "loss": 0.7403, "step": 1016 }, { "epoch": 0.2177089186802601, "grad_norm": 0.34295859341269685, "learning_rate": 3.581175773984359e-05, "loss": 0.7507, "step": 1017 }, { "epoch": 0.21792298841347568, "grad_norm": 0.28018933171639143, "learning_rate": 3.580343337390935e-05, "loss": 0.7321, "step": 1018 }, { "epoch": 0.2181370581466913, "grad_norm": 0.3285608573750389, "learning_rate": 3.5795101713139205e-05, "loss": 0.7501, "step": 1019 }, { "epoch": 0.21835112787990688, "grad_norm": 0.3299448479539171, "learning_rate": 3.578676276137903e-05, "loss": 0.7532, "step": 1020 }, { "epoch": 0.21856519761312249, "grad_norm": 0.277610694652717, "learning_rate": 3.577841652247805e-05, "loss": 0.7319, "step": 1021 }, { "epoch": 0.21877926734633807, "grad_norm": 0.24757180785673524, "learning_rate": 3.5770063000288896e-05, "loss": 0.711, "step": 1022 }, { "epoch": 0.21899333707955365, "grad_norm": 0.28273722392178796, "learning_rate": 3.5761702198667525e-05, "loss": 0.7578, "step": 1023 }, { "epoch": 0.21920740681276926, "grad_norm": 0.26298230247381893, "learning_rate": 3.5753334121473275e-05, "loss": 0.7492, "step": 1024 }, { "epoch": 0.21942147654598484, "grad_norm": 0.25583233500336755, "learning_rate": 3.574495877256883e-05, "loss": 0.739, "step": 1025 }, { "epoch": 0.21963554627920046, "grad_norm": 0.2898634519620882, "learning_rate": 3.5736576155820236e-05, "loss": 0.7418, "step": 1026 }, { "epoch": 0.21984961601241604, "grad_norm": 0.25997297043422357, "learning_rate": 3.57281862750969e-05, "loss": 0.7487, "step": 1027 }, { "epoch": 0.22006368574563165, "grad_norm": 0.23053881246498512, "learning_rate": 3.571978913427157e-05, "loss": 0.7253, "step": 1028 }, { "epoch": 0.22027775547884723, "grad_norm": 0.26850485101261434, "learning_rate": 3.5711384737220345e-05, "loss": 0.7384, "step": 1029 }, { "epoch": 0.22049182521206284, "grad_norm": 0.24457392835460862, "learning_rate": 3.570297308782269e-05, "loss": 0.7264, "step": 1030 }, { "epoch": 0.22070589494527842, "grad_norm": 0.23022578083012712, "learning_rate": 3.5694554189961405e-05, "loss": 0.738, "step": 1031 }, { "epoch": 0.220919964678494, "grad_norm": 0.2525301599694607, "learning_rate": 3.5686128047522635e-05, "loss": 0.7138, "step": 1032 }, { "epoch": 0.22113403441170962, "grad_norm": 0.2437538317226544, "learning_rate": 3.567769466439588e-05, "loss": 0.7111, "step": 1033 }, { "epoch": 0.2213481041449252, "grad_norm": 0.24709264591753685, "learning_rate": 3.5669254044473954e-05, "loss": 0.7323, "step": 1034 }, { "epoch": 0.2215621738781408, "grad_norm": 0.24310991049521027, "learning_rate": 3.5660806191653055e-05, "loss": 0.7295, "step": 1035 }, { "epoch": 0.2217762436113564, "grad_norm": 0.22807514507682305, "learning_rate": 3.565235110983268e-05, "loss": 0.741, "step": 1036 }, { "epoch": 0.221990313344572, "grad_norm": 0.26964715143263146, "learning_rate": 3.564388880291569e-05, "loss": 0.7484, "step": 1037 }, { "epoch": 0.2222043830777876, "grad_norm": 0.2554835794594098, "learning_rate": 3.5635419274808266e-05, "loss": 0.7637, "step": 1038 }, { "epoch": 0.2224184528110032, "grad_norm": 0.24403499735322062, "learning_rate": 3.5626942529419916e-05, "loss": 0.7457, "step": 1039 }, { "epoch": 0.22263252254421878, "grad_norm": 0.2416178368600129, "learning_rate": 3.5618458570663515e-05, "loss": 0.7507, "step": 1040 }, { "epoch": 0.22284659227743436, "grad_norm": 0.21805288171407658, "learning_rate": 3.5609967402455226e-05, "loss": 0.735, "step": 1041 }, { "epoch": 0.22306066201064997, "grad_norm": 0.24543246850912478, "learning_rate": 3.560146902871455e-05, "loss": 0.7413, "step": 1042 }, { "epoch": 0.22327473174386556, "grad_norm": 0.22441315685460572, "learning_rate": 3.559296345336433e-05, "loss": 0.7484, "step": 1043 }, { "epoch": 0.22348880147708117, "grad_norm": 0.2338048943893594, "learning_rate": 3.558445068033074e-05, "loss": 0.7277, "step": 1044 }, { "epoch": 0.22370287121029675, "grad_norm": 0.2605556960844784, "learning_rate": 3.557593071354323e-05, "loss": 0.7409, "step": 1045 }, { "epoch": 0.22391694094351236, "grad_norm": 0.23718871838302671, "learning_rate": 3.556740355693462e-05, "loss": 0.7974, "step": 1046 }, { "epoch": 0.22413101067672794, "grad_norm": 0.20936316183624143, "learning_rate": 3.5558869214441025e-05, "loss": 0.7436, "step": 1047 }, { "epoch": 0.22434508040994353, "grad_norm": 0.25239905632152304, "learning_rate": 3.555032769000188e-05, "loss": 0.7661, "step": 1048 }, { "epoch": 0.22455915014315914, "grad_norm": 0.5191686141846192, "learning_rate": 3.554177898755994e-05, "loss": 0.7506, "step": 1049 }, { "epoch": 0.22477321987637472, "grad_norm": 0.24494503867317957, "learning_rate": 3.5533223111061276e-05, "loss": 0.7437, "step": 1050 }, { "epoch": 0.22498728960959033, "grad_norm": 0.22260914180330765, "learning_rate": 3.552466006445525e-05, "loss": 0.705, "step": 1051 }, { "epoch": 0.2252013593428059, "grad_norm": 0.2409119289875767, "learning_rate": 3.551608985169456e-05, "loss": 0.7392, "step": 1052 }, { "epoch": 0.22541542907602152, "grad_norm": 0.23037441671075173, "learning_rate": 3.55075124767352e-05, "loss": 0.7556, "step": 1053 }, { "epoch": 0.2256294988092371, "grad_norm": 0.2413821715606796, "learning_rate": 3.549892794353647e-05, "loss": 0.7594, "step": 1054 }, { "epoch": 0.22584356854245272, "grad_norm": 0.23296370989555829, "learning_rate": 3.549033625606097e-05, "loss": 0.7523, "step": 1055 }, { "epoch": 0.2260576382756683, "grad_norm": 0.23123454344750505, "learning_rate": 3.548173741827461e-05, "loss": 0.7588, "step": 1056 }, { "epoch": 0.22627170800888388, "grad_norm": 0.3488286736625951, "learning_rate": 3.54731314341466e-05, "loss": 0.7225, "step": 1057 }, { "epoch": 0.2264857777420995, "grad_norm": 0.22198341459277993, "learning_rate": 3.546451830764944e-05, "loss": 0.7514, "step": 1058 }, { "epoch": 0.22669984747531507, "grad_norm": 0.25979761376278093, "learning_rate": 3.545589804275894e-05, "loss": 0.77, "step": 1059 }, { "epoch": 0.22691391720853069, "grad_norm": 0.24466064570140464, "learning_rate": 3.5447270643454196e-05, "loss": 0.7741, "step": 1060 }, { "epoch": 0.22712798694174627, "grad_norm": 0.28949037020711366, "learning_rate": 3.5438636113717604e-05, "loss": 0.7701, "step": 1061 }, { "epoch": 0.22734205667496188, "grad_norm": 0.24924127205366303, "learning_rate": 3.542999445753485e-05, "loss": 0.7349, "step": 1062 }, { "epoch": 0.22755612640817746, "grad_norm": 0.24463955301015564, "learning_rate": 3.5421345678894883e-05, "loss": 0.7377, "step": 1063 }, { "epoch": 0.22777019614139307, "grad_norm": 0.24132365114750715, "learning_rate": 3.5412689781789994e-05, "loss": 0.7447, "step": 1064 }, { "epoch": 0.22798426587460865, "grad_norm": 0.2393135182760011, "learning_rate": 3.540402677021571e-05, "loss": 0.7536, "step": 1065 }, { "epoch": 0.22819833560782424, "grad_norm": 0.23470436314271398, "learning_rate": 3.539535664817087e-05, "loss": 0.7356, "step": 1066 }, { "epoch": 0.22841240534103985, "grad_norm": 0.24991603565251896, "learning_rate": 3.538667941965758e-05, "loss": 0.7471, "step": 1067 }, { "epoch": 0.22862647507425543, "grad_norm": 0.2510669109647726, "learning_rate": 3.537799508868124e-05, "loss": 0.7428, "step": 1068 }, { "epoch": 0.22884054480747104, "grad_norm": 0.23343415846091617, "learning_rate": 3.5369303659250515e-05, "loss": 0.7624, "step": 1069 }, { "epoch": 0.22905461454068662, "grad_norm": 0.276998861185143, "learning_rate": 3.5360605135377354e-05, "loss": 0.7527, "step": 1070 }, { "epoch": 0.22926868427390223, "grad_norm": 0.2462153525809238, "learning_rate": 3.535189952107699e-05, "loss": 0.7373, "step": 1071 }, { "epoch": 0.22948275400711782, "grad_norm": 0.2238843819915132, "learning_rate": 3.53431868203679e-05, "loss": 0.7281, "step": 1072 }, { "epoch": 0.2296968237403334, "grad_norm": 0.23209022838278037, "learning_rate": 3.5334467037271864e-05, "loss": 0.7591, "step": 1073 }, { "epoch": 0.229910893473549, "grad_norm": 0.24566729465175108, "learning_rate": 3.5325740175813915e-05, "loss": 0.7503, "step": 1074 }, { "epoch": 0.2301249632067646, "grad_norm": 0.22745802495833817, "learning_rate": 3.5317006240022355e-05, "loss": 0.7498, "step": 1075 }, { "epoch": 0.2303390329399802, "grad_norm": 0.24769220269180708, "learning_rate": 3.5308265233928755e-05, "loss": 0.7042, "step": 1076 }, { "epoch": 0.2305531026731958, "grad_norm": 0.24851444239254405, "learning_rate": 3.529951716156794e-05, "loss": 0.7367, "step": 1077 }, { "epoch": 0.2307671724064114, "grad_norm": 0.23598212975741154, "learning_rate": 3.529076202697802e-05, "loss": 0.7306, "step": 1078 }, { "epoch": 0.23098124213962698, "grad_norm": 0.22106312349882618, "learning_rate": 3.528199983420033e-05, "loss": 0.7296, "step": 1079 }, { "epoch": 0.2311953118728426, "grad_norm": 0.23293359636391545, "learning_rate": 3.52732305872795e-05, "loss": 0.7326, "step": 1080 }, { "epoch": 0.23140938160605817, "grad_norm": 0.25325870616664814, "learning_rate": 3.526445429026338e-05, "loss": 0.7302, "step": 1081 }, { "epoch": 0.23162345133927376, "grad_norm": 0.23651232115232096, "learning_rate": 3.5255670947203104e-05, "loss": 0.7575, "step": 1082 }, { "epoch": 0.23183752107248937, "grad_norm": 0.24854842349869274, "learning_rate": 3.5246880562153055e-05, "loss": 0.7544, "step": 1083 }, { "epoch": 0.23205159080570495, "grad_norm": 0.21848220913183314, "learning_rate": 3.523808313917084e-05, "loss": 0.7533, "step": 1084 }, { "epoch": 0.23226566053892056, "grad_norm": 0.2430693946130431, "learning_rate": 3.5229278682317346e-05, "loss": 0.7264, "step": 1085 }, { "epoch": 0.23247973027213614, "grad_norm": 0.22773842855288573, "learning_rate": 3.522046719565669e-05, "loss": 0.7094, "step": 1086 }, { "epoch": 0.23269380000535175, "grad_norm": 0.23527288317600056, "learning_rate": 3.521164868325624e-05, "loss": 0.7344, "step": 1087 }, { "epoch": 0.23290786973856734, "grad_norm": 0.23204699437866774, "learning_rate": 3.52028231491866e-05, "loss": 0.7322, "step": 1088 }, { "epoch": 0.23312193947178295, "grad_norm": 0.3894003861294942, "learning_rate": 3.519399059752163e-05, "loss": 0.7576, "step": 1089 }, { "epoch": 0.23333600920499853, "grad_norm": 0.22807525846828683, "learning_rate": 3.5185151032338406e-05, "loss": 0.7254, "step": 1090 }, { "epoch": 0.2335500789382141, "grad_norm": 0.24709398073098707, "learning_rate": 3.517630445771727e-05, "loss": 0.7501, "step": 1091 }, { "epoch": 0.23376414867142972, "grad_norm": 0.2883078871345372, "learning_rate": 3.516745087774177e-05, "loss": 0.7511, "step": 1092 }, { "epoch": 0.2339782184046453, "grad_norm": 0.2400295529465924, "learning_rate": 3.515859029649872e-05, "loss": 0.7392, "step": 1093 }, { "epoch": 0.23419228813786092, "grad_norm": 0.27051740398227936, "learning_rate": 3.514972271807813e-05, "loss": 0.7382, "step": 1094 }, { "epoch": 0.2344063578710765, "grad_norm": 0.220551996922905, "learning_rate": 3.514084814657327e-05, "loss": 0.7117, "step": 1095 }, { "epoch": 0.2346204276042921, "grad_norm": 0.28698525292874566, "learning_rate": 3.513196658608062e-05, "loss": 0.7352, "step": 1096 }, { "epoch": 0.2348344973375077, "grad_norm": 0.25692057922391903, "learning_rate": 3.5123078040699895e-05, "loss": 0.7169, "step": 1097 }, { "epoch": 0.23504856707072327, "grad_norm": 0.21995943099729548, "learning_rate": 3.511418251453403e-05, "loss": 0.7453, "step": 1098 }, { "epoch": 0.23526263680393888, "grad_norm": 0.2812453485865409, "learning_rate": 3.5105280011689186e-05, "loss": 0.7586, "step": 1099 }, { "epoch": 0.23547670653715447, "grad_norm": 0.26061041513055433, "learning_rate": 3.5096370536274736e-05, "loss": 0.7757, "step": 1100 }, { "epoch": 0.23569077627037008, "grad_norm": 0.23762390163994687, "learning_rate": 3.5087454092403285e-05, "loss": 0.739, "step": 1101 }, { "epoch": 0.23590484600358566, "grad_norm": 0.23390835824020367, "learning_rate": 3.507853068419064e-05, "loss": 0.7727, "step": 1102 }, { "epoch": 0.23611891573680127, "grad_norm": 0.24197047861128176, "learning_rate": 3.506960031575584e-05, "loss": 0.7228, "step": 1103 }, { "epoch": 0.23633298547001685, "grad_norm": 0.23622843382472056, "learning_rate": 3.5060662991221113e-05, "loss": 0.7552, "step": 1104 }, { "epoch": 0.23654705520323246, "grad_norm": 0.26080172807431923, "learning_rate": 3.505171871471192e-05, "loss": 0.7453, "step": 1105 }, { "epoch": 0.23676112493644805, "grad_norm": 0.248947733593152, "learning_rate": 3.504276749035693e-05, "loss": 0.7596, "step": 1106 }, { "epoch": 0.23697519466966363, "grad_norm": 0.24080733011178224, "learning_rate": 3.503380932228799e-05, "loss": 0.7365, "step": 1107 }, { "epoch": 0.23718926440287924, "grad_norm": 0.21600510072981646, "learning_rate": 3.502484421464019e-05, "loss": 0.7673, "step": 1108 }, { "epoch": 0.23740333413609482, "grad_norm": 0.23575839153297223, "learning_rate": 3.501587217155181e-05, "loss": 0.7327, "step": 1109 }, { "epoch": 0.23761740386931043, "grad_norm": 0.2626002933160131, "learning_rate": 3.500689319716432e-05, "loss": 0.7814, "step": 1110 }, { "epoch": 0.23783147360252602, "grad_norm": 0.22747937420096545, "learning_rate": 3.4997907295622405e-05, "loss": 0.7452, "step": 1111 }, { "epoch": 0.23804554333574163, "grad_norm": 0.2394714977449478, "learning_rate": 3.4988914471073936e-05, "loss": 0.7526, "step": 1112 }, { "epoch": 0.2382596130689572, "grad_norm": 0.2796432097213277, "learning_rate": 3.4979914727669984e-05, "loss": 0.7398, "step": 1113 }, { "epoch": 0.23847368280217282, "grad_norm": 0.25029719142892853, "learning_rate": 3.497090806956481e-05, "loss": 0.7305, "step": 1114 }, { "epoch": 0.2386877525353884, "grad_norm": 0.2297835876791337, "learning_rate": 3.496189450091588e-05, "loss": 0.7539, "step": 1115 }, { "epoch": 0.23890182226860399, "grad_norm": 0.2497481123456355, "learning_rate": 3.495287402588385e-05, "loss": 0.7583, "step": 1116 }, { "epoch": 0.2391158920018196, "grad_norm": 0.27884511406424517, "learning_rate": 3.494384664863253e-05, "loss": 0.7186, "step": 1117 }, { "epoch": 0.23932996173503518, "grad_norm": 0.27767521541428375, "learning_rate": 3.493481237332895e-05, "loss": 0.7189, "step": 1118 }, { "epoch": 0.2395440314682508, "grad_norm": 0.24207967789590182, "learning_rate": 3.492577120414333e-05, "loss": 0.7324, "step": 1119 }, { "epoch": 0.23975810120146637, "grad_norm": 0.22421259080900618, "learning_rate": 3.4916723145249034e-05, "loss": 0.7489, "step": 1120 }, { "epoch": 0.23997217093468198, "grad_norm": 0.2772284666752655, "learning_rate": 3.4907668200822645e-05, "loss": 0.743, "step": 1121 }, { "epoch": 0.24018624066789757, "grad_norm": 0.26152102978471387, "learning_rate": 3.48986063750439e-05, "loss": 0.7288, "step": 1122 }, { "epoch": 0.24040031040111318, "grad_norm": 0.2272798845694307, "learning_rate": 3.488953767209573e-05, "loss": 0.7507, "step": 1123 }, { "epoch": 0.24061438013432876, "grad_norm": 0.263098149056709, "learning_rate": 3.488046209616422e-05, "loss": 0.722, "step": 1124 }, { "epoch": 0.24082844986754434, "grad_norm": 0.2500442559848116, "learning_rate": 3.4871379651438656e-05, "loss": 0.7235, "step": 1125 }, { "epoch": 0.24104251960075995, "grad_norm": 0.21433706064419283, "learning_rate": 3.486229034211146e-05, "loss": 0.7543, "step": 1126 }, { "epoch": 0.24125658933397554, "grad_norm": 0.2215477500866708, "learning_rate": 3.4853194172378256e-05, "loss": 0.7575, "step": 1127 }, { "epoch": 0.24147065906719115, "grad_norm": 0.27231554106216876, "learning_rate": 3.48440911464378e-05, "loss": 0.7728, "step": 1128 }, { "epoch": 0.24168472880040673, "grad_norm": 0.23050816642788935, "learning_rate": 3.483498126849205e-05, "loss": 0.7444, "step": 1129 }, { "epoch": 0.24189879853362234, "grad_norm": 0.22813814355783174, "learning_rate": 3.482586454274611e-05, "loss": 0.7331, "step": 1130 }, { "epoch": 0.24211286826683792, "grad_norm": 0.2799083074168808, "learning_rate": 3.481674097340823e-05, "loss": 0.7462, "step": 1131 }, { "epoch": 0.2423269380000535, "grad_norm": 0.23980352747771683, "learning_rate": 3.480761056468984e-05, "loss": 0.7673, "step": 1132 }, { "epoch": 0.24254100773326911, "grad_norm": 0.19925576192225541, "learning_rate": 3.4798473320805525e-05, "loss": 0.7199, "step": 1133 }, { "epoch": 0.2427550774664847, "grad_norm": 0.2781882173625236, "learning_rate": 3.478932924597301e-05, "loss": 0.7587, "step": 1134 }, { "epoch": 0.2429691471997003, "grad_norm": 0.2637722494697476, "learning_rate": 3.478017834441319e-05, "loss": 0.763, "step": 1135 }, { "epoch": 0.2431832169329159, "grad_norm": 0.248322154684677, "learning_rate": 3.4771020620350096e-05, "loss": 0.7499, "step": 1136 }, { "epoch": 0.2433972866661315, "grad_norm": 0.2489220850544133, "learning_rate": 3.4761856078010924e-05, "loss": 0.7402, "step": 1137 }, { "epoch": 0.24361135639934708, "grad_norm": 0.2581634984844074, "learning_rate": 3.475268472162601e-05, "loss": 0.7329, "step": 1138 }, { "epoch": 0.2438254261325627, "grad_norm": 0.23118231452967447, "learning_rate": 3.4743506555428845e-05, "loss": 0.7395, "step": 1139 }, { "epoch": 0.24403949586577828, "grad_norm": 0.22158647945102775, "learning_rate": 3.4734321583656036e-05, "loss": 0.723, "step": 1140 }, { "epoch": 0.24425356559899386, "grad_norm": 0.30313975204745625, "learning_rate": 3.472512981054736e-05, "loss": 0.7586, "step": 1141 }, { "epoch": 0.24446763533220947, "grad_norm": 0.323824729607345, "learning_rate": 3.471593124034571e-05, "loss": 0.7459, "step": 1142 }, { "epoch": 0.24468170506542505, "grad_norm": 0.24092939792483786, "learning_rate": 3.470672587729714e-05, "loss": 0.7313, "step": 1143 }, { "epoch": 0.24489577479864066, "grad_norm": 0.24306584169238002, "learning_rate": 3.469751372565083e-05, "loss": 0.7436, "step": 1144 }, { "epoch": 0.24510984453185625, "grad_norm": 0.3188364211969285, "learning_rate": 3.468829478965909e-05, "loss": 0.7699, "step": 1145 }, { "epoch": 0.24532391426507186, "grad_norm": 0.2859441884380527, "learning_rate": 3.467906907357736e-05, "loss": 0.7463, "step": 1146 }, { "epoch": 0.24553798399828744, "grad_norm": 0.2242704276233571, "learning_rate": 3.466983658166422e-05, "loss": 0.7459, "step": 1147 }, { "epoch": 0.24575205373150305, "grad_norm": 0.26642590934914734, "learning_rate": 3.4660597318181364e-05, "loss": 0.7641, "step": 1148 }, { "epoch": 0.24596612346471863, "grad_norm": 0.2966715399351912, "learning_rate": 3.465135128739363e-05, "loss": 0.7158, "step": 1149 }, { "epoch": 0.24618019319793422, "grad_norm": 0.2445052443016698, "learning_rate": 3.464209849356896e-05, "loss": 0.721, "step": 1150 }, { "epoch": 0.24639426293114983, "grad_norm": 0.26256000088434067, "learning_rate": 3.463283894097842e-05, "loss": 0.7366, "step": 1151 }, { "epoch": 0.2466083326643654, "grad_norm": 0.3048545406567788, "learning_rate": 3.4623572633896224e-05, "loss": 0.7271, "step": 1152 }, { "epoch": 0.24682240239758102, "grad_norm": 0.28828148093982753, "learning_rate": 3.4614299576599656e-05, "loss": 0.7195, "step": 1153 }, { "epoch": 0.2470364721307966, "grad_norm": 0.23559172995254016, "learning_rate": 3.4605019773369165e-05, "loss": 0.7311, "step": 1154 }, { "epoch": 0.2472505418640122, "grad_norm": 0.2554881762437298, "learning_rate": 3.4595733228488284e-05, "loss": 0.7182, "step": 1155 }, { "epoch": 0.2474646115972278, "grad_norm": 0.24590570891373473, "learning_rate": 3.458643994624366e-05, "loss": 0.7418, "step": 1156 }, { "epoch": 0.24767868133044338, "grad_norm": 0.23111157050752615, "learning_rate": 3.4577139930925053e-05, "loss": 0.7423, "step": 1157 }, { "epoch": 0.247892751063659, "grad_norm": 0.23485886010963944, "learning_rate": 3.456783318682534e-05, "loss": 0.7599, "step": 1158 }, { "epoch": 0.24810682079687457, "grad_norm": 0.24413155764456476, "learning_rate": 3.455851971824051e-05, "loss": 0.7146, "step": 1159 }, { "epoch": 0.24832089053009018, "grad_norm": 0.23833202775302623, "learning_rate": 3.454919952946961e-05, "loss": 0.7581, "step": 1160 }, { "epoch": 0.24853496026330577, "grad_norm": 0.23615597843344693, "learning_rate": 3.453987262481485e-05, "loss": 0.7703, "step": 1161 }, { "epoch": 0.24874902999652138, "grad_norm": 0.25678600028761517, "learning_rate": 3.4530539008581505e-05, "loss": 0.771, "step": 1162 }, { "epoch": 0.24896309972973696, "grad_norm": 0.22283876228606897, "learning_rate": 3.452119868507794e-05, "loss": 0.7871, "step": 1163 }, { "epoch": 0.24917716946295257, "grad_norm": 0.2380059035707347, "learning_rate": 3.451185165861566e-05, "loss": 0.7308, "step": 1164 }, { "epoch": 0.24939123919616815, "grad_norm": 0.2523623714446187, "learning_rate": 3.450249793350921e-05, "loss": 0.7592, "step": 1165 }, { "epoch": 0.24960530892938373, "grad_norm": 0.2354571442181655, "learning_rate": 3.449313751407626e-05, "loss": 0.7359, "step": 1166 }, { "epoch": 0.24981937866259935, "grad_norm": 0.20846214666118837, "learning_rate": 3.4483770404637574e-05, "loss": 0.7448, "step": 1167 }, { "epoch": 0.25003344839581493, "grad_norm": 0.24788370857158232, "learning_rate": 3.447439660951697e-05, "loss": 0.7352, "step": 1168 }, { "epoch": 0.2502475181290305, "grad_norm": 0.22760502346048284, "learning_rate": 3.4465016133041405e-05, "loss": 0.7554, "step": 1169 }, { "epoch": 0.25046158786224615, "grad_norm": 0.22386578931970105, "learning_rate": 3.4455628979540856e-05, "loss": 0.7349, "step": 1170 }, { "epoch": 0.25067565759546173, "grad_norm": 0.24959189418281694, "learning_rate": 3.444623515334844e-05, "loss": 0.7138, "step": 1171 }, { "epoch": 0.2508897273286773, "grad_norm": 0.2385615516788312, "learning_rate": 3.443683465880032e-05, "loss": 0.7351, "step": 1172 }, { "epoch": 0.2511037970618929, "grad_norm": 0.24990831138591885, "learning_rate": 3.442742750023575e-05, "loss": 0.7392, "step": 1173 }, { "epoch": 0.2513178667951085, "grad_norm": 0.24757762975607733, "learning_rate": 3.441801368199706e-05, "loss": 0.7597, "step": 1174 }, { "epoch": 0.2515319365283241, "grad_norm": 0.24073704959664105, "learning_rate": 3.4408593208429637e-05, "loss": 0.7491, "step": 1175 }, { "epoch": 0.2517460062615397, "grad_norm": 0.20779625732813095, "learning_rate": 3.439916608388197e-05, "loss": 0.6953, "step": 1176 }, { "epoch": 0.2519600759947553, "grad_norm": 0.2547420961904698, "learning_rate": 3.43897323127056e-05, "loss": 0.7293, "step": 1177 }, { "epoch": 0.25217414572797087, "grad_norm": 0.24464680814797685, "learning_rate": 3.438029189925513e-05, "loss": 0.7039, "step": 1178 }, { "epoch": 0.2523882154611865, "grad_norm": 0.21550033836220966, "learning_rate": 3.437084484788825e-05, "loss": 0.753, "step": 1179 }, { "epoch": 0.2526022851944021, "grad_norm": 0.24667308792049616, "learning_rate": 3.436139116296569e-05, "loss": 0.7513, "step": 1180 }, { "epoch": 0.25281635492761767, "grad_norm": 0.2572438301730163, "learning_rate": 3.4351930848851264e-05, "loss": 0.7672, "step": 1181 }, { "epoch": 0.25303042466083325, "grad_norm": 0.2297997590083026, "learning_rate": 3.4342463909911826e-05, "loss": 0.7388, "step": 1182 }, { "epoch": 0.25324449439404884, "grad_norm": 0.2407006829080367, "learning_rate": 3.433299035051731e-05, "loss": 0.7191, "step": 1183 }, { "epoch": 0.2534585641272645, "grad_norm": 0.26075842853984643, "learning_rate": 3.432351017504068e-05, "loss": 0.7334, "step": 1184 }, { "epoch": 0.25367263386048006, "grad_norm": 0.2901402030666382, "learning_rate": 3.431402338785797e-05, "loss": 0.7273, "step": 1185 }, { "epoch": 0.25388670359369564, "grad_norm": 0.23686107780870275, "learning_rate": 3.4304529993348276e-05, "loss": 0.7407, "step": 1186 }, { "epoch": 0.2541007733269112, "grad_norm": 0.24898576813796555, "learning_rate": 3.429502999589371e-05, "loss": 0.7523, "step": 1187 }, { "epoch": 0.25431484306012686, "grad_norm": 0.2813968197460596, "learning_rate": 3.4285523399879476e-05, "loss": 0.7289, "step": 1188 }, { "epoch": 0.25452891279334244, "grad_norm": 0.23487785867274336, "learning_rate": 3.427601020969379e-05, "loss": 0.755, "step": 1189 }, { "epoch": 0.254742982526558, "grad_norm": 0.24512958456467976, "learning_rate": 3.426649042972792e-05, "loss": 0.7274, "step": 1190 }, { "epoch": 0.2549570522597736, "grad_norm": 0.23657665610482212, "learning_rate": 3.425696406437619e-05, "loss": 0.7295, "step": 1191 }, { "epoch": 0.2551711219929892, "grad_norm": 0.2324456811817946, "learning_rate": 3.424743111803594e-05, "loss": 0.758, "step": 1192 }, { "epoch": 0.25538519172620483, "grad_norm": 0.21708333632414636, "learning_rate": 3.423789159510757e-05, "loss": 0.7426, "step": 1193 }, { "epoch": 0.2555992614594204, "grad_norm": 0.24871125843116768, "learning_rate": 3.4228345499994504e-05, "loss": 0.741, "step": 1194 }, { "epoch": 0.255813331192636, "grad_norm": 0.2307222244246413, "learning_rate": 3.42187928371032e-05, "loss": 0.7458, "step": 1195 }, { "epoch": 0.2560274009258516, "grad_norm": 0.22276180460737532, "learning_rate": 3.420923361084315e-05, "loss": 0.7792, "step": 1196 }, { "epoch": 0.2562414706590672, "grad_norm": 0.2305342650065054, "learning_rate": 3.419966782562687e-05, "loss": 0.7801, "step": 1197 }, { "epoch": 0.2564555403922828, "grad_norm": 0.21422753082824808, "learning_rate": 3.4190095485869926e-05, "loss": 0.7429, "step": 1198 }, { "epoch": 0.2566696101254984, "grad_norm": 0.237125565263133, "learning_rate": 3.418051659599088e-05, "loss": 0.7552, "step": 1199 }, { "epoch": 0.25688367985871396, "grad_norm": 0.24684772760226564, "learning_rate": 3.417093116041133e-05, "loss": 0.7257, "step": 1200 }, { "epoch": 0.25709774959192955, "grad_norm": 0.24245413705233052, "learning_rate": 3.4161339183555896e-05, "loss": 0.7491, "step": 1201 }, { "epoch": 0.2573118193251452, "grad_norm": 0.21715045986213533, "learning_rate": 3.415174066985222e-05, "loss": 0.7643, "step": 1202 }, { "epoch": 0.25752588905836077, "grad_norm": 0.2411298591658727, "learning_rate": 3.4142135623730954e-05, "loss": 0.7585, "step": 1203 }, { "epoch": 0.25773995879157635, "grad_norm": 0.23090726187919966, "learning_rate": 3.4132524049625774e-05, "loss": 0.7471, "step": 1204 }, { "epoch": 0.25795402852479193, "grad_norm": 0.21438308223040606, "learning_rate": 3.412290595197337e-05, "loss": 0.7267, "step": 1205 }, { "epoch": 0.2581680982580076, "grad_norm": 0.25239742637018964, "learning_rate": 3.4113281335213416e-05, "loss": 0.738, "step": 1206 }, { "epoch": 0.25838216799122316, "grad_norm": 0.20796047833447395, "learning_rate": 3.4103650203788646e-05, "loss": 0.7382, "step": 1207 }, { "epoch": 0.25859623772443874, "grad_norm": 0.23967752000872217, "learning_rate": 3.4094012562144754e-05, "loss": 0.7378, "step": 1208 }, { "epoch": 0.2588103074576543, "grad_norm": 0.24480785000490024, "learning_rate": 3.408436841473046e-05, "loss": 0.7319, "step": 1209 }, { "epoch": 0.2590243771908699, "grad_norm": 0.2591456126780797, "learning_rate": 3.40747177659975e-05, "loss": 0.7375, "step": 1210 }, { "epoch": 0.25923844692408554, "grad_norm": 0.2224563979787024, "learning_rate": 3.406506062040057e-05, "loss": 0.7396, "step": 1211 }, { "epoch": 0.2594525166573011, "grad_norm": 0.2892949208541926, "learning_rate": 3.405539698239742e-05, "loss": 0.738, "step": 1212 }, { "epoch": 0.2596665863905167, "grad_norm": 0.28509861922730945, "learning_rate": 3.4045726856448745e-05, "loss": 0.7307, "step": 1213 }, { "epoch": 0.2598806561237323, "grad_norm": 0.2242946439181324, "learning_rate": 3.403605024701826e-05, "loss": 0.7416, "step": 1214 }, { "epoch": 0.26009472585694793, "grad_norm": 0.24377794993916513, "learning_rate": 3.402636715857268e-05, "loss": 0.7572, "step": 1215 }, { "epoch": 0.2603087955901635, "grad_norm": 0.2776969756968053, "learning_rate": 3.4016677595581696e-05, "loss": 0.7408, "step": 1216 }, { "epoch": 0.2605228653233791, "grad_norm": 0.25539707708554316, "learning_rate": 3.4006981562517985e-05, "loss": 0.7374, "step": 1217 }, { "epoch": 0.2607369350565947, "grad_norm": 0.2529779543226716, "learning_rate": 3.3997279063857234e-05, "loss": 0.7201, "step": 1218 }, { "epoch": 0.26095100478981026, "grad_norm": 0.24168770679893958, "learning_rate": 3.398757010407809e-05, "loss": 0.738, "step": 1219 }, { "epoch": 0.2611650745230259, "grad_norm": 0.22336368147850622, "learning_rate": 3.397785468766219e-05, "loss": 0.7246, "step": 1220 }, { "epoch": 0.2613791442562415, "grad_norm": 0.26353607775290483, "learning_rate": 3.3968132819094153e-05, "loss": 0.7462, "step": 1221 }, { "epoch": 0.26159321398945706, "grad_norm": 0.25318820280116333, "learning_rate": 3.3958404502861574e-05, "loss": 0.7608, "step": 1222 }, { "epoch": 0.26180728372267265, "grad_norm": 0.2149599172943751, "learning_rate": 3.394866974345504e-05, "loss": 0.7156, "step": 1223 }, { "epoch": 0.26202135345588823, "grad_norm": 0.21534152810417512, "learning_rate": 3.393892854536807e-05, "loss": 0.7565, "step": 1224 }, { "epoch": 0.26223542318910387, "grad_norm": 0.24084926310843696, "learning_rate": 3.3929180913097206e-05, "loss": 0.7478, "step": 1225 }, { "epoch": 0.26244949292231945, "grad_norm": 0.2148070403910902, "learning_rate": 3.3919426851141935e-05, "loss": 0.7192, "step": 1226 }, { "epoch": 0.26266356265553503, "grad_norm": 0.22757017071777, "learning_rate": 3.39096663640047e-05, "loss": 0.7341, "step": 1227 }, { "epoch": 0.2628776323887506, "grad_norm": 0.2438716429425449, "learning_rate": 3.389989945619094e-05, "loss": 0.7284, "step": 1228 }, { "epoch": 0.26309170212196625, "grad_norm": 0.2139362123552242, "learning_rate": 3.389012613220904e-05, "loss": 0.7592, "step": 1229 }, { "epoch": 0.26330577185518184, "grad_norm": 0.21884388234064667, "learning_rate": 3.3880346396570344e-05, "loss": 0.6918, "step": 1230 }, { "epoch": 0.2635198415883974, "grad_norm": 0.20923067223168929, "learning_rate": 3.3870560253789155e-05, "loss": 0.724, "step": 1231 }, { "epoch": 0.263733911321613, "grad_norm": 0.24306339919153871, "learning_rate": 3.386076770838274e-05, "loss": 0.7499, "step": 1232 }, { "epoch": 0.2639479810548286, "grad_norm": 0.2323253062905506, "learning_rate": 3.385096876487134e-05, "loss": 0.7435, "step": 1233 }, { "epoch": 0.2641620507880442, "grad_norm": 0.21947749990891102, "learning_rate": 3.38411634277781e-05, "loss": 0.7402, "step": 1234 }, { "epoch": 0.2643761205212598, "grad_norm": 0.20947238115140063, "learning_rate": 3.383135170162916e-05, "loss": 0.733, "step": 1235 }, { "epoch": 0.2645901902544754, "grad_norm": 0.2148703620295522, "learning_rate": 3.38215335909536e-05, "loss": 0.7475, "step": 1236 }, { "epoch": 0.26480425998769097, "grad_norm": 0.22442933534089865, "learning_rate": 3.3811709100283434e-05, "loss": 0.7534, "step": 1237 }, { "epoch": 0.2650183297209066, "grad_norm": 0.23414723174854493, "learning_rate": 3.3801878234153624e-05, "loss": 0.7487, "step": 1238 }, { "epoch": 0.2652323994541222, "grad_norm": 0.31915631434876957, "learning_rate": 3.3792040997102093e-05, "loss": 0.7595, "step": 1239 }, { "epoch": 0.2654464691873378, "grad_norm": 0.2387136592412898, "learning_rate": 3.3782197393669684e-05, "loss": 0.7083, "step": 1240 }, { "epoch": 0.26566053892055336, "grad_norm": 0.2390840354417617, "learning_rate": 3.3772347428400185e-05, "loss": 0.7535, "step": 1241 }, { "epoch": 0.26587460865376894, "grad_norm": 0.22769911602399937, "learning_rate": 3.376249110584033e-05, "loss": 0.7421, "step": 1242 }, { "epoch": 0.2660886783869846, "grad_norm": 0.23289511358940743, "learning_rate": 3.375262843053976e-05, "loss": 0.7583, "step": 1243 }, { "epoch": 0.26630274812020016, "grad_norm": 0.21364216869927816, "learning_rate": 3.3742759407051094e-05, "loss": 0.7285, "step": 1244 }, { "epoch": 0.26651681785341574, "grad_norm": 0.23627876629788905, "learning_rate": 3.3732884039929844e-05, "loss": 0.7323, "step": 1245 }, { "epoch": 0.2667308875866313, "grad_norm": 0.2276106304734522, "learning_rate": 3.372300233373446e-05, "loss": 0.7274, "step": 1246 }, { "epoch": 0.26694495731984696, "grad_norm": 0.23001668093135316, "learning_rate": 3.371311429302632e-05, "loss": 0.7088, "step": 1247 }, { "epoch": 0.26715902705306255, "grad_norm": 0.2463448454397025, "learning_rate": 3.370321992236971e-05, "loss": 0.7208, "step": 1248 }, { "epoch": 0.26737309678627813, "grad_norm": 0.301210826139636, "learning_rate": 3.369331922633189e-05, "loss": 0.7203, "step": 1249 }, { "epoch": 0.2675871665194937, "grad_norm": 0.2839366167069765, "learning_rate": 3.368341220948297e-05, "loss": 0.7398, "step": 1250 }, { "epoch": 0.2678012362527093, "grad_norm": 0.21128119196927372, "learning_rate": 3.367349887639602e-05, "loss": 0.754, "step": 1251 }, { "epoch": 0.26801530598592493, "grad_norm": 0.23116333876179326, "learning_rate": 3.366357923164702e-05, "loss": 0.7604, "step": 1252 }, { "epoch": 0.2682293757191405, "grad_norm": 0.22747271551245782, "learning_rate": 3.3653653279814865e-05, "loss": 0.7394, "step": 1253 }, { "epoch": 0.2684434454523561, "grad_norm": 0.23004307375556815, "learning_rate": 3.364372102548135e-05, "loss": 0.7287, "step": 1254 }, { "epoch": 0.2686575151855717, "grad_norm": 0.2750622226294108, "learning_rate": 3.3633782473231176e-05, "loss": 0.7613, "step": 1255 }, { "epoch": 0.2688715849187873, "grad_norm": 0.2672186064726538, "learning_rate": 3.362383762765198e-05, "loss": 0.7325, "step": 1256 }, { "epoch": 0.2690856546520029, "grad_norm": 0.2348914065267851, "learning_rate": 3.361388649333427e-05, "loss": 0.7169, "step": 1257 }, { "epoch": 0.2692997243852185, "grad_norm": 0.24606329973802127, "learning_rate": 3.360392907487148e-05, "loss": 0.7387, "step": 1258 }, { "epoch": 0.26951379411843407, "grad_norm": 0.24919064555519513, "learning_rate": 3.359396537685992e-05, "loss": 0.711, "step": 1259 }, { "epoch": 0.26972786385164965, "grad_norm": 0.25219342730910826, "learning_rate": 3.358399540389884e-05, "loss": 0.7379, "step": 1260 }, { "epoch": 0.2699419335848653, "grad_norm": 0.2296712182666378, "learning_rate": 3.3574019160590345e-05, "loss": 0.7442, "step": 1261 }, { "epoch": 0.2701560033180809, "grad_norm": 0.22192744289815136, "learning_rate": 3.3564036651539455e-05, "loss": 0.74, "step": 1262 }, { "epoch": 0.27037007305129646, "grad_norm": 0.24846168601795277, "learning_rate": 3.355404788135407e-05, "loss": 0.725, "step": 1263 }, { "epoch": 0.27058414278451204, "grad_norm": 0.25442473984225245, "learning_rate": 3.3544052854645e-05, "loss": 0.7159, "step": 1264 }, { "epoch": 0.2707982125177277, "grad_norm": 0.2263136064538683, "learning_rate": 3.353405157602592e-05, "loss": 0.7222, "step": 1265 }, { "epoch": 0.27101228225094326, "grad_norm": 0.25067143420904886, "learning_rate": 3.352404405011342e-05, "loss": 0.7424, "step": 1266 }, { "epoch": 0.27122635198415884, "grad_norm": 0.2569024734131973, "learning_rate": 3.351403028152693e-05, "loss": 0.7412, "step": 1267 }, { "epoch": 0.2714404217173744, "grad_norm": 0.2304888122878882, "learning_rate": 3.3504010274888806e-05, "loss": 0.7235, "step": 1268 }, { "epoch": 0.27165449145059, "grad_norm": 0.23808739897672176, "learning_rate": 3.349398403482426e-05, "loss": 0.7167, "step": 1269 }, { "epoch": 0.27186856118380565, "grad_norm": 0.2238370810629572, "learning_rate": 3.348395156596138e-05, "loss": 0.692, "step": 1270 }, { "epoch": 0.27208263091702123, "grad_norm": 0.26716897637223047, "learning_rate": 3.347391287293115e-05, "loss": 0.7471, "step": 1271 }, { "epoch": 0.2722967006502368, "grad_norm": 0.22016470462040894, "learning_rate": 3.34638679603674e-05, "loss": 0.742, "step": 1272 }, { "epoch": 0.2725107703834524, "grad_norm": 0.2356505881221704, "learning_rate": 3.3453816832906835e-05, "loss": 0.7644, "step": 1273 }, { "epoch": 0.272724840116668, "grad_norm": 0.2543211750149203, "learning_rate": 3.344375949518906e-05, "loss": 0.7239, "step": 1274 }, { "epoch": 0.2729389098498836, "grad_norm": 0.24755004119231183, "learning_rate": 3.343369595185651e-05, "loss": 0.7264, "step": 1275 }, { "epoch": 0.2731529795830992, "grad_norm": 0.212753245018397, "learning_rate": 3.3423626207554494e-05, "loss": 0.7172, "step": 1276 }, { "epoch": 0.2733670493163148, "grad_norm": 0.23528019489141624, "learning_rate": 3.34135502669312e-05, "loss": 0.717, "step": 1277 }, { "epoch": 0.27358111904953036, "grad_norm": 0.25147108889505876, "learning_rate": 3.3403468134637654e-05, "loss": 0.7155, "step": 1278 }, { "epoch": 0.273795188782746, "grad_norm": 0.2261653568767125, "learning_rate": 3.339337981532776e-05, "loss": 0.7383, "step": 1279 }, { "epoch": 0.2740092585159616, "grad_norm": 0.24961114565552953, "learning_rate": 3.3383285313658254e-05, "loss": 0.7201, "step": 1280 }, { "epoch": 0.27422332824917717, "grad_norm": 0.2761825909484211, "learning_rate": 3.337318463428874e-05, "loss": 0.7258, "step": 1281 }, { "epoch": 0.27443739798239275, "grad_norm": 0.2439268449071247, "learning_rate": 3.336307778188169e-05, "loss": 0.7377, "step": 1282 }, { "epoch": 0.27465146771560833, "grad_norm": 0.2529478674719712, "learning_rate": 3.3352964761102395e-05, "loss": 0.7486, "step": 1283 }, { "epoch": 0.27486553744882397, "grad_norm": 0.22273915873906183, "learning_rate": 3.334284557661901e-05, "loss": 0.7373, "step": 1284 }, { "epoch": 0.27507960718203955, "grad_norm": 0.24813716145623047, "learning_rate": 3.333272023310253e-05, "loss": 0.766, "step": 1285 }, { "epoch": 0.27529367691525514, "grad_norm": 0.21863374492148302, "learning_rate": 3.33225887352268e-05, "loss": 0.7578, "step": 1286 }, { "epoch": 0.2755077466484707, "grad_norm": 0.24762223940774178, "learning_rate": 3.331245108766849e-05, "loss": 0.748, "step": 1287 }, { "epoch": 0.27572181638168636, "grad_norm": 0.2413065434679842, "learning_rate": 3.330230729510714e-05, "loss": 0.7267, "step": 1288 }, { "epoch": 0.27593588611490194, "grad_norm": 0.22838099631168504, "learning_rate": 3.329215736222508e-05, "loss": 0.6969, "step": 1289 }, { "epoch": 0.2761499558481175, "grad_norm": 0.21462260948933617, "learning_rate": 3.328200129370752e-05, "loss": 0.7252, "step": 1290 }, { "epoch": 0.2763640255813331, "grad_norm": 0.23463784112616665, "learning_rate": 3.327183909424248e-05, "loss": 0.7257, "step": 1291 }, { "epoch": 0.2765780953145487, "grad_norm": 0.24506200071432127, "learning_rate": 3.326167076852081e-05, "loss": 0.7455, "step": 1292 }, { "epoch": 0.2767921650477643, "grad_norm": 0.25487913911280596, "learning_rate": 3.325149632123618e-05, "loss": 0.753, "step": 1293 }, { "epoch": 0.2770062347809799, "grad_norm": 0.22380539321134613, "learning_rate": 3.324131575708512e-05, "loss": 0.6957, "step": 1294 }, { "epoch": 0.2772203045141955, "grad_norm": 0.22729766641670007, "learning_rate": 3.323112908076693e-05, "loss": 0.7592, "step": 1295 }, { "epoch": 0.2774343742474111, "grad_norm": 0.2310693350497247, "learning_rate": 3.322093629698379e-05, "loss": 0.7193, "step": 1296 }, { "epoch": 0.2776484439806267, "grad_norm": 0.20681259768160018, "learning_rate": 3.321073741044065e-05, "loss": 0.7381, "step": 1297 }, { "epoch": 0.2778625137138423, "grad_norm": 0.2151928003070936, "learning_rate": 3.32005324258453e-05, "loss": 0.7313, "step": 1298 }, { "epoch": 0.2780765834470579, "grad_norm": 0.21427855874770377, "learning_rate": 3.319032134790836e-05, "loss": 0.7516, "step": 1299 }, { "epoch": 0.27829065318027346, "grad_norm": 0.20595401236647193, "learning_rate": 3.3180104181343224e-05, "loss": 0.7176, "step": 1300 }, { "epoch": 0.27850472291348904, "grad_norm": 0.3106695088656347, "learning_rate": 3.316988093086612e-05, "loss": 0.7493, "step": 1301 }, { "epoch": 0.2787187926467047, "grad_norm": 0.2340688588742366, "learning_rate": 3.3159651601196094e-05, "loss": 0.7354, "step": 1302 }, { "epoch": 0.27893286237992027, "grad_norm": 0.22076851472351364, "learning_rate": 3.314941619705498e-05, "loss": 0.7334, "step": 1303 }, { "epoch": 0.27914693211313585, "grad_norm": 0.19874871129521252, "learning_rate": 3.3139174723167415e-05, "loss": 0.7589, "step": 1304 }, { "epoch": 0.27936100184635143, "grad_norm": 0.2212115497004667, "learning_rate": 3.312892718426086e-05, "loss": 0.7542, "step": 1305 }, { "epoch": 0.27957507157956707, "grad_norm": 0.21415836397243754, "learning_rate": 3.3118673585065536e-05, "loss": 0.7369, "step": 1306 }, { "epoch": 0.27978914131278265, "grad_norm": 0.2179798245266278, "learning_rate": 3.3108413930314506e-05, "loss": 0.7638, "step": 1307 }, { "epoch": 0.28000321104599823, "grad_norm": 0.2540638570289035, "learning_rate": 3.30981482247436e-05, "loss": 0.7414, "step": 1308 }, { "epoch": 0.2802172807792138, "grad_norm": 0.2061278171225783, "learning_rate": 3.3087876473091455e-05, "loss": 0.7356, "step": 1309 }, { "epoch": 0.2804313505124294, "grad_norm": 0.20998675886616364, "learning_rate": 3.307759868009949e-05, "loss": 0.7475, "step": 1310 }, { "epoch": 0.28064542024564504, "grad_norm": 0.2231058947198689, "learning_rate": 3.306731485051191e-05, "loss": 0.7131, "step": 1311 }, { "epoch": 0.2808594899788606, "grad_norm": 0.3566037413688859, "learning_rate": 3.3057024989075715e-05, "loss": 0.7525, "step": 1312 }, { "epoch": 0.2810735597120762, "grad_norm": 0.22277082867800663, "learning_rate": 3.3046729100540686e-05, "loss": 0.7493, "step": 1313 }, { "epoch": 0.2812876294452918, "grad_norm": 0.1861671724954601, "learning_rate": 3.3036427189659386e-05, "loss": 0.7061, "step": 1314 }, { "epoch": 0.2815016991785074, "grad_norm": 0.21021655214095677, "learning_rate": 3.302611926118716e-05, "loss": 0.7353, "step": 1315 }, { "epoch": 0.281715768911723, "grad_norm": 0.20877391839607665, "learning_rate": 3.301580531988213e-05, "loss": 0.7621, "step": 1316 }, { "epoch": 0.2819298386449386, "grad_norm": 0.20533436000378583, "learning_rate": 3.300548537050519e-05, "loss": 0.721, "step": 1317 }, { "epoch": 0.2821439083781542, "grad_norm": 0.20169237903063889, "learning_rate": 3.2995159417820014e-05, "loss": 0.7542, "step": 1318 }, { "epoch": 0.28235797811136976, "grad_norm": 0.21409057852008287, "learning_rate": 3.2984827466593036e-05, "loss": 0.7658, "step": 1319 }, { "epoch": 0.2825720478445854, "grad_norm": 0.20799828550855554, "learning_rate": 3.2974489521593474e-05, "loss": 0.7318, "step": 1320 }, { "epoch": 0.282786117577801, "grad_norm": 0.21440521985054223, "learning_rate": 3.296414558759329e-05, "loss": 0.7446, "step": 1321 }, { "epoch": 0.28300018731101656, "grad_norm": 0.20109109765449448, "learning_rate": 3.295379566936724e-05, "loss": 0.7237, "step": 1322 }, { "epoch": 0.28321425704423214, "grad_norm": 0.22008644947199202, "learning_rate": 3.294343977169282e-05, "loss": 0.7242, "step": 1323 }, { "epoch": 0.2834283267774478, "grad_norm": 0.21810873547560058, "learning_rate": 3.29330778993503e-05, "loss": 0.7269, "step": 1324 }, { "epoch": 0.28364239651066336, "grad_norm": 0.2109574149141801, "learning_rate": 3.292271005712269e-05, "loss": 0.7139, "step": 1325 }, { "epoch": 0.28385646624387895, "grad_norm": 0.2226470165117003, "learning_rate": 3.291233624979578e-05, "loss": 0.7364, "step": 1326 }, { "epoch": 0.28407053597709453, "grad_norm": 0.22180153572255398, "learning_rate": 3.290195648215809e-05, "loss": 0.7035, "step": 1327 }, { "epoch": 0.2842846057103101, "grad_norm": 0.21212274759872496, "learning_rate": 3.289157075900091e-05, "loss": 0.752, "step": 1328 }, { "epoch": 0.28449867544352575, "grad_norm": 0.22392232963013, "learning_rate": 3.288117908511826e-05, "loss": 0.7124, "step": 1329 }, { "epoch": 0.28471274517674133, "grad_norm": 0.2217384973022529, "learning_rate": 3.287078146530693e-05, "loss": 0.7119, "step": 1330 }, { "epoch": 0.2849268149099569, "grad_norm": 0.2239696839241841, "learning_rate": 3.286037790436644e-05, "loss": 0.709, "step": 1331 }, { "epoch": 0.2851408846431725, "grad_norm": 0.21247932684313287, "learning_rate": 3.284996840709904e-05, "loss": 0.7655, "step": 1332 }, { "epoch": 0.2853549543763881, "grad_norm": 0.24100995837849887, "learning_rate": 3.283955297830975e-05, "loss": 0.7191, "step": 1333 }, { "epoch": 0.2855690241096037, "grad_norm": 0.21021584901521734, "learning_rate": 3.2829131622806316e-05, "loss": 0.7369, "step": 1334 }, { "epoch": 0.2857830938428193, "grad_norm": 0.20031814317637867, "learning_rate": 3.28187043453992e-05, "loss": 0.7201, "step": 1335 }, { "epoch": 0.2859971635760349, "grad_norm": 0.2407290690269822, "learning_rate": 3.2808271150901626e-05, "loss": 0.7367, "step": 1336 }, { "epoch": 0.28621123330925047, "grad_norm": 0.20307590665925798, "learning_rate": 3.279783204412954e-05, "loss": 0.6986, "step": 1337 }, { "epoch": 0.2864253030424661, "grad_norm": 0.24047420783975218, "learning_rate": 3.2787387029901606e-05, "loss": 0.7292, "step": 1338 }, { "epoch": 0.2866393727756817, "grad_norm": 0.24157870880732082, "learning_rate": 3.277693611303922e-05, "loss": 0.7134, "step": 1339 }, { "epoch": 0.28685344250889727, "grad_norm": 0.22682727467384456, "learning_rate": 3.276647929836653e-05, "loss": 0.7023, "step": 1340 }, { "epoch": 0.28706751224211285, "grad_norm": 0.2188005785823767, "learning_rate": 3.2756016590710355e-05, "loss": 0.7707, "step": 1341 }, { "epoch": 0.28728158197532844, "grad_norm": 0.28172732336907075, "learning_rate": 3.274554799490028e-05, "loss": 0.7272, "step": 1342 }, { "epoch": 0.2874956517085441, "grad_norm": 0.24192696282082157, "learning_rate": 3.273507351576857e-05, "loss": 0.7132, "step": 1343 }, { "epoch": 0.28770972144175966, "grad_norm": 0.2315519440674189, "learning_rate": 3.272459315815025e-05, "loss": 0.7394, "step": 1344 }, { "epoch": 0.28792379117497524, "grad_norm": 0.26217337426162685, "learning_rate": 3.2714106926883016e-05, "loss": 0.7225, "step": 1345 }, { "epoch": 0.2881378609081908, "grad_norm": 0.26990586593344973, "learning_rate": 3.27036148268073e-05, "loss": 0.7441, "step": 1346 }, { "epoch": 0.28835193064140646, "grad_norm": 0.21589713648963416, "learning_rate": 3.2693116862766236e-05, "loss": 0.7161, "step": 1347 }, { "epoch": 0.28856600037462204, "grad_norm": 0.24421754890717157, "learning_rate": 3.2682613039605655e-05, "loss": 0.7207, "step": 1348 }, { "epoch": 0.2887800701078376, "grad_norm": 0.24741110918426046, "learning_rate": 3.267210336217412e-05, "loss": 0.7422, "step": 1349 }, { "epoch": 0.2889941398410532, "grad_norm": 0.21218214160427318, "learning_rate": 3.266158783532287e-05, "loss": 0.7416, "step": 1350 }, { "epoch": 0.2892082095742688, "grad_norm": 0.23033686820949453, "learning_rate": 3.2651066463905854e-05, "loss": 0.724, "step": 1351 }, { "epoch": 0.28942227930748443, "grad_norm": 0.24030135503458686, "learning_rate": 3.264053925277972e-05, "loss": 0.7262, "step": 1352 }, { "epoch": 0.2896363490407, "grad_norm": 0.23475361277373719, "learning_rate": 3.263000620680379e-05, "loss": 0.7475, "step": 1353 }, { "epoch": 0.2898504187739156, "grad_norm": 0.2060328426111773, "learning_rate": 3.2619467330840124e-05, "loss": 0.7456, "step": 1354 }, { "epoch": 0.2900644885071312, "grad_norm": 0.2396608594606869, "learning_rate": 3.2608922629753444e-05, "loss": 0.7411, "step": 1355 }, { "epoch": 0.2902785582403468, "grad_norm": 0.21255554908811655, "learning_rate": 3.259837210841116e-05, "loss": 0.7543, "step": 1356 }, { "epoch": 0.2904926279735624, "grad_norm": 0.2035296928731616, "learning_rate": 3.2587815771683364e-05, "loss": 0.7343, "step": 1357 }, { "epoch": 0.290706697706778, "grad_norm": 0.21053857087589242, "learning_rate": 3.2577253624442855e-05, "loss": 0.6848, "step": 1358 }, { "epoch": 0.29092076743999357, "grad_norm": 0.22660109624261895, "learning_rate": 3.25666856715651e-05, "loss": 0.7321, "step": 1359 }, { "epoch": 0.29113483717320915, "grad_norm": 0.1899904190919483, "learning_rate": 3.255611191792824e-05, "loss": 0.7437, "step": 1360 }, { "epoch": 0.2913489069064248, "grad_norm": 0.22172872661906323, "learning_rate": 3.254553236841311e-05, "loss": 0.7482, "step": 1361 }, { "epoch": 0.29156297663964037, "grad_norm": 0.20740244056190774, "learning_rate": 3.25349470279032e-05, "loss": 0.7255, "step": 1362 }, { "epoch": 0.29177704637285595, "grad_norm": 0.3158156452257583, "learning_rate": 3.2524355901284676e-05, "loss": 0.7662, "step": 1363 }, { "epoch": 0.29199111610607154, "grad_norm": 0.22748707107737778, "learning_rate": 3.2513758993446406e-05, "loss": 0.7428, "step": 1364 }, { "epoch": 0.2922051858392872, "grad_norm": 0.21535543945914187, "learning_rate": 3.2503156309279895e-05, "loss": 0.7383, "step": 1365 }, { "epoch": 0.29241925557250276, "grad_norm": 0.2113972483014738, "learning_rate": 3.249254785367931e-05, "loss": 0.7492, "step": 1366 }, { "epoch": 0.29263332530571834, "grad_norm": 0.218096472040482, "learning_rate": 3.248193363154151e-05, "loss": 0.7312, "step": 1367 }, { "epoch": 0.2928473950389339, "grad_norm": 0.22987206607929475, "learning_rate": 3.2471313647766e-05, "loss": 0.7477, "step": 1368 }, { "epoch": 0.2930614647721495, "grad_norm": 0.2198837848575135, "learning_rate": 3.2460687907254933e-05, "loss": 0.728, "step": 1369 }, { "epoch": 0.29327553450536514, "grad_norm": 0.23854273009753085, "learning_rate": 3.245005641491314e-05, "loss": 0.742, "step": 1370 }, { "epoch": 0.2934896042385807, "grad_norm": 0.33658059803919166, "learning_rate": 3.2439419175648096e-05, "loss": 0.7506, "step": 1371 }, { "epoch": 0.2937036739717963, "grad_norm": 0.3201983548062593, "learning_rate": 3.2428776194369936e-05, "loss": 0.7548, "step": 1372 }, { "epoch": 0.2939177437050119, "grad_norm": 0.22445902352448582, "learning_rate": 3.241812747599143e-05, "loss": 0.7137, "step": 1373 }, { "epoch": 0.29413181343822753, "grad_norm": 0.24163029792772803, "learning_rate": 3.2407473025428014e-05, "loss": 0.717, "step": 1374 }, { "epoch": 0.2943458831714431, "grad_norm": 0.2328587034862239, "learning_rate": 3.239681284759776e-05, "loss": 0.7272, "step": 1375 }, { "epoch": 0.2945599529046587, "grad_norm": 0.20081575381786798, "learning_rate": 3.23861469474214e-05, "loss": 0.7434, "step": 1376 }, { "epoch": 0.2947740226378743, "grad_norm": 0.23837871139042788, "learning_rate": 3.237547532982228e-05, "loss": 0.7267, "step": 1377 }, { "epoch": 0.29498809237108986, "grad_norm": 0.21823564640646656, "learning_rate": 3.2364797999726395e-05, "loss": 0.7141, "step": 1378 }, { "epoch": 0.2952021621043055, "grad_norm": 0.22545105132075569, "learning_rate": 3.2354114962062394e-05, "loss": 0.7179, "step": 1379 }, { "epoch": 0.2954162318375211, "grad_norm": 0.23484867044352178, "learning_rate": 3.234342622176153e-05, "loss": 0.7148, "step": 1380 }, { "epoch": 0.29563030157073666, "grad_norm": 0.2195872391322405, "learning_rate": 3.2332731783757724e-05, "loss": 0.7679, "step": 1381 }, { "epoch": 0.29584437130395225, "grad_norm": 0.21156762066060109, "learning_rate": 3.232203165298751e-05, "loss": 0.7815, "step": 1382 }, { "epoch": 0.29605844103716783, "grad_norm": 0.22245947323364718, "learning_rate": 3.231132583439004e-05, "loss": 0.7411, "step": 1383 }, { "epoch": 0.29627251077038347, "grad_norm": 0.22536809649023096, "learning_rate": 3.2300614332907095e-05, "loss": 0.719, "step": 1384 }, { "epoch": 0.29648658050359905, "grad_norm": 0.19906274829822754, "learning_rate": 3.228989715348309e-05, "loss": 0.7461, "step": 1385 }, { "epoch": 0.29670065023681463, "grad_norm": 0.19402806520790786, "learning_rate": 3.227917430106506e-05, "loss": 0.7315, "step": 1386 }, { "epoch": 0.2969147199700302, "grad_norm": 0.2309421631043973, "learning_rate": 3.2268445780602654e-05, "loss": 0.7407, "step": 1387 }, { "epoch": 0.29712878970324585, "grad_norm": 0.20857649271903783, "learning_rate": 3.225771159704813e-05, "loss": 0.7368, "step": 1388 }, { "epoch": 0.29734285943646144, "grad_norm": 0.2013707317699051, "learning_rate": 3.2246971755356375e-05, "loss": 0.7009, "step": 1389 }, { "epoch": 0.297556929169677, "grad_norm": 0.20239912216816988, "learning_rate": 3.223622626048487e-05, "loss": 0.7168, "step": 1390 }, { "epoch": 0.2977709989028926, "grad_norm": 0.2153847798659455, "learning_rate": 3.222547511739373e-05, "loss": 0.7464, "step": 1391 }, { "epoch": 0.2979850686361082, "grad_norm": 0.19938444912860112, "learning_rate": 3.221471833104565e-05, "loss": 0.7068, "step": 1392 }, { "epoch": 0.2981991383693238, "grad_norm": 0.21451018840175334, "learning_rate": 3.220395590640595e-05, "loss": 0.7129, "step": 1393 }, { "epoch": 0.2984132081025394, "grad_norm": 0.21898499800150237, "learning_rate": 3.219318784844254e-05, "loss": 0.7278, "step": 1394 }, { "epoch": 0.298627277835755, "grad_norm": 0.22131841856705786, "learning_rate": 3.2182414162125945e-05, "loss": 0.7399, "step": 1395 }, { "epoch": 0.2988413475689706, "grad_norm": 0.2024849180619506, "learning_rate": 3.2171634852429274e-05, "loss": 0.7082, "step": 1396 }, { "epoch": 0.2990554173021862, "grad_norm": 0.2145618143912526, "learning_rate": 3.2160849924328234e-05, "loss": 0.7286, "step": 1397 }, { "epoch": 0.2992694870354018, "grad_norm": 0.20660470699343808, "learning_rate": 3.215005938280113e-05, "loss": 0.7246, "step": 1398 }, { "epoch": 0.2994835567686174, "grad_norm": 0.21058362047175624, "learning_rate": 3.213926323282886e-05, "loss": 0.6958, "step": 1399 }, { "epoch": 0.29969762650183296, "grad_norm": 0.20084108245517038, "learning_rate": 3.2128461479394894e-05, "loss": 0.7445, "step": 1400 }, { "epoch": 0.29991169623504854, "grad_norm": 0.2263541092646725, "learning_rate": 3.211765412748532e-05, "loss": 0.7437, "step": 1401 }, { "epoch": 0.3001257659682642, "grad_norm": 0.23875085738584625, "learning_rate": 3.210684118208878e-05, "loss": 0.7201, "step": 1402 }, { "epoch": 0.30033983570147976, "grad_norm": 0.21789335664347195, "learning_rate": 3.209602264819651e-05, "loss": 0.7102, "step": 1403 }, { "epoch": 0.30055390543469535, "grad_norm": 0.2046072580137681, "learning_rate": 3.2085198530802334e-05, "loss": 0.707, "step": 1404 }, { "epoch": 0.30076797516791093, "grad_norm": 0.2067969290194415, "learning_rate": 3.207436883490264e-05, "loss": 0.7162, "step": 1405 }, { "epoch": 0.30098204490112657, "grad_norm": 0.2223918694230222, "learning_rate": 3.206353356549639e-05, "loss": 0.696, "step": 1406 }, { "epoch": 0.30119611463434215, "grad_norm": 0.20285869745100288, "learning_rate": 3.205269272758513e-05, "loss": 0.7228, "step": 1407 }, { "epoch": 0.30141018436755773, "grad_norm": 0.22057104093564195, "learning_rate": 3.204184632617297e-05, "loss": 0.7402, "step": 1408 }, { "epoch": 0.3016242541007733, "grad_norm": 0.21719095695822196, "learning_rate": 3.2030994366266597e-05, "loss": 0.7178, "step": 1409 }, { "epoch": 0.3018383238339889, "grad_norm": 0.27094642527673257, "learning_rate": 3.202013685287524e-05, "loss": 0.7317, "step": 1410 }, { "epoch": 0.30205239356720454, "grad_norm": 0.2062133401939227, "learning_rate": 3.2009273791010715e-05, "loss": 0.7319, "step": 1411 }, { "epoch": 0.3022664633004201, "grad_norm": 0.2090566384014724, "learning_rate": 3.199840518568739e-05, "loss": 0.7122, "step": 1412 }, { "epoch": 0.3024805330336357, "grad_norm": 0.2214651222116033, "learning_rate": 3.1987531041922205e-05, "loss": 0.7534, "step": 1413 }, { "epoch": 0.3026946027668513, "grad_norm": 0.2072693638819392, "learning_rate": 3.197665136473463e-05, "loss": 0.7248, "step": 1414 }, { "epoch": 0.3029086725000669, "grad_norm": 0.21247821692980245, "learning_rate": 3.196576615914671e-05, "loss": 0.7134, "step": 1415 }, { "epoch": 0.3031227422332825, "grad_norm": 0.2061763925201024, "learning_rate": 3.195487543018302e-05, "loss": 0.7583, "step": 1416 }, { "epoch": 0.3033368119664981, "grad_norm": 0.20360764829818256, "learning_rate": 3.1943979182870734e-05, "loss": 0.7353, "step": 1417 }, { "epoch": 0.30355088169971367, "grad_norm": 0.20246447907897855, "learning_rate": 3.193307742223952e-05, "loss": 0.6982, "step": 1418 }, { "epoch": 0.30376495143292925, "grad_norm": 0.19760088589285252, "learning_rate": 3.192217015332161e-05, "loss": 0.722, "step": 1419 }, { "epoch": 0.3039790211661449, "grad_norm": 0.22099084344142367, "learning_rate": 3.191125738115178e-05, "loss": 0.7389, "step": 1420 }, { "epoch": 0.3041930908993605, "grad_norm": 0.205468337901139, "learning_rate": 3.190033911076735e-05, "loss": 0.7299, "step": 1421 }, { "epoch": 0.30440716063257606, "grad_norm": 0.19366753031949716, "learning_rate": 3.1889415347208164e-05, "loss": 0.7193, "step": 1422 }, { "epoch": 0.30462123036579164, "grad_norm": 0.2279557283397567, "learning_rate": 3.1878486095516624e-05, "loss": 0.7141, "step": 1423 }, { "epoch": 0.3048353000990073, "grad_norm": 0.22028831102493454, "learning_rate": 3.186755136073765e-05, "loss": 0.7274, "step": 1424 }, { "epoch": 0.30504936983222286, "grad_norm": 0.2020719104817722, "learning_rate": 3.1856611147918684e-05, "loss": 0.7481, "step": 1425 }, { "epoch": 0.30526343956543844, "grad_norm": 0.21945753089833947, "learning_rate": 3.184566546210972e-05, "loss": 0.7186, "step": 1426 }, { "epoch": 0.305477509298654, "grad_norm": 0.21162314149336317, "learning_rate": 3.1834714308363266e-05, "loss": 0.7159, "step": 1427 }, { "epoch": 0.3056915790318696, "grad_norm": 0.21626714707264866, "learning_rate": 3.182375769173435e-05, "loss": 0.7268, "step": 1428 }, { "epoch": 0.30590564876508525, "grad_norm": 0.38973557982804796, "learning_rate": 3.1812795617280527e-05, "loss": 0.7147, "step": 1429 }, { "epoch": 0.30611971849830083, "grad_norm": 0.21473123297218608, "learning_rate": 3.180182809006187e-05, "loss": 0.6822, "step": 1430 }, { "epoch": 0.3063337882315164, "grad_norm": 0.22805434357482182, "learning_rate": 3.1790855115140974e-05, "loss": 0.7192, "step": 1431 }, { "epoch": 0.306547857964732, "grad_norm": 0.23933935403155096, "learning_rate": 3.177987669758293e-05, "loss": 0.7408, "step": 1432 }, { "epoch": 0.30676192769794763, "grad_norm": 0.2483779401696868, "learning_rate": 3.176889284245538e-05, "loss": 0.7529, "step": 1433 }, { "epoch": 0.3069759974311632, "grad_norm": 0.24123188356015066, "learning_rate": 3.175790355482844e-05, "loss": 0.7475, "step": 1434 }, { "epoch": 0.3071900671643788, "grad_norm": 0.218235722605162, "learning_rate": 3.174690883977473e-05, "loss": 0.7322, "step": 1435 }, { "epoch": 0.3074041368975944, "grad_norm": 0.21883476938584442, "learning_rate": 3.1735908702369414e-05, "loss": 0.728, "step": 1436 }, { "epoch": 0.30761820663080996, "grad_norm": 0.25188122569352345, "learning_rate": 3.1724903147690115e-05, "loss": 0.7173, "step": 1437 }, { "epoch": 0.3078322763640256, "grad_norm": 0.22963360914059594, "learning_rate": 3.171389218081699e-05, "loss": 0.722, "step": 1438 }, { "epoch": 0.3080463460972412, "grad_norm": 0.24033502472741244, "learning_rate": 3.170287580683268e-05, "loss": 0.7242, "step": 1439 }, { "epoch": 0.30826041583045677, "grad_norm": 0.457217600022294, "learning_rate": 3.169185403082232e-05, "loss": 0.7212, "step": 1440 }, { "epoch": 0.30847448556367235, "grad_norm": 0.1963853889286967, "learning_rate": 3.1680826857873534e-05, "loss": 0.725, "step": 1441 }, { "epoch": 0.30868855529688793, "grad_norm": 0.2202312016687697, "learning_rate": 3.166979429307646e-05, "loss": 0.7314, "step": 1442 }, { "epoch": 0.3089026250301036, "grad_norm": 0.22396438931842594, "learning_rate": 3.165875634152371e-05, "loss": 0.7699, "step": 1443 }, { "epoch": 0.30911669476331916, "grad_norm": 0.20023765292935752, "learning_rate": 3.1647713008310356e-05, "loss": 0.7187, "step": 1444 }, { "epoch": 0.30933076449653474, "grad_norm": 0.21390753427895953, "learning_rate": 3.1636664298534014e-05, "loss": 0.7523, "step": 1445 }, { "epoch": 0.3095448342297503, "grad_norm": 0.22016320878320222, "learning_rate": 3.1625610217294734e-05, "loss": 0.7384, "step": 1446 }, { "epoch": 0.30975890396296596, "grad_norm": 0.2044759879767915, "learning_rate": 3.1614550769695055e-05, "loss": 0.7513, "step": 1447 }, { "epoch": 0.30997297369618154, "grad_norm": 0.20787685688090424, "learning_rate": 3.160348596084e-05, "loss": 0.7074, "step": 1448 }, { "epoch": 0.3101870434293971, "grad_norm": 0.23711949530366647, "learning_rate": 3.159241579583707e-05, "loss": 0.7476, "step": 1449 }, { "epoch": 0.3104011131626127, "grad_norm": 0.20747050361190908, "learning_rate": 3.158134027979623e-05, "loss": 0.7212, "step": 1450 }, { "epoch": 0.3106151828958283, "grad_norm": 0.20995264854875686, "learning_rate": 3.1570259417829914e-05, "loss": 0.7285, "step": 1451 }, { "epoch": 0.31082925262904393, "grad_norm": 0.21245624472761282, "learning_rate": 3.155917321505303e-05, "loss": 0.6909, "step": 1452 }, { "epoch": 0.3110433223622595, "grad_norm": 0.21659310287205993, "learning_rate": 3.1548081676582954e-05, "loss": 0.6987, "step": 1453 }, { "epoch": 0.3112573920954751, "grad_norm": 0.21758114627850686, "learning_rate": 3.153698480753952e-05, "loss": 0.7438, "step": 1454 }, { "epoch": 0.3114714618286907, "grad_norm": 0.19922319084931434, "learning_rate": 3.152588261304501e-05, "loss": 0.7385, "step": 1455 }, { "epoch": 0.3116855315619063, "grad_norm": 0.2016783810836013, "learning_rate": 3.151477509822418e-05, "loss": 0.7229, "step": 1456 }, { "epoch": 0.3118996012951219, "grad_norm": 0.22794981419350388, "learning_rate": 3.150366226820426e-05, "loss": 0.7301, "step": 1457 }, { "epoch": 0.3121136710283375, "grad_norm": 0.21499412039554525, "learning_rate": 3.1492544128114876e-05, "loss": 0.6997, "step": 1458 }, { "epoch": 0.31232774076155306, "grad_norm": 0.21843695096587704, "learning_rate": 3.1481420683088177e-05, "loss": 0.7284, "step": 1459 }, { "epoch": 0.31254181049476865, "grad_norm": 0.1971221300159341, "learning_rate": 3.14702919382587e-05, "loss": 0.7377, "step": 1460 }, { "epoch": 0.3127558802279843, "grad_norm": 0.240978737107255, "learning_rate": 3.145915789876346e-05, "loss": 0.7056, "step": 1461 }, { "epoch": 0.31296994996119987, "grad_norm": 0.21248182584074676, "learning_rate": 3.1448018569741916e-05, "loss": 0.7327, "step": 1462 }, { "epoch": 0.31318401969441545, "grad_norm": 0.22093395579421116, "learning_rate": 3.143687395633595e-05, "loss": 0.7275, "step": 1463 }, { "epoch": 0.31339808942763103, "grad_norm": 0.20231584780727468, "learning_rate": 3.1425724063689903e-05, "loss": 0.6969, "step": 1464 }, { "epoch": 0.31361215916084667, "grad_norm": 0.22308283661802453, "learning_rate": 3.141456889695055e-05, "loss": 0.711, "step": 1465 }, { "epoch": 0.31382622889406225, "grad_norm": 0.20868971832779562, "learning_rate": 3.1403408461267086e-05, "loss": 0.7506, "step": 1466 }, { "epoch": 0.31404029862727784, "grad_norm": 0.22523386937803858, "learning_rate": 3.139224276179115e-05, "loss": 0.7446, "step": 1467 }, { "epoch": 0.3142543683604934, "grad_norm": 0.19954737089394523, "learning_rate": 3.138107180367682e-05, "loss": 0.7112, "step": 1468 }, { "epoch": 0.314468438093709, "grad_norm": 0.2542268071847943, "learning_rate": 3.136989559208056e-05, "loss": 0.7365, "step": 1469 }, { "epoch": 0.31468250782692464, "grad_norm": 0.20895933764763175, "learning_rate": 3.135871413216132e-05, "loss": 0.7755, "step": 1470 }, { "epoch": 0.3148965775601402, "grad_norm": 0.228118964844556, "learning_rate": 3.134752742908043e-05, "loss": 0.7356, "step": 1471 }, { "epoch": 0.3151106472933558, "grad_norm": 0.22647240274599476, "learning_rate": 3.133633548800165e-05, "loss": 0.7199, "step": 1472 }, { "epoch": 0.3153247170265714, "grad_norm": 0.2475068220750008, "learning_rate": 3.132513831409116e-05, "loss": 0.7512, "step": 1473 }, { "epoch": 0.315538786759787, "grad_norm": 0.20471878775694313, "learning_rate": 3.131393591251755e-05, "loss": 0.7499, "step": 1474 }, { "epoch": 0.3157528564930026, "grad_norm": 0.2570118035800016, "learning_rate": 3.130272828845184e-05, "loss": 0.7217, "step": 1475 }, { "epoch": 0.3159669262262182, "grad_norm": 0.25695595430743884, "learning_rate": 3.129151544706744e-05, "loss": 0.715, "step": 1476 }, { "epoch": 0.3161809959594338, "grad_norm": 0.20673248374703146, "learning_rate": 3.1280297393540185e-05, "loss": 0.7495, "step": 1477 }, { "epoch": 0.31639506569264936, "grad_norm": 0.23730695452889797, "learning_rate": 3.12690741330483e-05, "loss": 0.7295, "step": 1478 }, { "epoch": 0.316609135425865, "grad_norm": 0.20127704834085067, "learning_rate": 3.125784567077242e-05, "loss": 0.7148, "step": 1479 }, { "epoch": 0.3168232051590806, "grad_norm": 0.22574231162441197, "learning_rate": 3.1246612011895595e-05, "loss": 0.7301, "step": 1480 }, { "epoch": 0.31703727489229616, "grad_norm": 0.204367347369339, "learning_rate": 3.123537316160324e-05, "loss": 0.7357, "step": 1481 }, { "epoch": 0.31725134462551174, "grad_norm": 0.2250067446805083, "learning_rate": 3.122412912508321e-05, "loss": 0.7463, "step": 1482 }, { "epoch": 0.3174654143587274, "grad_norm": 0.20892073965372557, "learning_rate": 3.121287990752572e-05, "loss": 0.7279, "step": 1483 }, { "epoch": 0.31767948409194297, "grad_norm": 0.2037437423808741, "learning_rate": 3.120162551412339e-05, "loss": 0.7483, "step": 1484 }, { "epoch": 0.31789355382515855, "grad_norm": 0.20411669175473196, "learning_rate": 3.119036595007123e-05, "loss": 0.7178, "step": 1485 }, { "epoch": 0.31810762355837413, "grad_norm": 0.21084258377580037, "learning_rate": 3.117910122056663e-05, "loss": 0.7431, "step": 1486 }, { "epoch": 0.3183216932915897, "grad_norm": 0.21409215492256983, "learning_rate": 3.1167831330809376e-05, "loss": 0.7326, "step": 1487 }, { "epoch": 0.31853576302480535, "grad_norm": 0.23379332085474894, "learning_rate": 3.1156556286001615e-05, "loss": 0.7116, "step": 1488 }, { "epoch": 0.31874983275802093, "grad_norm": 0.24796122337162388, "learning_rate": 3.1145276091347905e-05, "loss": 0.765, "step": 1489 }, { "epoch": 0.3189639024912365, "grad_norm": 0.2145407362714362, "learning_rate": 3.1133990752055146e-05, "loss": 0.7162, "step": 1490 }, { "epoch": 0.3191779722244521, "grad_norm": 0.23883686076081942, "learning_rate": 3.112270027333263e-05, "loss": 0.735, "step": 1491 }, { "epoch": 0.3193920419576677, "grad_norm": 0.22184388987701545, "learning_rate": 3.111140466039205e-05, "loss": 0.7159, "step": 1492 }, { "epoch": 0.3196061116908833, "grad_norm": 0.2412817705565827, "learning_rate": 3.1100103918447405e-05, "loss": 0.717, "step": 1493 }, { "epoch": 0.3198201814240989, "grad_norm": 0.21485345792419652, "learning_rate": 3.1088798052715117e-05, "loss": 0.7485, "step": 1494 }, { "epoch": 0.3200342511573145, "grad_norm": 0.24883652890398836, "learning_rate": 3.1077487068413936e-05, "loss": 0.6953, "step": 1495 }, { "epoch": 0.32024832089053007, "grad_norm": 0.24956105496228143, "learning_rate": 3.1066170970765015e-05, "loss": 0.7063, "step": 1496 }, { "epoch": 0.3204623906237457, "grad_norm": 0.21042698842894736, "learning_rate": 3.105484976499182e-05, "loss": 0.7073, "step": 1497 }, { "epoch": 0.3206764603569613, "grad_norm": 0.23898746630695497, "learning_rate": 3.104352345632022e-05, "loss": 0.7297, "step": 1498 }, { "epoch": 0.3208905300901769, "grad_norm": 0.23001509225970712, "learning_rate": 3.10321920499784e-05, "loss": 0.7494, "step": 1499 }, { "epoch": 0.32110459982339246, "grad_norm": 0.21966970587762638, "learning_rate": 3.1020855551196936e-05, "loss": 0.7466, "step": 1500 }, { "epoch": 0.32131866955660804, "grad_norm": 0.23563745853359952, "learning_rate": 3.100951396520871e-05, "loss": 0.7387, "step": 1501 }, { "epoch": 0.3215327392898237, "grad_norm": 0.24266907369743057, "learning_rate": 3.0998167297249e-05, "loss": 0.7537, "step": 1502 }, { "epoch": 0.32174680902303926, "grad_norm": 0.1944517361151009, "learning_rate": 3.09868155525554e-05, "loss": 0.7026, "step": 1503 }, { "epoch": 0.32196087875625484, "grad_norm": 0.22434107310309512, "learning_rate": 3.097545873636785e-05, "loss": 0.7089, "step": 1504 }, { "epoch": 0.3221749484894704, "grad_norm": 0.20815115398118045, "learning_rate": 3.096409685392864e-05, "loss": 0.715, "step": 1505 }, { "epoch": 0.32238901822268606, "grad_norm": 0.22420600533564186, "learning_rate": 3.095272991048239e-05, "loss": 0.7134, "step": 1506 }, { "epoch": 0.32260308795590165, "grad_norm": 0.2536604211257573, "learning_rate": 3.0941357911276064e-05, "loss": 0.7251, "step": 1507 }, { "epoch": 0.32281715768911723, "grad_norm": 0.23107524665004686, "learning_rate": 3.0929980861558955e-05, "loss": 0.7004, "step": 1508 }, { "epoch": 0.3230312274223328, "grad_norm": 0.19131080314353968, "learning_rate": 3.091859876658269e-05, "loss": 0.7288, "step": 1509 }, { "epoch": 0.3232452971555484, "grad_norm": 0.21450084832077976, "learning_rate": 3.090721163160122e-05, "loss": 0.7124, "step": 1510 }, { "epoch": 0.32345936688876403, "grad_norm": 0.2141686093064143, "learning_rate": 3.0895819461870825e-05, "loss": 0.7397, "step": 1511 }, { "epoch": 0.3236734366219796, "grad_norm": 0.22980922312876642, "learning_rate": 3.088442226265012e-05, "loss": 0.7166, "step": 1512 }, { "epoch": 0.3238875063551952, "grad_norm": 0.21599365474216994, "learning_rate": 3.0873020039200016e-05, "loss": 0.6909, "step": 1513 }, { "epoch": 0.3241015760884108, "grad_norm": 0.2261707207767992, "learning_rate": 3.086161279678377e-05, "loss": 0.7466, "step": 1514 }, { "epoch": 0.3243156458216264, "grad_norm": 0.20137945351047606, "learning_rate": 3.085020054066694e-05, "loss": 0.7189, "step": 1515 }, { "epoch": 0.324529715554842, "grad_norm": 0.23206221114248038, "learning_rate": 3.08387832761174e-05, "loss": 0.7089, "step": 1516 }, { "epoch": 0.3247437852880576, "grad_norm": 0.2141593987780817, "learning_rate": 3.082736100840534e-05, "loss": 0.7198, "step": 1517 }, { "epoch": 0.32495785502127317, "grad_norm": 0.20897205366311827, "learning_rate": 3.081593374280326e-05, "loss": 0.7159, "step": 1518 }, { "epoch": 0.32517192475448875, "grad_norm": 0.2127990393603389, "learning_rate": 3.0804501484585966e-05, "loss": 0.7026, "step": 1519 }, { "epoch": 0.3253859944877044, "grad_norm": 0.23938984632207602, "learning_rate": 3.0793064239030566e-05, "loss": 0.7144, "step": 1520 }, { "epoch": 0.32560006422091997, "grad_norm": 0.244064604982906, "learning_rate": 3.078162201141646e-05, "loss": 0.7148, "step": 1521 }, { "epoch": 0.32581413395413555, "grad_norm": 0.22934179594192933, "learning_rate": 3.077017480702538e-05, "loss": 0.742, "step": 1522 }, { "epoch": 0.32602820368735114, "grad_norm": 0.23353887242018262, "learning_rate": 3.0758722631141326e-05, "loss": 0.7534, "step": 1523 }, { "epoch": 0.3262422734205668, "grad_norm": 0.21107060960629914, "learning_rate": 3.07472654890506e-05, "loss": 0.7264, "step": 1524 }, { "epoch": 0.32645634315378236, "grad_norm": 0.22750805242706576, "learning_rate": 3.073580338604179e-05, "loss": 0.7269, "step": 1525 }, { "epoch": 0.32667041288699794, "grad_norm": 0.20500705537461428, "learning_rate": 3.07243363274058e-05, "loss": 0.7135, "step": 1526 }, { "epoch": 0.3268844826202135, "grad_norm": 0.2033217865487313, "learning_rate": 3.0712864318435786e-05, "loss": 0.7039, "step": 1527 }, { "epoch": 0.3270985523534291, "grad_norm": 0.2281544332583003, "learning_rate": 3.070138736442721e-05, "loss": 0.7254, "step": 1528 }, { "epoch": 0.32731262208664474, "grad_norm": 0.23078568100556765, "learning_rate": 3.068990547067783e-05, "loss": 0.7495, "step": 1529 }, { "epoch": 0.3275266918198603, "grad_norm": 0.21760372451945423, "learning_rate": 3.067841864248764e-05, "loss": 0.7177, "step": 1530 }, { "epoch": 0.3277407615530759, "grad_norm": 0.2162578028839833, "learning_rate": 3.066692688515896e-05, "loss": 0.7241, "step": 1531 }, { "epoch": 0.3279548312862915, "grad_norm": 0.23443380671489752, "learning_rate": 3.065543020399635e-05, "loss": 0.7417, "step": 1532 }, { "epoch": 0.32816890101950713, "grad_norm": 0.32337526862754307, "learning_rate": 3.064392860430666e-05, "loss": 0.7274, "step": 1533 }, { "epoch": 0.3283829707527227, "grad_norm": 0.21848406390806313, "learning_rate": 3.0632422091399024e-05, "loss": 0.7641, "step": 1534 }, { "epoch": 0.3285970404859383, "grad_norm": 0.22945002660888902, "learning_rate": 3.062091067058481e-05, "loss": 0.7479, "step": 1535 }, { "epoch": 0.3288111102191539, "grad_norm": 0.24754497404511555, "learning_rate": 3.0609394347177665e-05, "loss": 0.7162, "step": 1536 }, { "epoch": 0.32902517995236946, "grad_norm": 0.22515973375379672, "learning_rate": 3.0597873126493515e-05, "loss": 0.706, "step": 1537 }, { "epoch": 0.3292392496855851, "grad_norm": 0.19490951044382684, "learning_rate": 3.058634701385053e-05, "loss": 0.7108, "step": 1538 }, { "epoch": 0.3294533194188007, "grad_norm": 0.2701935725606944, "learning_rate": 3.057481601456915e-05, "loss": 0.7377, "step": 1539 }, { "epoch": 0.32966738915201627, "grad_norm": 0.2708519020796206, "learning_rate": 3.056328013397205e-05, "loss": 0.7319, "step": 1540 }, { "epoch": 0.32988145888523185, "grad_norm": 0.24208851137501558, "learning_rate": 3.0551739377384174e-05, "loss": 0.716, "step": 1541 }, { "epoch": 0.3300955286184475, "grad_norm": 0.26409441978526554, "learning_rate": 3.0540193750132714e-05, "loss": 0.732, "step": 1542 }, { "epoch": 0.33030959835166307, "grad_norm": 0.2671257890040448, "learning_rate": 3.052864325754712e-05, "loss": 0.7395, "step": 1543 }, { "epoch": 0.33052366808487865, "grad_norm": 0.21147023656331912, "learning_rate": 3.0517087904959068e-05, "loss": 0.7486, "step": 1544 }, { "epoch": 0.33073773781809424, "grad_norm": 0.23720722371685085, "learning_rate": 3.0505527697702497e-05, "loss": 0.7379, "step": 1545 }, { "epoch": 0.3309518075513098, "grad_norm": 0.24633335731519487, "learning_rate": 3.049396264111357e-05, "loss": 0.7073, "step": 1546 }, { "epoch": 0.33116587728452546, "grad_norm": 0.23511021355438164, "learning_rate": 3.0482392740530697e-05, "loss": 0.7123, "step": 1547 }, { "epoch": 0.33137994701774104, "grad_norm": 0.25273896051040157, "learning_rate": 3.0470818001294516e-05, "loss": 0.7489, "step": 1548 }, { "epoch": 0.3315940167509566, "grad_norm": 0.2844954501216773, "learning_rate": 3.0459238428747927e-05, "loss": 0.7388, "step": 1549 }, { "epoch": 0.3318080864841722, "grad_norm": 0.22700294434235596, "learning_rate": 3.0447654028236013e-05, "loss": 0.7464, "step": 1550 }, { "epoch": 0.3320221562173878, "grad_norm": 0.24139343046404554, "learning_rate": 3.0436064805106134e-05, "loss": 0.6965, "step": 1551 }, { "epoch": 0.3322362259506034, "grad_norm": 0.28762797583938526, "learning_rate": 3.0424470764707838e-05, "loss": 0.7248, "step": 1552 }, { "epoch": 0.332450295683819, "grad_norm": 0.24719411986259368, "learning_rate": 3.041287191239293e-05, "loss": 0.7212, "step": 1553 }, { "epoch": 0.3326643654170346, "grad_norm": 0.25059962292528026, "learning_rate": 3.0401268253515398e-05, "loss": 0.7422, "step": 1554 }, { "epoch": 0.3328784351502502, "grad_norm": 0.23228865151268058, "learning_rate": 3.0389659793431482e-05, "loss": 0.7295, "step": 1555 }, { "epoch": 0.3330925048834658, "grad_norm": 0.20578321531990376, "learning_rate": 3.0378046537499622e-05, "loss": 0.6944, "step": 1556 }, { "epoch": 0.3333065746166814, "grad_norm": 0.22639186641619344, "learning_rate": 3.0366428491080485e-05, "loss": 0.7351, "step": 1557 }, { "epoch": 0.333520644349897, "grad_norm": 0.23255840572300687, "learning_rate": 3.035480565953693e-05, "loss": 0.7526, "step": 1558 }, { "epoch": 0.33373471408311256, "grad_norm": 0.1949740285978128, "learning_rate": 3.0343178048234045e-05, "loss": 0.7295, "step": 1559 }, { "epoch": 0.33394878381632814, "grad_norm": 0.21275472679390295, "learning_rate": 3.0331545662539094e-05, "loss": 0.7225, "step": 1560 }, { "epoch": 0.3341628535495438, "grad_norm": 0.22510941658463754, "learning_rate": 3.0319908507821588e-05, "loss": 0.7407, "step": 1561 }, { "epoch": 0.33437692328275936, "grad_norm": 0.18169833239885208, "learning_rate": 3.0308266589453202e-05, "loss": 0.73, "step": 1562 }, { "epoch": 0.33459099301597495, "grad_norm": 0.2094184274202531, "learning_rate": 3.029661991280783e-05, "loss": 0.7226, "step": 1563 }, { "epoch": 0.33480506274919053, "grad_norm": 0.2242852324279592, "learning_rate": 3.028496848326155e-05, "loss": 0.7106, "step": 1564 }, { "epoch": 0.33501913248240617, "grad_norm": 0.2275744277057065, "learning_rate": 3.0273312306192656e-05, "loss": 0.7214, "step": 1565 }, { "epoch": 0.33523320221562175, "grad_norm": 0.21437923131620099, "learning_rate": 3.0261651386981596e-05, "loss": 0.7013, "step": 1566 }, { "epoch": 0.33544727194883733, "grad_norm": 0.2301184013271844, "learning_rate": 3.0249985731011045e-05, "loss": 0.7553, "step": 1567 }, { "epoch": 0.3356613416820529, "grad_norm": 0.20484283659264574, "learning_rate": 3.0238315343665843e-05, "loss": 0.7375, "step": 1568 }, { "epoch": 0.3358754114152685, "grad_norm": 0.19523627877554567, "learning_rate": 3.0226640230333025e-05, "loss": 0.7475, "step": 1569 }, { "epoch": 0.33608948114848414, "grad_norm": 0.20356523622286868, "learning_rate": 3.0214960396401792e-05, "loss": 0.7179, "step": 1570 }, { "epoch": 0.3363035508816997, "grad_norm": 0.21115453230974598, "learning_rate": 3.020327584726354e-05, "loss": 0.7487, "step": 1571 }, { "epoch": 0.3365176206149153, "grad_norm": 0.20746541444589975, "learning_rate": 3.0191586588311835e-05, "loss": 0.7315, "step": 1572 }, { "epoch": 0.3367316903481309, "grad_norm": 0.20301453422715285, "learning_rate": 3.0179892624942427e-05, "loss": 0.7308, "step": 1573 }, { "epoch": 0.3369457600813465, "grad_norm": 0.20402259252972538, "learning_rate": 3.0168193962553202e-05, "loss": 0.7228, "step": 1574 }, { "epoch": 0.3371598298145621, "grad_norm": 0.20037789819760432, "learning_rate": 3.0156490606544265e-05, "loss": 0.7349, "step": 1575 }, { "epoch": 0.3373738995477777, "grad_norm": 0.18538529041338164, "learning_rate": 3.014478256231786e-05, "loss": 0.6992, "step": 1576 }, { "epoch": 0.33758796928099327, "grad_norm": 0.21455149238360505, "learning_rate": 3.013306983527839e-05, "loss": 0.7546, "step": 1577 }, { "epoch": 0.33780203901420885, "grad_norm": 0.21237178866595172, "learning_rate": 3.0121352430832434e-05, "loss": 0.7366, "step": 1578 }, { "epoch": 0.3380161087474245, "grad_norm": 0.19002221689288828, "learning_rate": 3.0109630354388725e-05, "loss": 0.7053, "step": 1579 }, { "epoch": 0.3382301784806401, "grad_norm": 0.20220659300897512, "learning_rate": 3.0097903611358146e-05, "loss": 0.7148, "step": 1580 }, { "epoch": 0.33844424821385566, "grad_norm": 0.19654587916224117, "learning_rate": 3.0086172207153752e-05, "loss": 0.7082, "step": 1581 }, { "epoch": 0.33865831794707124, "grad_norm": 0.21760388876692274, "learning_rate": 3.0074436147190728e-05, "loss": 0.7171, "step": 1582 }, { "epoch": 0.3388723876802869, "grad_norm": 0.18985867130739387, "learning_rate": 3.0062695436886424e-05, "loss": 0.7246, "step": 1583 }, { "epoch": 0.33908645741350246, "grad_norm": 0.1983141633307748, "learning_rate": 3.0050950081660316e-05, "loss": 0.6926, "step": 1584 }, { "epoch": 0.33930052714671805, "grad_norm": 0.20311032549877156, "learning_rate": 3.0039200086934063e-05, "loss": 0.7479, "step": 1585 }, { "epoch": 0.33951459687993363, "grad_norm": 0.20611254761842612, "learning_rate": 3.0027445458131413e-05, "loss": 0.7433, "step": 1586 }, { "epoch": 0.3397286666131492, "grad_norm": 0.19158516904924683, "learning_rate": 3.001568620067831e-05, "loss": 0.7378, "step": 1587 }, { "epoch": 0.33994273634636485, "grad_norm": 0.22177020322419674, "learning_rate": 3.0003922320002786e-05, "loss": 0.7222, "step": 1588 }, { "epoch": 0.34015680607958043, "grad_norm": 0.2250376877792279, "learning_rate": 2.9992153821535028e-05, "loss": 0.7738, "step": 1589 }, { "epoch": 0.340370875812796, "grad_norm": 0.19034483792478848, "learning_rate": 2.9980380710707355e-05, "loss": 0.7353, "step": 1590 }, { "epoch": 0.3405849455460116, "grad_norm": 0.2705908427950601, "learning_rate": 2.9968602992954222e-05, "loss": 0.7323, "step": 1591 }, { "epoch": 0.34079901527922724, "grad_norm": 0.190363268341525, "learning_rate": 2.9956820673712194e-05, "loss": 0.7298, "step": 1592 }, { "epoch": 0.3410130850124428, "grad_norm": 0.2257903050391308, "learning_rate": 2.994503375841997e-05, "loss": 0.766, "step": 1593 }, { "epoch": 0.3412271547456584, "grad_norm": 0.22424147399106226, "learning_rate": 2.993324225251837e-05, "loss": 0.7222, "step": 1594 }, { "epoch": 0.341441224478874, "grad_norm": 0.21605957952365637, "learning_rate": 2.9921446161450328e-05, "loss": 0.7341, "step": 1595 }, { "epoch": 0.34165529421208957, "grad_norm": 0.20885516199045895, "learning_rate": 2.9909645490660896e-05, "loss": 0.7375, "step": 1596 }, { "epoch": 0.3418693639453052, "grad_norm": 0.20602752803738508, "learning_rate": 2.989784024559725e-05, "loss": 0.7619, "step": 1597 }, { "epoch": 0.3420834336785208, "grad_norm": 0.211005366778882, "learning_rate": 2.9886030431708665e-05, "loss": 0.7055, "step": 1598 }, { "epoch": 0.34229750341173637, "grad_norm": 0.2079197836533538, "learning_rate": 2.9874216054446532e-05, "loss": 0.7363, "step": 1599 }, { "epoch": 0.34251157314495195, "grad_norm": 0.22557406416231265, "learning_rate": 2.986239711926434e-05, "loss": 0.7307, "step": 1600 }, { "epoch": 0.34272564287816754, "grad_norm": 0.209959893905052, "learning_rate": 2.985057363161769e-05, "loss": 0.7325, "step": 1601 }, { "epoch": 0.3429397126113832, "grad_norm": 0.20610022811042758, "learning_rate": 2.9838745596964287e-05, "loss": 0.7328, "step": 1602 }, { "epoch": 0.34315378234459876, "grad_norm": 0.21951783081430787, "learning_rate": 2.982691302076393e-05, "loss": 0.7489, "step": 1603 }, { "epoch": 0.34336785207781434, "grad_norm": 0.21431566804952354, "learning_rate": 2.9815075908478506e-05, "loss": 0.7282, "step": 1604 }, { "epoch": 0.3435819218110299, "grad_norm": 0.20514990108713682, "learning_rate": 2.980323426557201e-05, "loss": 0.745, "step": 1605 }, { "epoch": 0.34379599154424556, "grad_norm": 0.19848649229970577, "learning_rate": 2.9791388097510526e-05, "loss": 0.7113, "step": 1606 }, { "epoch": 0.34401006127746114, "grad_norm": 0.1990950695705066, "learning_rate": 2.9779537409762223e-05, "loss": 0.7141, "step": 1607 }, { "epoch": 0.3442241310106767, "grad_norm": 0.20268547835912892, "learning_rate": 2.9767682207797345e-05, "loss": 0.7089, "step": 1608 }, { "epoch": 0.3444382007438923, "grad_norm": 0.24099431488848325, "learning_rate": 2.975582249708825e-05, "loss": 0.7484, "step": 1609 }, { "epoch": 0.3446522704771079, "grad_norm": 0.19793629203445218, "learning_rate": 2.974395828310934e-05, "loss": 0.7225, "step": 1610 }, { "epoch": 0.34486634021032353, "grad_norm": 0.20548925189030023, "learning_rate": 2.9732089571337126e-05, "loss": 0.6875, "step": 1611 }, { "epoch": 0.3450804099435391, "grad_norm": 0.23042418777640006, "learning_rate": 2.9720216367250187e-05, "loss": 0.7027, "step": 1612 }, { "epoch": 0.3452944796767547, "grad_norm": 0.21771843664066826, "learning_rate": 2.970833867632916e-05, "loss": 0.7416, "step": 1613 }, { "epoch": 0.3455085494099703, "grad_norm": 0.20521482357688642, "learning_rate": 2.9696456504056773e-05, "loss": 0.6956, "step": 1614 }, { "epoch": 0.3457226191431859, "grad_norm": 0.233271321921815, "learning_rate": 2.9684569855917817e-05, "loss": 0.7205, "step": 1615 }, { "epoch": 0.3459366888764015, "grad_norm": 0.21040558142057814, "learning_rate": 2.967267873739914e-05, "loss": 0.7415, "step": 1616 }, { "epoch": 0.3461507586096171, "grad_norm": 0.21114989102574602, "learning_rate": 2.9660783153989664e-05, "loss": 0.7196, "step": 1617 }, { "epoch": 0.34636482834283266, "grad_norm": 0.24747773642343213, "learning_rate": 2.9648883111180376e-05, "loss": 0.7414, "step": 1618 }, { "epoch": 0.34657889807604825, "grad_norm": 0.21550307981218678, "learning_rate": 2.9636978614464298e-05, "loss": 0.6899, "step": 1619 }, { "epoch": 0.3467929678092639, "grad_norm": 0.19876871136787375, "learning_rate": 2.962506966933654e-05, "loss": 0.704, "step": 1620 }, { "epoch": 0.34700703754247947, "grad_norm": 0.24248305331461964, "learning_rate": 2.9613156281294234e-05, "loss": 0.7251, "step": 1621 }, { "epoch": 0.34722110727569505, "grad_norm": 0.23234292968903417, "learning_rate": 2.9601238455836592e-05, "loss": 0.7362, "step": 1622 }, { "epoch": 0.34743517700891063, "grad_norm": 0.19450745856981094, "learning_rate": 2.9589316198464853e-05, "loss": 0.7002, "step": 1623 }, { "epoch": 0.34764924674212627, "grad_norm": 0.23514572854965687, "learning_rate": 2.957738951468231e-05, "loss": 0.7314, "step": 1624 }, { "epoch": 0.34786331647534185, "grad_norm": 0.21443200178723576, "learning_rate": 2.95654584099943e-05, "loss": 0.7081, "step": 1625 }, { "epoch": 0.34807738620855744, "grad_norm": 0.2000731915774815, "learning_rate": 2.9553522889908194e-05, "loss": 0.6902, "step": 1626 }, { "epoch": 0.348291455941773, "grad_norm": 0.22442441262696242, "learning_rate": 2.9541582959933416e-05, "loss": 0.7183, "step": 1627 }, { "epoch": 0.3485055256749886, "grad_norm": 0.20769996794973836, "learning_rate": 2.952963862558141e-05, "loss": 0.7025, "step": 1628 }, { "epoch": 0.34871959540820424, "grad_norm": 0.20551158106453798, "learning_rate": 2.9517689892365663e-05, "loss": 0.7293, "step": 1629 }, { "epoch": 0.3489336651414198, "grad_norm": 0.19655578191389472, "learning_rate": 2.9505736765801677e-05, "loss": 0.7518, "step": 1630 }, { "epoch": 0.3491477348746354, "grad_norm": 0.2170350384603782, "learning_rate": 2.9493779251407003e-05, "loss": 0.7515, "step": 1631 }, { "epoch": 0.349361804607851, "grad_norm": 0.1949058103607534, "learning_rate": 2.9481817354701206e-05, "loss": 0.7222, "step": 1632 }, { "epoch": 0.34957587434106663, "grad_norm": 0.309134693217062, "learning_rate": 2.9469851081205875e-05, "loss": 0.7385, "step": 1633 }, { "epoch": 0.3497899440742822, "grad_norm": 0.20833630459963806, "learning_rate": 2.945788043644462e-05, "loss": 0.6965, "step": 1634 }, { "epoch": 0.3500040138074978, "grad_norm": 0.21075423947370012, "learning_rate": 2.944590542594307e-05, "loss": 0.7187, "step": 1635 }, { "epoch": 0.3502180835407134, "grad_norm": 0.2167490432101183, "learning_rate": 2.9433926055228866e-05, "loss": 0.7482, "step": 1636 }, { "epoch": 0.35043215327392896, "grad_norm": 0.20541229872761796, "learning_rate": 2.942194232983166e-05, "loss": 0.749, "step": 1637 }, { "epoch": 0.3506462230071446, "grad_norm": 0.297135409790418, "learning_rate": 2.9409954255283132e-05, "loss": 0.7295, "step": 1638 }, { "epoch": 0.3508602927403602, "grad_norm": 0.2387083440534443, "learning_rate": 2.9397961837116935e-05, "loss": 0.7411, "step": 1639 }, { "epoch": 0.35107436247357576, "grad_norm": 0.20506889266125755, "learning_rate": 2.9385965080868763e-05, "loss": 0.7268, "step": 1640 }, { "epoch": 0.35128843220679135, "grad_norm": 0.20896069342101464, "learning_rate": 2.937396399207629e-05, "loss": 0.7115, "step": 1641 }, { "epoch": 0.351502501940007, "grad_norm": 0.22158110604347978, "learning_rate": 2.9361958576279197e-05, "loss": 0.7267, "step": 1642 }, { "epoch": 0.35171657167322257, "grad_norm": 0.21709748064823825, "learning_rate": 2.9349948839019165e-05, "loss": 0.7357, "step": 1643 }, { "epoch": 0.35193064140643815, "grad_norm": 0.21889904612208433, "learning_rate": 2.9337934785839864e-05, "loss": 0.7262, "step": 1644 }, { "epoch": 0.35214471113965373, "grad_norm": 0.1983177751255727, "learning_rate": 2.932591642228696e-05, "loss": 0.7054, "step": 1645 }, { "epoch": 0.3523587808728693, "grad_norm": 0.3684852676030669, "learning_rate": 2.9313893753908114e-05, "loss": 0.6861, "step": 1646 }, { "epoch": 0.35257285060608495, "grad_norm": 0.21530192211391289, "learning_rate": 2.930186678625295e-05, "loss": 0.7755, "step": 1647 }, { "epoch": 0.35278692033930054, "grad_norm": 0.19819752885760447, "learning_rate": 2.9289835524873108e-05, "loss": 0.6966, "step": 1648 }, { "epoch": 0.3530009900725161, "grad_norm": 0.2284344604462054, "learning_rate": 2.92777999753222e-05, "loss": 0.712, "step": 1649 }, { "epoch": 0.3532150598057317, "grad_norm": 0.22478725630612018, "learning_rate": 2.92657601431558e-05, "loss": 0.7364, "step": 1650 }, { "epoch": 0.35342912953894734, "grad_norm": 0.22149960765616844, "learning_rate": 2.9253716033931484e-05, "loss": 0.7221, "step": 1651 }, { "epoch": 0.3536431992721629, "grad_norm": 0.22881058246798758, "learning_rate": 2.924166765320878e-05, "loss": 0.7249, "step": 1652 }, { "epoch": 0.3538572690053785, "grad_norm": 0.2193895470686908, "learning_rate": 2.9229615006549208e-05, "loss": 0.719, "step": 1653 }, { "epoch": 0.3540713387385941, "grad_norm": 0.22011323073956504, "learning_rate": 2.9217558099516242e-05, "loss": 0.7155, "step": 1654 }, { "epoch": 0.35428540847180967, "grad_norm": 0.22693360239190205, "learning_rate": 2.9205496937675338e-05, "loss": 0.7307, "step": 1655 }, { "epoch": 0.3544994782050253, "grad_norm": 0.19702516441339143, "learning_rate": 2.9193431526593894e-05, "loss": 0.7205, "step": 1656 }, { "epoch": 0.3547135479382409, "grad_norm": 0.2013038172795989, "learning_rate": 2.918136187184129e-05, "loss": 0.7213, "step": 1657 }, { "epoch": 0.3549276176714565, "grad_norm": 0.1996526512490232, "learning_rate": 2.9169287978988846e-05, "loss": 0.7269, "step": 1658 }, { "epoch": 0.35514168740467206, "grad_norm": 0.2165464999925143, "learning_rate": 2.9157209853609864e-05, "loss": 0.7432, "step": 1659 }, { "epoch": 0.35535575713788764, "grad_norm": 0.21293323524833144, "learning_rate": 2.914512750127957e-05, "loss": 0.765, "step": 1660 }, { "epoch": 0.3555698268711033, "grad_norm": 0.21354026133827708, "learning_rate": 2.9133040927575165e-05, "loss": 0.7256, "step": 1661 }, { "epoch": 0.35578389660431886, "grad_norm": 0.1964840334247862, "learning_rate": 2.912095013807579e-05, "loss": 0.7121, "step": 1662 }, { "epoch": 0.35599796633753444, "grad_norm": 0.19786554348614302, "learning_rate": 2.910885513836252e-05, "loss": 0.7447, "step": 1663 }, { "epoch": 0.35621203607075, "grad_norm": 0.20852131506720853, "learning_rate": 2.90967559340184e-05, "loss": 0.7022, "step": 1664 }, { "epoch": 0.35642610580396566, "grad_norm": 0.19562913488732922, "learning_rate": 2.908465253062839e-05, "loss": 0.7346, "step": 1665 }, { "epoch": 0.35664017553718125, "grad_norm": 0.2032286542306735, "learning_rate": 2.90725449337794e-05, "loss": 0.7325, "step": 1666 }, { "epoch": 0.35685424527039683, "grad_norm": 0.2053471624657201, "learning_rate": 2.906043314906028e-05, "loss": 0.7423, "step": 1667 }, { "epoch": 0.3570683150036124, "grad_norm": 0.19943603861771186, "learning_rate": 2.9048317182061808e-05, "loss": 0.7584, "step": 1668 }, { "epoch": 0.357282384736828, "grad_norm": 0.20361802831648107, "learning_rate": 2.9036197038376674e-05, "loss": 0.7357, "step": 1669 }, { "epoch": 0.35749645447004363, "grad_norm": 0.18808887274718247, "learning_rate": 2.902407272359954e-05, "loss": 0.734, "step": 1670 }, { "epoch": 0.3577105242032592, "grad_norm": 0.2010547899926018, "learning_rate": 2.9011944243326958e-05, "loss": 0.7265, "step": 1671 }, { "epoch": 0.3579245939364748, "grad_norm": 0.1917256441174958, "learning_rate": 2.8999811603157403e-05, "loss": 0.716, "step": 1672 }, { "epoch": 0.3581386636696904, "grad_norm": 0.19431126645573318, "learning_rate": 2.8987674808691292e-05, "loss": 0.6921, "step": 1673 }, { "epoch": 0.358352733402906, "grad_norm": 0.19781977259116396, "learning_rate": 2.8975533865530935e-05, "loss": 0.7569, "step": 1674 }, { "epoch": 0.3585668031361216, "grad_norm": 0.18966150573569404, "learning_rate": 2.8963388779280583e-05, "loss": 0.6993, "step": 1675 }, { "epoch": 0.3587808728693372, "grad_norm": 0.2441647082523994, "learning_rate": 2.8951239555546377e-05, "loss": 0.7253, "step": 1676 }, { "epoch": 0.35899494260255277, "grad_norm": 0.18954802346774788, "learning_rate": 2.893908619993637e-05, "loss": 0.7335, "step": 1677 }, { "epoch": 0.35920901233576835, "grad_norm": 0.20300394226628365, "learning_rate": 2.892692871806055e-05, "loss": 0.7149, "step": 1678 }, { "epoch": 0.359423082068984, "grad_norm": 0.18951727794135695, "learning_rate": 2.891476711553077e-05, "loss": 0.717, "step": 1679 }, { "epoch": 0.3596371518021996, "grad_norm": 0.24248952830112777, "learning_rate": 2.8902601397960805e-05, "loss": 0.7269, "step": 1680 }, { "epoch": 0.35985122153541516, "grad_norm": 0.2191733197768651, "learning_rate": 2.8890431570966335e-05, "loss": 0.6912, "step": 1681 }, { "epoch": 0.36006529126863074, "grad_norm": 0.23506541996096175, "learning_rate": 2.8878257640164923e-05, "loss": 0.7096, "step": 1682 }, { "epoch": 0.3602793610018464, "grad_norm": 0.1998321006566307, "learning_rate": 2.886607961117604e-05, "loss": 0.7373, "step": 1683 }, { "epoch": 0.36049343073506196, "grad_norm": 0.2265943150454131, "learning_rate": 2.8853897489621036e-05, "loss": 0.6807, "step": 1684 }, { "epoch": 0.36070750046827754, "grad_norm": 0.22358646815462654, "learning_rate": 2.8841711281123163e-05, "loss": 0.6968, "step": 1685 }, { "epoch": 0.3609215702014931, "grad_norm": 0.20582772794801782, "learning_rate": 2.8829520991307544e-05, "loss": 0.731, "step": 1686 }, { "epoch": 0.3611356399347087, "grad_norm": 0.2887696199346498, "learning_rate": 2.8817326625801203e-05, "loss": 0.7482, "step": 1687 }, { "epoch": 0.36134970966792435, "grad_norm": 0.23325142534643833, "learning_rate": 2.8805128190233032e-05, "loss": 0.7334, "step": 1688 }, { "epoch": 0.36156377940113993, "grad_norm": 0.1770642884634449, "learning_rate": 2.87929256902338e-05, "loss": 0.7361, "step": 1689 }, { "epoch": 0.3617778491343555, "grad_norm": 0.2368596549079858, "learning_rate": 2.8780719131436168e-05, "loss": 0.751, "step": 1690 }, { "epoch": 0.3619919188675711, "grad_norm": 0.1739362619897583, "learning_rate": 2.8768508519474664e-05, "loss": 0.7077, "step": 1691 }, { "epoch": 0.36220598860078673, "grad_norm": 0.2147919831310469, "learning_rate": 2.8756293859985675e-05, "loss": 0.7318, "step": 1692 }, { "epoch": 0.3624200583340023, "grad_norm": 0.19748325715869405, "learning_rate": 2.8744075158607468e-05, "loss": 0.7446, "step": 1693 }, { "epoch": 0.3626341280672179, "grad_norm": 0.2016693979744296, "learning_rate": 2.8731852420980176e-05, "loss": 0.7346, "step": 1694 }, { "epoch": 0.3628481978004335, "grad_norm": 0.22805234274826633, "learning_rate": 2.871962565274579e-05, "loss": 0.7401, "step": 1695 }, { "epoch": 0.36306226753364906, "grad_norm": 0.21347228887531122, "learning_rate": 2.8707394859548167e-05, "loss": 0.7319, "step": 1696 }, { "epoch": 0.3632763372668647, "grad_norm": 0.20867067071041048, "learning_rate": 2.8695160047033012e-05, "loss": 0.7381, "step": 1697 }, { "epoch": 0.3634904070000803, "grad_norm": 0.19631814144259052, "learning_rate": 2.86829212208479e-05, "loss": 0.7204, "step": 1698 }, { "epoch": 0.36370447673329587, "grad_norm": 0.19467251504422736, "learning_rate": 2.8670678386642246e-05, "loss": 0.7196, "step": 1699 }, { "epoch": 0.36391854646651145, "grad_norm": 0.21482620723338536, "learning_rate": 2.8658431550067317e-05, "loss": 0.7474, "step": 1700 }, { "epoch": 0.3641326161997271, "grad_norm": 0.4260820557629357, "learning_rate": 2.8646180716776243e-05, "loss": 0.6992, "step": 1701 }, { "epoch": 0.36434668593294267, "grad_norm": 0.1942291886013534, "learning_rate": 2.863392589242397e-05, "loss": 0.7021, "step": 1702 }, { "epoch": 0.36456075566615825, "grad_norm": 0.20489161062821937, "learning_rate": 2.8621667082667316e-05, "loss": 0.7172, "step": 1703 }, { "epoch": 0.36477482539937384, "grad_norm": 0.19535321269181533, "learning_rate": 2.860940429316491e-05, "loss": 0.7432, "step": 1704 }, { "epoch": 0.3649888951325894, "grad_norm": 0.23216416966189832, "learning_rate": 2.859713752957725e-05, "loss": 0.7035, "step": 1705 }, { "epoch": 0.36520296486580506, "grad_norm": 0.20817838367972874, "learning_rate": 2.8584866797566645e-05, "loss": 0.7075, "step": 1706 }, { "epoch": 0.36541703459902064, "grad_norm": 0.20055889426234758, "learning_rate": 2.857259210279724e-05, "loss": 0.6914, "step": 1707 }, { "epoch": 0.3656311043322362, "grad_norm": 0.2286761813381872, "learning_rate": 2.8560313450935012e-05, "loss": 0.7321, "step": 1708 }, { "epoch": 0.3658451740654518, "grad_norm": 0.2041313674388944, "learning_rate": 2.854803084764777e-05, "loss": 0.7244, "step": 1709 }, { "epoch": 0.3660592437986674, "grad_norm": 0.1984995984196707, "learning_rate": 2.8535744298605127e-05, "loss": 0.7113, "step": 1710 }, { "epoch": 0.366273313531883, "grad_norm": 0.20423034432358758, "learning_rate": 2.8523453809478546e-05, "loss": 0.7375, "step": 1711 }, { "epoch": 0.3664873832650986, "grad_norm": 0.20856433063544874, "learning_rate": 2.851115938594129e-05, "loss": 0.7415, "step": 1712 }, { "epoch": 0.3667014529983142, "grad_norm": 0.18630271956272798, "learning_rate": 2.8498861033668444e-05, "loss": 0.7234, "step": 1713 }, { "epoch": 0.3669155227315298, "grad_norm": 0.21652364829720752, "learning_rate": 2.8486558758336896e-05, "loss": 0.6767, "step": 1714 }, { "epoch": 0.3671295924647454, "grad_norm": 0.18257187565594565, "learning_rate": 2.8474252565625368e-05, "loss": 0.7028, "step": 1715 }, { "epoch": 0.367343662197961, "grad_norm": 0.22788704890813255, "learning_rate": 2.846194246121436e-05, "loss": 0.7308, "step": 1716 }, { "epoch": 0.3675577319311766, "grad_norm": 0.1904359809868788, "learning_rate": 2.8449628450786207e-05, "loss": 0.7392, "step": 1717 }, { "epoch": 0.36777180166439216, "grad_norm": 0.20934717695243124, "learning_rate": 2.8437310540025033e-05, "loss": 0.7342, "step": 1718 }, { "epoch": 0.36798587139760774, "grad_norm": 0.18721907363554224, "learning_rate": 2.8424988734616747e-05, "loss": 0.7051, "step": 1719 }, { "epoch": 0.3681999411308234, "grad_norm": 0.20690251888753228, "learning_rate": 2.8412663040249097e-05, "loss": 0.6912, "step": 1720 }, { "epoch": 0.36841401086403897, "grad_norm": 0.19376329881339552, "learning_rate": 2.8400333462611578e-05, "loss": 0.7309, "step": 1721 }, { "epoch": 0.36862808059725455, "grad_norm": 0.20675741407692566, "learning_rate": 2.8388000007395512e-05, "loss": 0.7345, "step": 1722 }, { "epoch": 0.36884215033047013, "grad_norm": 0.20526333726628604, "learning_rate": 2.8375662680294e-05, "loss": 0.7425, "step": 1723 }, { "epoch": 0.36905622006368577, "grad_norm": 0.18939405189693614, "learning_rate": 2.836332148700193e-05, "loss": 0.714, "step": 1724 }, { "epoch": 0.36927028979690135, "grad_norm": 0.2257731688992446, "learning_rate": 2.8350976433215964e-05, "loss": 0.7305, "step": 1725 }, { "epoch": 0.36948435953011693, "grad_norm": 0.19399977270023452, "learning_rate": 2.8338627524634566e-05, "loss": 0.7, "step": 1726 }, { "epoch": 0.3696984292633325, "grad_norm": 0.22325392542928493, "learning_rate": 2.832627476695797e-05, "loss": 0.716, "step": 1727 }, { "epoch": 0.3699124989965481, "grad_norm": 0.2087277878166251, "learning_rate": 2.831391816588818e-05, "loss": 0.7319, "step": 1728 }, { "epoch": 0.37012656872976374, "grad_norm": 0.2263158186396704, "learning_rate": 2.830155772712899e-05, "loss": 0.7027, "step": 1729 }, { "epoch": 0.3703406384629793, "grad_norm": 0.19873845730486467, "learning_rate": 2.8289193456385944e-05, "loss": 0.7292, "step": 1730 }, { "epoch": 0.3705547081961949, "grad_norm": 0.20724591722087535, "learning_rate": 2.8276825359366374e-05, "loss": 0.7352, "step": 1731 }, { "epoch": 0.3707687779294105, "grad_norm": 0.2060158701752789, "learning_rate": 2.8264453441779366e-05, "loss": 0.7155, "step": 1732 }, { "epoch": 0.3709828476626261, "grad_norm": 0.2115945468043021, "learning_rate": 2.8252077709335782e-05, "loss": 0.7258, "step": 1733 }, { "epoch": 0.3711969173958417, "grad_norm": 0.22622279916089783, "learning_rate": 2.8239698167748232e-05, "loss": 0.7567, "step": 1734 }, { "epoch": 0.3714109871290573, "grad_norm": 0.22605142673964448, "learning_rate": 2.8227314822731092e-05, "loss": 0.7154, "step": 1735 }, { "epoch": 0.3716250568622729, "grad_norm": 0.19441628980835582, "learning_rate": 2.8214927680000493e-05, "loss": 0.6928, "step": 1736 }, { "epoch": 0.37183912659548846, "grad_norm": 0.23110417463174424, "learning_rate": 2.8202536745274307e-05, "loss": 0.7018, "step": 1737 }, { "epoch": 0.3720531963287041, "grad_norm": 0.21228303132353327, "learning_rate": 2.819014202427218e-05, "loss": 0.7367, "step": 1738 }, { "epoch": 0.3722672660619197, "grad_norm": 0.2191197843320111, "learning_rate": 2.817774352271549e-05, "loss": 0.721, "step": 1739 }, { "epoch": 0.37248133579513526, "grad_norm": 0.20734714732781498, "learning_rate": 2.8165341246327357e-05, "loss": 0.7023, "step": 1740 }, { "epoch": 0.37269540552835084, "grad_norm": 0.21337594074927818, "learning_rate": 2.8152935200832652e-05, "loss": 0.6865, "step": 1741 }, { "epoch": 0.3729094752615665, "grad_norm": 0.20082516749197782, "learning_rate": 2.814052539195798e-05, "loss": 0.7101, "step": 1742 }, { "epoch": 0.37312354499478206, "grad_norm": 0.22526597451442446, "learning_rate": 2.8128111825431692e-05, "loss": 0.7362, "step": 1743 }, { "epoch": 0.37333761472799765, "grad_norm": 0.19957108929175169, "learning_rate": 2.811569450698387e-05, "loss": 0.7067, "step": 1744 }, { "epoch": 0.37355168446121323, "grad_norm": 0.19593577994852204, "learning_rate": 2.8103273442346313e-05, "loss": 0.7073, "step": 1745 }, { "epoch": 0.3737657541944288, "grad_norm": 0.20281897215685657, "learning_rate": 2.8090848637252566e-05, "loss": 0.7202, "step": 1746 }, { "epoch": 0.37397982392764445, "grad_norm": 0.2060240234356956, "learning_rate": 2.80784200974379e-05, "loss": 0.7285, "step": 1747 }, { "epoch": 0.37419389366086003, "grad_norm": 0.2080764958143944, "learning_rate": 2.8065987828639308e-05, "loss": 0.7067, "step": 1748 }, { "epoch": 0.3744079633940756, "grad_norm": 0.20306795500453428, "learning_rate": 2.80535518365955e-05, "loss": 0.7256, "step": 1749 }, { "epoch": 0.3746220331272912, "grad_norm": 0.22562894892026664, "learning_rate": 2.8041112127046907e-05, "loss": 0.721, "step": 1750 }, { "epoch": 0.37483610286050684, "grad_norm": 0.19540902006214536, "learning_rate": 2.802866870573568e-05, "loss": 0.7279, "step": 1751 }, { "epoch": 0.3750501725937224, "grad_norm": 0.21536280890969858, "learning_rate": 2.8016221578405666e-05, "loss": 0.7482, "step": 1752 }, { "epoch": 0.375264242326938, "grad_norm": 0.2185950724307106, "learning_rate": 2.800377075080245e-05, "loss": 0.7703, "step": 1753 }, { "epoch": 0.3754783120601536, "grad_norm": 0.5483645678017114, "learning_rate": 2.799131622867331e-05, "loss": 0.7735, "step": 1754 }, { "epoch": 0.37569238179336917, "grad_norm": 0.23077590439820195, "learning_rate": 2.7978858017767227e-05, "loss": 0.7096, "step": 1755 }, { "epoch": 0.3759064515265848, "grad_norm": 0.21960201264980742, "learning_rate": 2.7966396123834885e-05, "loss": 0.7505, "step": 1756 }, { "epoch": 0.3761205212598004, "grad_norm": 0.21333600647268042, "learning_rate": 2.795393055262867e-05, "loss": 0.7367, "step": 1757 }, { "epoch": 0.37633459099301597, "grad_norm": 0.22219898638156713, "learning_rate": 2.794146130990268e-05, "loss": 0.7608, "step": 1758 }, { "epoch": 0.37654866072623155, "grad_norm": 0.19978754112008906, "learning_rate": 2.792898840141269e-05, "loss": 0.7265, "step": 1759 }, { "epoch": 0.3767627304594472, "grad_norm": 0.2505029699203242, "learning_rate": 2.7916511832916167e-05, "loss": 0.7155, "step": 1760 }, { "epoch": 0.3769768001926628, "grad_norm": 0.2106146592638751, "learning_rate": 2.790403161017227e-05, "loss": 0.7496, "step": 1761 }, { "epoch": 0.37719086992587836, "grad_norm": 0.22070540942901945, "learning_rate": 2.7891547738941847e-05, "loss": 0.7108, "step": 1762 }, { "epoch": 0.37740493965909394, "grad_norm": 0.22309957067127134, "learning_rate": 2.787906022498744e-05, "loss": 0.7095, "step": 1763 }, { "epoch": 0.3776190093923095, "grad_norm": 0.20847463681053063, "learning_rate": 2.7866569074073252e-05, "loss": 0.7152, "step": 1764 }, { "epoch": 0.37783307912552516, "grad_norm": 0.23237175509921956, "learning_rate": 2.7854074291965183e-05, "loss": 0.7183, "step": 1765 }, { "epoch": 0.37804714885874074, "grad_norm": 0.23090753195439292, "learning_rate": 2.78415758844308e-05, "loss": 0.7201, "step": 1766 }, { "epoch": 0.3782612185919563, "grad_norm": 0.19822757111781839, "learning_rate": 2.7829073857239342e-05, "loss": 0.7269, "step": 1767 }, { "epoch": 0.3784752883251719, "grad_norm": 0.21266903325817152, "learning_rate": 2.7816568216161717e-05, "loss": 0.7237, "step": 1768 }, { "epoch": 0.3786893580583875, "grad_norm": 0.21239506175053335, "learning_rate": 2.780405896697052e-05, "loss": 0.7382, "step": 1769 }, { "epoch": 0.37890342779160313, "grad_norm": 0.1990996390759198, "learning_rate": 2.7791546115439988e-05, "loss": 0.6949, "step": 1770 }, { "epoch": 0.3791174975248187, "grad_norm": 0.22595916033927044, "learning_rate": 2.7779029667346033e-05, "loss": 0.7287, "step": 1771 }, { "epoch": 0.3793315672580343, "grad_norm": 0.19218409645659082, "learning_rate": 2.7766509628466223e-05, "loss": 0.7207, "step": 1772 }, { "epoch": 0.3795456369912499, "grad_norm": 0.21488969989074852, "learning_rate": 2.7753986004579786e-05, "loss": 0.6924, "step": 1773 }, { "epoch": 0.3797597067244655, "grad_norm": 0.1999844006155712, "learning_rate": 2.77414588014676e-05, "loss": 0.735, "step": 1774 }, { "epoch": 0.3799737764576811, "grad_norm": 0.19940210277813755, "learning_rate": 2.7728928024912206e-05, "loss": 0.7231, "step": 1775 }, { "epoch": 0.3801878461908967, "grad_norm": 0.23614070028648362, "learning_rate": 2.771639368069778e-05, "loss": 0.7253, "step": 1776 }, { "epoch": 0.38040191592411227, "grad_norm": 0.2177230403996155, "learning_rate": 2.770385577461016e-05, "loss": 0.6919, "step": 1777 }, { "epoch": 0.38061598565732785, "grad_norm": 0.21134464251846, "learning_rate": 2.7691314312436815e-05, "loss": 0.7054, "step": 1778 }, { "epoch": 0.3808300553905435, "grad_norm": 0.23083480554672203, "learning_rate": 2.7678769299966864e-05, "loss": 0.7146, "step": 1779 }, { "epoch": 0.38104412512375907, "grad_norm": 0.2216722698933766, "learning_rate": 2.766622074299106e-05, "loss": 0.7199, "step": 1780 }, { "epoch": 0.38125819485697465, "grad_norm": 0.2239407026522653, "learning_rate": 2.7653668647301797e-05, "loss": 0.7164, "step": 1781 }, { "epoch": 0.38147226459019024, "grad_norm": 0.2359209677936568, "learning_rate": 2.76411130186931e-05, "loss": 0.737, "step": 1782 }, { "epoch": 0.3816863343234059, "grad_norm": 0.20231105289973436, "learning_rate": 2.7628553862960616e-05, "loss": 0.7395, "step": 1783 }, { "epoch": 0.38190040405662146, "grad_norm": 0.21307073531594614, "learning_rate": 2.761599118590163e-05, "loss": 0.7417, "step": 1784 }, { "epoch": 0.38211447378983704, "grad_norm": 0.2293381408754453, "learning_rate": 2.760342499331506e-05, "loss": 0.7273, "step": 1785 }, { "epoch": 0.3823285435230526, "grad_norm": 0.19091102829907344, "learning_rate": 2.759085529100143e-05, "loss": 0.7396, "step": 1786 }, { "epoch": 0.3825426132562682, "grad_norm": 0.20829281955244247, "learning_rate": 2.7578282084762893e-05, "loss": 0.7144, "step": 1787 }, { "epoch": 0.38275668298948384, "grad_norm": 0.18575514934808443, "learning_rate": 2.7565705380403218e-05, "loss": 0.723, "step": 1788 }, { "epoch": 0.3829707527226994, "grad_norm": 0.2018641164754752, "learning_rate": 2.7553125183727786e-05, "loss": 0.7005, "step": 1789 }, { "epoch": 0.383184822455915, "grad_norm": 0.19634296099432155, "learning_rate": 2.7540541500543604e-05, "loss": 0.7173, "step": 1790 }, { "epoch": 0.3833988921891306, "grad_norm": 0.18750371160676454, "learning_rate": 2.7527954336659264e-05, "loss": 0.7109, "step": 1791 }, { "epoch": 0.38361296192234623, "grad_norm": 0.20062052499594968, "learning_rate": 2.7515363697884983e-05, "loss": 0.7237, "step": 1792 }, { "epoch": 0.3838270316555618, "grad_norm": 0.18753502787433712, "learning_rate": 2.750276959003258e-05, "loss": 0.6676, "step": 1793 }, { "epoch": 0.3840411013887774, "grad_norm": 0.22289732741582527, "learning_rate": 2.7490172018915462e-05, "loss": 0.7171, "step": 1794 }, { "epoch": 0.384255171121993, "grad_norm": 0.19399632465467423, "learning_rate": 2.747757099034865e-05, "loss": 0.74, "step": 1795 }, { "epoch": 0.38446924085520856, "grad_norm": 0.2332560239406799, "learning_rate": 2.7464966510148766e-05, "loss": 0.7242, "step": 1796 }, { "epoch": 0.3846833105884242, "grad_norm": 0.2036769492361591, "learning_rate": 2.7452358584134e-05, "loss": 0.6991, "step": 1797 }, { "epoch": 0.3848973803216398, "grad_norm": 0.20329991688548135, "learning_rate": 2.7439747218124156e-05, "loss": 0.7407, "step": 1798 }, { "epoch": 0.38511145005485536, "grad_norm": 0.24852491612215835, "learning_rate": 2.7427132417940606e-05, "loss": 0.7247, "step": 1799 }, { "epoch": 0.38532551978807095, "grad_norm": 0.20990508651913883, "learning_rate": 2.741451418940634e-05, "loss": 0.695, "step": 1800 }, { "epoch": 0.3855395895212866, "grad_norm": 0.2165520777558981, "learning_rate": 2.7401892538345895e-05, "loss": 0.7115, "step": 1801 }, { "epoch": 0.38575365925450217, "grad_norm": 0.3442702245739841, "learning_rate": 2.73892674705854e-05, "loss": 0.7041, "step": 1802 }, { "epoch": 0.38596772898771775, "grad_norm": 0.20240417308796424, "learning_rate": 2.7376638991952565e-05, "loss": 0.6835, "step": 1803 }, { "epoch": 0.38618179872093333, "grad_norm": 0.23248374727170049, "learning_rate": 2.7364007108276682e-05, "loss": 0.7169, "step": 1804 }, { "epoch": 0.3863958684541489, "grad_norm": 0.2273561424825165, "learning_rate": 2.7351371825388597e-05, "loss": 0.7272, "step": 1805 }, { "epoch": 0.38660993818736455, "grad_norm": 0.1968252023241286, "learning_rate": 2.7338733149120726e-05, "loss": 0.74, "step": 1806 }, { "epoch": 0.38682400792058014, "grad_norm": 0.21363516693293966, "learning_rate": 2.7326091085307078e-05, "loss": 0.7105, "step": 1807 }, { "epoch": 0.3870380776537957, "grad_norm": 0.20097880326939904, "learning_rate": 2.7313445639783194e-05, "loss": 0.7179, "step": 1808 }, { "epoch": 0.3872521473870113, "grad_norm": 0.2989022241395946, "learning_rate": 2.7300796818386185e-05, "loss": 0.7153, "step": 1809 }, { "epoch": 0.38746621712022694, "grad_norm": 0.24756322170147138, "learning_rate": 2.728814462695473e-05, "loss": 0.7492, "step": 1810 }, { "epoch": 0.3876802868534425, "grad_norm": 0.18832672298797135, "learning_rate": 2.7275489071329065e-05, "loss": 0.7232, "step": 1811 }, { "epoch": 0.3878943565866581, "grad_norm": 0.22166867819908395, "learning_rate": 2.7262830157350957e-05, "loss": 0.7398, "step": 1812 }, { "epoch": 0.3881084263198737, "grad_norm": 0.19895813698935308, "learning_rate": 2.7250167890863743e-05, "loss": 0.7091, "step": 1813 }, { "epoch": 0.38832249605308927, "grad_norm": 0.2029446516468001, "learning_rate": 2.7237502277712305e-05, "loss": 0.7358, "step": 1814 }, { "epoch": 0.3885365657863049, "grad_norm": 0.2181862871392029, "learning_rate": 2.7224833323743064e-05, "loss": 0.7227, "step": 1815 }, { "epoch": 0.3887506355195205, "grad_norm": 0.2008808770570698, "learning_rate": 2.7212161034803977e-05, "loss": 0.706, "step": 1816 }, { "epoch": 0.3889647052527361, "grad_norm": 0.22061140212032707, "learning_rate": 2.7199485416744572e-05, "loss": 0.7062, "step": 1817 }, { "epoch": 0.38917877498595166, "grad_norm": 0.2038919546462918, "learning_rate": 2.718680647541587e-05, "loss": 0.7384, "step": 1818 }, { "epoch": 0.38939284471916724, "grad_norm": 0.19984869469755115, "learning_rate": 2.7174124216670462e-05, "loss": 0.7055, "step": 1819 }, { "epoch": 0.3896069144523829, "grad_norm": 0.19106707428113026, "learning_rate": 2.7161438646362444e-05, "loss": 0.6978, "step": 1820 }, { "epoch": 0.38982098418559846, "grad_norm": 0.20034773939138448, "learning_rate": 2.7148749770347453e-05, "loss": 0.7443, "step": 1821 }, { "epoch": 0.39003505391881405, "grad_norm": 0.20258415362901117, "learning_rate": 2.7136057594482656e-05, "loss": 0.7231, "step": 1822 }, { "epoch": 0.39024912365202963, "grad_norm": 0.2002972710807168, "learning_rate": 2.712336212462674e-05, "loss": 0.7508, "step": 1823 }, { "epoch": 0.39046319338524527, "grad_norm": 0.1887666338775884, "learning_rate": 2.711066336663991e-05, "loss": 0.711, "step": 1824 }, { "epoch": 0.39067726311846085, "grad_norm": 0.20610052214749, "learning_rate": 2.709796132638388e-05, "loss": 0.716, "step": 1825 }, { "epoch": 0.39089133285167643, "grad_norm": 0.18823667581409867, "learning_rate": 2.7085256009721895e-05, "loss": 0.7443, "step": 1826 }, { "epoch": 0.391105402584892, "grad_norm": 0.19858919507760137, "learning_rate": 2.7072547422518707e-05, "loss": 0.7378, "step": 1827 }, { "epoch": 0.3913194723181076, "grad_norm": 0.19339527041080595, "learning_rate": 2.705983557064058e-05, "loss": 0.7071, "step": 1828 }, { "epoch": 0.39153354205132324, "grad_norm": 0.19422095290511043, "learning_rate": 2.7047120459955274e-05, "loss": 0.7014, "step": 1829 }, { "epoch": 0.3917476117845388, "grad_norm": 0.20380937267452157, "learning_rate": 2.7034402096332063e-05, "loss": 0.7242, "step": 1830 }, { "epoch": 0.3919616815177544, "grad_norm": 0.18695351718613296, "learning_rate": 2.702168048564172e-05, "loss": 0.7121, "step": 1831 }, { "epoch": 0.39217575125097, "grad_norm": 0.19470267005279956, "learning_rate": 2.700895563375652e-05, "loss": 0.7166, "step": 1832 }, { "epoch": 0.3923898209841856, "grad_norm": 0.19864949644991833, "learning_rate": 2.699622754655023e-05, "loss": 0.7385, "step": 1833 }, { "epoch": 0.3926038907174012, "grad_norm": 0.3509291084831578, "learning_rate": 2.6983496229898114e-05, "loss": 0.7207, "step": 1834 }, { "epoch": 0.3928179604506168, "grad_norm": 0.2052961511773908, "learning_rate": 2.6970761689676922e-05, "loss": 0.7172, "step": 1835 }, { "epoch": 0.39303203018383237, "grad_norm": 0.20756454923945808, "learning_rate": 2.695802393176489e-05, "loss": 0.7318, "step": 1836 }, { "epoch": 0.39324609991704795, "grad_norm": 0.19990765561099857, "learning_rate": 2.6945282962041748e-05, "loss": 0.7331, "step": 1837 }, { "epoch": 0.3934601696502636, "grad_norm": 0.20308071953283982, "learning_rate": 2.6932538786388706e-05, "loss": 0.7546, "step": 1838 }, { "epoch": 0.3936742393834792, "grad_norm": 0.19257776403184904, "learning_rate": 2.6919791410688456e-05, "loss": 0.7424, "step": 1839 }, { "epoch": 0.39388830911669476, "grad_norm": 0.3725315128511502, "learning_rate": 2.6907040840825156e-05, "loss": 0.7312, "step": 1840 }, { "epoch": 0.39410237884991034, "grad_norm": 0.21448512794040087, "learning_rate": 2.689428708268444e-05, "loss": 0.7174, "step": 1841 }, { "epoch": 0.394316448583126, "grad_norm": 0.18667893597869978, "learning_rate": 2.6881530142153435e-05, "loss": 0.7325, "step": 1842 }, { "epoch": 0.39453051831634156, "grad_norm": 0.20516658000882992, "learning_rate": 2.686877002512071e-05, "loss": 0.7073, "step": 1843 }, { "epoch": 0.39474458804955714, "grad_norm": 0.1885414212304079, "learning_rate": 2.685600673747631e-05, "loss": 0.7217, "step": 1844 }, { "epoch": 0.3949586577827727, "grad_norm": 0.20023230636099884, "learning_rate": 2.684324028511176e-05, "loss": 0.707, "step": 1845 }, { "epoch": 0.3951727275159883, "grad_norm": 0.2059285593374697, "learning_rate": 2.683047067392002e-05, "loss": 0.75, "step": 1846 }, { "epoch": 0.39538679724920395, "grad_norm": 0.18334200201512382, "learning_rate": 2.6817697909795515e-05, "loss": 0.6988, "step": 1847 }, { "epoch": 0.39560086698241953, "grad_norm": 0.2156911167146091, "learning_rate": 2.680492199863414e-05, "loss": 0.7085, "step": 1848 }, { "epoch": 0.3958149367156351, "grad_norm": 0.19017938237739312, "learning_rate": 2.6792142946333227e-05, "loss": 0.707, "step": 1849 }, { "epoch": 0.3960290064488507, "grad_norm": 0.19714275913947663, "learning_rate": 2.6779360758791562e-05, "loss": 0.7341, "step": 1850 }, { "epoch": 0.39624307618206633, "grad_norm": 0.20087478676998408, "learning_rate": 2.6766575441909385e-05, "loss": 0.7097, "step": 1851 }, { "epoch": 0.3964571459152819, "grad_norm": 0.20793008326447115, "learning_rate": 2.6753787001588362e-05, "loss": 0.7248, "step": 1852 }, { "epoch": 0.3966712156484975, "grad_norm": 0.6508073669110191, "learning_rate": 2.6740995443731633e-05, "loss": 0.7027, "step": 1853 }, { "epoch": 0.3968852853817131, "grad_norm": 0.19346323481400934, "learning_rate": 2.6728200774243743e-05, "loss": 0.7196, "step": 1854 }, { "epoch": 0.39709935511492866, "grad_norm": 0.2159766043372786, "learning_rate": 2.671540299903069e-05, "loss": 0.7408, "step": 1855 }, { "epoch": 0.3973134248481443, "grad_norm": 0.19117659085865799, "learning_rate": 2.670260212399991e-05, "loss": 0.7003, "step": 1856 }, { "epoch": 0.3975274945813599, "grad_norm": 0.21387736853704956, "learning_rate": 2.6689798155060255e-05, "loss": 0.7206, "step": 1857 }, { "epoch": 0.39774156431457547, "grad_norm": 0.21098189486429983, "learning_rate": 2.6676991098122015e-05, "loss": 0.6961, "step": 1858 }, { "epoch": 0.39795563404779105, "grad_norm": 0.22704794833238748, "learning_rate": 2.6664180959096914e-05, "loss": 0.6859, "step": 1859 }, { "epoch": 0.3981697037810067, "grad_norm": 0.2141209364350246, "learning_rate": 2.6651367743898077e-05, "loss": 0.7247, "step": 1860 }, { "epoch": 0.3983837735142223, "grad_norm": 0.21345183388124112, "learning_rate": 2.6638551458440068e-05, "loss": 0.7122, "step": 1861 }, { "epoch": 0.39859784324743786, "grad_norm": 0.21988866363729603, "learning_rate": 2.662573210863886e-05, "loss": 0.7171, "step": 1862 }, { "epoch": 0.39881191298065344, "grad_norm": 0.22154389839515065, "learning_rate": 2.6612909700411827e-05, "loss": 0.7009, "step": 1863 }, { "epoch": 0.399025982713869, "grad_norm": 0.21850811127996428, "learning_rate": 2.6600084239677794e-05, "loss": 0.7225, "step": 1864 }, { "epoch": 0.39924005244708466, "grad_norm": 0.2076529383923364, "learning_rate": 2.658725573235695e-05, "loss": 0.7059, "step": 1865 }, { "epoch": 0.39945412218030024, "grad_norm": 0.20867367265947082, "learning_rate": 2.6574424184370927e-05, "loss": 0.7071, "step": 1866 }, { "epoch": 0.3996681919135158, "grad_norm": 0.20616840346644114, "learning_rate": 2.6561589601642732e-05, "loss": 0.7272, "step": 1867 }, { "epoch": 0.3998822616467314, "grad_norm": 0.21582995657450363, "learning_rate": 2.6548751990096783e-05, "loss": 0.7313, "step": 1868 }, { "epoch": 0.400096331379947, "grad_norm": 0.21784726207671676, "learning_rate": 2.6535911355658907e-05, "loss": 0.7514, "step": 1869 }, { "epoch": 0.40031040111316263, "grad_norm": 0.21100986770680158, "learning_rate": 2.6523067704256318e-05, "loss": 0.7352, "step": 1870 }, { "epoch": 0.4005244708463782, "grad_norm": 0.2309081220101376, "learning_rate": 2.6510221041817613e-05, "loss": 0.7178, "step": 1871 }, { "epoch": 0.4007385405795938, "grad_norm": 0.20078354730635983, "learning_rate": 2.6497371374272796e-05, "loss": 0.7211, "step": 1872 }, { "epoch": 0.4009526103128094, "grad_norm": 0.2299332800045435, "learning_rate": 2.648451870755324e-05, "loss": 0.7263, "step": 1873 }, { "epoch": 0.401166680046025, "grad_norm": 0.2047233556524905, "learning_rate": 2.6471663047591727e-05, "loss": 0.7087, "step": 1874 }, { "epoch": 0.4013807497792406, "grad_norm": 0.2213926142266927, "learning_rate": 2.6458804400322393e-05, "loss": 0.7556, "step": 1875 }, { "epoch": 0.4015948195124562, "grad_norm": 0.2349537978017295, "learning_rate": 2.6445942771680776e-05, "loss": 0.726, "step": 1876 }, { "epoch": 0.40180888924567176, "grad_norm": 0.1995934053519248, "learning_rate": 2.643307816760377e-05, "loss": 0.6919, "step": 1877 }, { "epoch": 0.40202295897888735, "grad_norm": 0.23635768573095461, "learning_rate": 2.642021059402966e-05, "loss": 0.7178, "step": 1878 }, { "epoch": 0.402237028712103, "grad_norm": 0.22746434828024978, "learning_rate": 2.640734005689809e-05, "loss": 0.7207, "step": 1879 }, { "epoch": 0.40245109844531857, "grad_norm": 0.2034955598877208, "learning_rate": 2.639446656215008e-05, "loss": 0.725, "step": 1880 }, { "epoch": 0.40266516817853415, "grad_norm": 0.24228383143082338, "learning_rate": 2.6381590115728015e-05, "loss": 0.7222, "step": 1881 }, { "epoch": 0.40287923791174973, "grad_norm": 0.22084391334038264, "learning_rate": 2.6368710723575633e-05, "loss": 0.7226, "step": 1882 }, { "epoch": 0.40309330764496537, "grad_norm": 0.21413001055885492, "learning_rate": 2.6355828391638036e-05, "loss": 0.7162, "step": 1883 }, { "epoch": 0.40330737737818095, "grad_norm": 0.25649482700300674, "learning_rate": 2.634294312586169e-05, "loss": 0.7188, "step": 1884 }, { "epoch": 0.40352144711139654, "grad_norm": 0.5704672527223288, "learning_rate": 2.633005493219441e-05, "loss": 0.7268, "step": 1885 }, { "epoch": 0.4037355168446121, "grad_norm": 0.8083517625708049, "learning_rate": 2.6317163816585357e-05, "loss": 0.7172, "step": 1886 }, { "epoch": 0.4039495865778277, "grad_norm": 0.23867837452281146, "learning_rate": 2.630426978498505e-05, "loss": 0.7368, "step": 1887 }, { "epoch": 0.40416365631104334, "grad_norm": 0.2609963405401781, "learning_rate": 2.6291372843345356e-05, "loss": 0.7167, "step": 1888 }, { "epoch": 0.4043777260442589, "grad_norm": 0.24308875778444147, "learning_rate": 2.6278472997619467e-05, "loss": 0.7447, "step": 1889 }, { "epoch": 0.4045917957774745, "grad_norm": 0.24630170170545196, "learning_rate": 2.626557025376194e-05, "loss": 0.7288, "step": 1890 }, { "epoch": 0.4048058655106901, "grad_norm": 0.22783814529196547, "learning_rate": 2.6252664617728655e-05, "loss": 0.7282, "step": 1891 }, { "epoch": 0.4050199352439057, "grad_norm": 0.20511205874940108, "learning_rate": 2.6239756095476824e-05, "loss": 0.6931, "step": 1892 }, { "epoch": 0.4052340049771213, "grad_norm": 0.2168482153786405, "learning_rate": 2.622684469296501e-05, "loss": 0.7347, "step": 1893 }, { "epoch": 0.4054480747103369, "grad_norm": 0.20211723967047945, "learning_rate": 2.6213930416153072e-05, "loss": 0.7445, "step": 1894 }, { "epoch": 0.4056621444435525, "grad_norm": 0.2225638010297805, "learning_rate": 2.620101327100224e-05, "loss": 0.7724, "step": 1895 }, { "epoch": 0.40587621417676806, "grad_norm": 0.22032432763512383, "learning_rate": 2.6188093263475028e-05, "loss": 0.7028, "step": 1896 }, { "epoch": 0.4060902839099837, "grad_norm": 0.2024287932420587, "learning_rate": 2.6175170399535298e-05, "loss": 0.6996, "step": 1897 }, { "epoch": 0.4063043536431993, "grad_norm": 0.24610753264990934, "learning_rate": 2.6162244685148212e-05, "loss": 0.7157, "step": 1898 }, { "epoch": 0.40651842337641486, "grad_norm": 0.23955346304070563, "learning_rate": 2.614931612628026e-05, "loss": 0.6938, "step": 1899 }, { "epoch": 0.40673249310963044, "grad_norm": 0.21850323457991389, "learning_rate": 2.6136384728899236e-05, "loss": 0.7198, "step": 1900 }, { "epoch": 0.4069465628428461, "grad_norm": 0.273305980195453, "learning_rate": 2.6123450498974263e-05, "loss": 0.7383, "step": 1901 }, { "epoch": 0.40716063257606167, "grad_norm": 0.24466052256934773, "learning_rate": 2.6110513442475743e-05, "loss": 0.7088, "step": 1902 }, { "epoch": 0.40737470230927725, "grad_norm": 0.24202780793832876, "learning_rate": 2.6097573565375412e-05, "loss": 0.7186, "step": 1903 }, { "epoch": 0.40758877204249283, "grad_norm": 0.2549373895446313, "learning_rate": 2.6084630873646278e-05, "loss": 0.7164, "step": 1904 }, { "epoch": 0.4078028417757084, "grad_norm": 0.21691531059719876, "learning_rate": 2.6071685373262668e-05, "loss": 0.7145, "step": 1905 }, { "epoch": 0.40801691150892405, "grad_norm": 0.25176802444622576, "learning_rate": 2.605873707020021e-05, "loss": 0.6862, "step": 1906 }, { "epoch": 0.40823098124213963, "grad_norm": 0.22918949951025416, "learning_rate": 2.604578597043581e-05, "loss": 0.7233, "step": 1907 }, { "epoch": 0.4084450509753552, "grad_norm": 0.19971965526751126, "learning_rate": 2.6032832079947676e-05, "loss": 0.7391, "step": 1908 }, { "epoch": 0.4086591207085708, "grad_norm": 0.24064664225175378, "learning_rate": 2.6019875404715293e-05, "loss": 0.711, "step": 1909 }, { "epoch": 0.40887319044178644, "grad_norm": 0.20561373418668893, "learning_rate": 2.6006915950719444e-05, "loss": 0.7371, "step": 1910 }, { "epoch": 0.409087260175002, "grad_norm": 0.22462580804939875, "learning_rate": 2.599395372394219e-05, "loss": 0.7016, "step": 1911 }, { "epoch": 0.4093013299082176, "grad_norm": 0.19432680958986376, "learning_rate": 2.598098873036687e-05, "loss": 0.7179, "step": 1912 }, { "epoch": 0.4095153996414332, "grad_norm": 0.20345685887146073, "learning_rate": 2.59680209759781e-05, "loss": 0.7404, "step": 1913 }, { "epoch": 0.40972946937464877, "grad_norm": 0.204595466035068, "learning_rate": 2.595505046676177e-05, "loss": 0.7383, "step": 1914 }, { "epoch": 0.4099435391078644, "grad_norm": 0.22033488011141625, "learning_rate": 2.5942077208705043e-05, "loss": 0.7286, "step": 1915 }, { "epoch": 0.41015760884108, "grad_norm": 0.1911724005421792, "learning_rate": 2.592910120779636e-05, "loss": 0.7083, "step": 1916 }, { "epoch": 0.4103716785742956, "grad_norm": 0.2325893700714623, "learning_rate": 2.5916122470025414e-05, "loss": 0.702, "step": 1917 }, { "epoch": 0.41058574830751116, "grad_norm": 0.20345234686251643, "learning_rate": 2.5903141001383162e-05, "loss": 0.7079, "step": 1918 }, { "epoch": 0.4107998180407268, "grad_norm": 0.2094767112427494, "learning_rate": 2.5890156807861832e-05, "loss": 0.7248, "step": 1919 }, { "epoch": 0.4110138877739424, "grad_norm": 0.20783405251155448, "learning_rate": 2.5877169895454902e-05, "loss": 0.6962, "step": 1920 }, { "epoch": 0.41122795750715796, "grad_norm": 0.1989495634669376, "learning_rate": 2.58641802701571e-05, "loss": 0.7393, "step": 1921 }, { "epoch": 0.41144202724037354, "grad_norm": 0.2066335761778752, "learning_rate": 2.5851187937964426e-05, "loss": 0.7257, "step": 1922 }, { "epoch": 0.4116560969735891, "grad_norm": 0.2094826026211216, "learning_rate": 2.5838192904874114e-05, "loss": 0.6955, "step": 1923 }, { "epoch": 0.41187016670680476, "grad_norm": 0.1910822520168472, "learning_rate": 2.5825195176884634e-05, "loss": 0.7483, "step": 1924 }, { "epoch": 0.41208423644002035, "grad_norm": 0.2150962594241971, "learning_rate": 2.581219475999573e-05, "loss": 0.7212, "step": 1925 }, { "epoch": 0.41229830617323593, "grad_norm": 0.2150972987064017, "learning_rate": 2.5799191660208366e-05, "loss": 0.6952, "step": 1926 }, { "epoch": 0.4125123759064515, "grad_norm": 0.18914642558669026, "learning_rate": 2.578618588352475e-05, "loss": 0.7445, "step": 1927 }, { "epoch": 0.4127264456396671, "grad_norm": 0.2255497767475264, "learning_rate": 2.5773177435948315e-05, "loss": 0.719, "step": 1928 }, { "epoch": 0.41294051537288273, "grad_norm": 0.2072085937871065, "learning_rate": 2.5760166323483747e-05, "loss": 0.6834, "step": 1929 }, { "epoch": 0.4131545851060983, "grad_norm": 0.20275291633445486, "learning_rate": 2.574715255213695e-05, "loss": 0.7173, "step": 1930 }, { "epoch": 0.4133686548393139, "grad_norm": 0.21153638621965765, "learning_rate": 2.5734136127915053e-05, "loss": 0.7049, "step": 1931 }, { "epoch": 0.4135827245725295, "grad_norm": 0.20017657516515155, "learning_rate": 2.572111705682642e-05, "loss": 0.7027, "step": 1932 }, { "epoch": 0.4137967943057451, "grad_norm": 0.1967481674114891, "learning_rate": 2.5708095344880627e-05, "loss": 0.6984, "step": 1933 }, { "epoch": 0.4140108640389607, "grad_norm": 0.2046817295814358, "learning_rate": 2.5695070998088465e-05, "loss": 0.7212, "step": 1934 }, { "epoch": 0.4142249337721763, "grad_norm": 0.19165665027618903, "learning_rate": 2.568204402246196e-05, "loss": 0.7316, "step": 1935 }, { "epoch": 0.41443900350539187, "grad_norm": 0.19922866996038252, "learning_rate": 2.5669014424014335e-05, "loss": 0.701, "step": 1936 }, { "epoch": 0.41465307323860745, "grad_norm": 0.2017645241746894, "learning_rate": 2.5655982208760032e-05, "loss": 0.7472, "step": 1937 }, { "epoch": 0.4148671429718231, "grad_norm": 0.18544441818376398, "learning_rate": 2.5642947382714693e-05, "loss": 0.7339, "step": 1938 }, { "epoch": 0.41508121270503867, "grad_norm": 0.20047856917863255, "learning_rate": 2.562990995189517e-05, "loss": 0.7296, "step": 1939 }, { "epoch": 0.41529528243825425, "grad_norm": 0.19468005756206275, "learning_rate": 2.5616869922319523e-05, "loss": 0.6956, "step": 1940 }, { "epoch": 0.41550935217146984, "grad_norm": 0.21796914272342383, "learning_rate": 2.5603827300007e-05, "loss": 0.7219, "step": 1941 }, { "epoch": 0.4157234219046855, "grad_norm": 0.20537795000094566, "learning_rate": 2.559078209097805e-05, "loss": 0.7526, "step": 1942 }, { "epoch": 0.41593749163790106, "grad_norm": 0.20912238933307317, "learning_rate": 2.5577734301254326e-05, "loss": 0.7083, "step": 1943 }, { "epoch": 0.41615156137111664, "grad_norm": 0.2097315785188199, "learning_rate": 2.5564683936858656e-05, "loss": 0.7165, "step": 1944 }, { "epoch": 0.4163656311043322, "grad_norm": 0.1964083983221885, "learning_rate": 2.5551631003815073e-05, "loss": 0.7257, "step": 1945 }, { "epoch": 0.4165797008375478, "grad_norm": 0.21864109289469968, "learning_rate": 2.553857550814877e-05, "loss": 0.7024, "step": 1946 }, { "epoch": 0.41679377057076344, "grad_norm": 0.2129954779508628, "learning_rate": 2.552551745588616e-05, "loss": 0.7068, "step": 1947 }, { "epoch": 0.417007840303979, "grad_norm": 0.20250983850460993, "learning_rate": 2.551245685305481e-05, "loss": 0.7009, "step": 1948 }, { "epoch": 0.4172219100371946, "grad_norm": 0.2321421577627822, "learning_rate": 2.5499393705683463e-05, "loss": 0.7214, "step": 1949 }, { "epoch": 0.4174359797704102, "grad_norm": 0.22581848164609403, "learning_rate": 2.5486328019802048e-05, "loss": 0.7387, "step": 1950 }, { "epoch": 0.41765004950362583, "grad_norm": 0.20473702821534412, "learning_rate": 2.5473259801441663e-05, "loss": 0.7036, "step": 1951 }, { "epoch": 0.4178641192368414, "grad_norm": 0.2272726521511251, "learning_rate": 2.546018905663457e-05, "loss": 0.7, "step": 1952 }, { "epoch": 0.418078188970057, "grad_norm": 0.19126378115809958, "learning_rate": 2.5447115791414206e-05, "loss": 0.7024, "step": 1953 }, { "epoch": 0.4182922587032726, "grad_norm": 0.21806172821592826, "learning_rate": 2.543404001181516e-05, "loss": 0.7166, "step": 1954 }, { "epoch": 0.41850632843648816, "grad_norm": 0.19743518950519123, "learning_rate": 2.54209617238732e-05, "loss": 0.7284, "step": 1955 }, { "epoch": 0.4187203981697038, "grad_norm": 0.21011429539358614, "learning_rate": 2.5407880933625234e-05, "loss": 0.6994, "step": 1956 }, { "epoch": 0.4189344679029194, "grad_norm": 0.18938765974344965, "learning_rate": 2.539479764710932e-05, "loss": 0.7151, "step": 1957 }, { "epoch": 0.41914853763613497, "grad_norm": 0.19169633785753745, "learning_rate": 2.5381711870364685e-05, "loss": 0.7416, "step": 1958 }, { "epoch": 0.41936260736935055, "grad_norm": 0.20286117142219692, "learning_rate": 2.5368623609431707e-05, "loss": 0.7366, "step": 1959 }, { "epoch": 0.4195766771025662, "grad_norm": 0.18160568259129048, "learning_rate": 2.5355532870351902e-05, "loss": 0.7102, "step": 1960 }, { "epoch": 0.41979074683578177, "grad_norm": 0.21587986305313922, "learning_rate": 2.5342439659167924e-05, "loss": 0.6974, "step": 1961 }, { "epoch": 0.42000481656899735, "grad_norm": 0.19979989518781668, "learning_rate": 2.5329343981923584e-05, "loss": 0.7029, "step": 1962 }, { "epoch": 0.42021888630221294, "grad_norm": 0.2024292381380751, "learning_rate": 2.5316245844663813e-05, "loss": 0.7171, "step": 1963 }, { "epoch": 0.4204329560354285, "grad_norm": 0.21342938170999215, "learning_rate": 2.5303145253434692e-05, "loss": 0.6812, "step": 1964 }, { "epoch": 0.42064702576864416, "grad_norm": 0.204676288891744, "learning_rate": 2.529004221428343e-05, "loss": 0.7186, "step": 1965 }, { "epoch": 0.42086109550185974, "grad_norm": 0.18963774223060337, "learning_rate": 2.527693673325836e-05, "loss": 0.7496, "step": 1966 }, { "epoch": 0.4210751652350753, "grad_norm": 0.2454498865550515, "learning_rate": 2.5263828816408963e-05, "loss": 0.6841, "step": 1967 }, { "epoch": 0.4212892349682909, "grad_norm": 0.19192947876362684, "learning_rate": 2.5250718469785812e-05, "loss": 0.7093, "step": 1968 }, { "epoch": 0.42150330470150654, "grad_norm": 0.20654780614157478, "learning_rate": 2.523760569944063e-05, "loss": 0.7041, "step": 1969 }, { "epoch": 0.4217173744347221, "grad_norm": 0.19539311908289664, "learning_rate": 2.522449051142625e-05, "loss": 0.7101, "step": 1970 }, { "epoch": 0.4219314441679377, "grad_norm": 0.20980351531615526, "learning_rate": 2.5211372911796613e-05, "loss": 0.7144, "step": 1971 }, { "epoch": 0.4221455139011533, "grad_norm": 0.20143555994115453, "learning_rate": 2.5198252906606778e-05, "loss": 0.7062, "step": 1972 }, { "epoch": 0.4223595836343689, "grad_norm": 0.23482288302307963, "learning_rate": 2.5185130501912913e-05, "loss": 0.7194, "step": 1973 }, { "epoch": 0.4225736533675845, "grad_norm": 0.2629980468265691, "learning_rate": 2.5172005703772306e-05, "loss": 0.7517, "step": 1974 }, { "epoch": 0.4227877231008001, "grad_norm": 0.28126509097849256, "learning_rate": 2.515887851824333e-05, "loss": 0.7169, "step": 1975 }, { "epoch": 0.4230017928340157, "grad_norm": 0.22778076892904672, "learning_rate": 2.5145748951385475e-05, "loss": 0.718, "step": 1976 }, { "epoch": 0.42321586256723126, "grad_norm": 0.23880179955790307, "learning_rate": 2.5132617009259324e-05, "loss": 0.7389, "step": 1977 }, { "epoch": 0.42342993230044684, "grad_norm": 0.26830862385188575, "learning_rate": 2.511948269792656e-05, "loss": 0.7002, "step": 1978 }, { "epoch": 0.4236440020336625, "grad_norm": 0.19470907074977814, "learning_rate": 2.5106346023449944e-05, "loss": 0.7097, "step": 1979 }, { "epoch": 0.42385807176687806, "grad_norm": 0.1917374137557521, "learning_rate": 2.509320699189336e-05, "loss": 0.7205, "step": 1980 }, { "epoch": 0.42407214150009365, "grad_norm": 0.22858071347959596, "learning_rate": 2.5080065609321755e-05, "loss": 0.7334, "step": 1981 }, { "epoch": 0.42428621123330923, "grad_norm": 0.19521587171761723, "learning_rate": 2.506692188180116e-05, "loss": 0.6953, "step": 1982 }, { "epoch": 0.42450028096652487, "grad_norm": 0.2044269464457987, "learning_rate": 2.5053775815398698e-05, "loss": 0.6957, "step": 1983 }, { "epoch": 0.42471435069974045, "grad_norm": 0.1949724665009927, "learning_rate": 2.504062741618257e-05, "loss": 0.7169, "step": 1984 }, { "epoch": 0.42492842043295603, "grad_norm": 0.18414223116499662, "learning_rate": 2.5027476690222058e-05, "loss": 0.735, "step": 1985 }, { "epoch": 0.4251424901661716, "grad_norm": 0.20613056383046116, "learning_rate": 2.5014323643587504e-05, "loss": 0.6837, "step": 1986 }, { "epoch": 0.4253565598993872, "grad_norm": 0.18405903615562325, "learning_rate": 2.5001168282350338e-05, "loss": 0.7247, "step": 1987 }, { "epoch": 0.42557062963260284, "grad_norm": 0.20336121514951713, "learning_rate": 2.4988010612583053e-05, "loss": 0.712, "step": 1988 }, { "epoch": 0.4257846993658184, "grad_norm": 0.19351664028649984, "learning_rate": 2.4974850640359192e-05, "loss": 0.7462, "step": 1989 }, { "epoch": 0.425998769099034, "grad_norm": 0.2014274630453742, "learning_rate": 2.4961688371753385e-05, "loss": 0.7053, "step": 1990 }, { "epoch": 0.4262128388322496, "grad_norm": 0.22483785467393871, "learning_rate": 2.494852381284131e-05, "loss": 0.7255, "step": 1991 }, { "epoch": 0.4264269085654652, "grad_norm": 0.19252394472523177, "learning_rate": 2.49353569696997e-05, "loss": 0.7224, "step": 1992 }, { "epoch": 0.4266409782986808, "grad_norm": 0.23309321637811792, "learning_rate": 2.4922187848406348e-05, "loss": 0.7487, "step": 1993 }, { "epoch": 0.4268550480318964, "grad_norm": 0.1979293761717918, "learning_rate": 2.490901645504009e-05, "loss": 0.7257, "step": 1994 }, { "epoch": 0.42706911776511197, "grad_norm": 0.20447486580244634, "learning_rate": 2.4895842795680834e-05, "loss": 0.6863, "step": 1995 }, { "epoch": 0.42728318749832755, "grad_norm": 0.22180729390738765, "learning_rate": 2.4882666876409495e-05, "loss": 0.7529, "step": 1996 }, { "epoch": 0.4274972572315432, "grad_norm": 0.20562832861605798, "learning_rate": 2.486948870330807e-05, "loss": 0.7051, "step": 1997 }, { "epoch": 0.4277113269647588, "grad_norm": 0.2835800373436375, "learning_rate": 2.4856308282459575e-05, "loss": 0.7083, "step": 1998 }, { "epoch": 0.42792539669797436, "grad_norm": 0.2125601755785729, "learning_rate": 2.4843125619948064e-05, "loss": 0.706, "step": 1999 }, { "epoch": 0.42813946643118994, "grad_norm": 0.19951958840163336, "learning_rate": 2.482994072185863e-05, "loss": 0.7333, "step": 2000 }, { "epoch": 0.4283535361644056, "grad_norm": 0.23352727958314665, "learning_rate": 2.4816753594277402e-05, "loss": 0.7056, "step": 2001 }, { "epoch": 0.42856760589762116, "grad_norm": 0.17971296646507665, "learning_rate": 2.4803564243291534e-05, "loss": 0.7399, "step": 2002 }, { "epoch": 0.42878167563083674, "grad_norm": 0.21442277455899422, "learning_rate": 2.4790372674989205e-05, "loss": 0.6932, "step": 2003 }, { "epoch": 0.4289957453640523, "grad_norm": 0.21751520674543545, "learning_rate": 2.4777178895459617e-05, "loss": 0.7191, "step": 2004 }, { "epoch": 0.4292098150972679, "grad_norm": 0.18357623494158104, "learning_rate": 2.4763982910792993e-05, "loss": 0.7189, "step": 2005 }, { "epoch": 0.42942388483048355, "grad_norm": 0.19412430991067642, "learning_rate": 2.475078472708058e-05, "loss": 0.7033, "step": 2006 }, { "epoch": 0.42963795456369913, "grad_norm": 0.20851442089275044, "learning_rate": 2.4737584350414635e-05, "loss": 0.6986, "step": 2007 }, { "epoch": 0.4298520242969147, "grad_norm": 0.2012724231766711, "learning_rate": 2.4724381786888426e-05, "loss": 0.6853, "step": 2008 }, { "epoch": 0.4300660940301303, "grad_norm": 0.19014397968561714, "learning_rate": 2.4711177042596232e-05, "loss": 0.7229, "step": 2009 }, { "epoch": 0.43028016376334594, "grad_norm": 0.2031082273686637, "learning_rate": 2.469797012363334e-05, "loss": 0.7419, "step": 2010 }, { "epoch": 0.4304942334965615, "grad_norm": 0.18665928937625678, "learning_rate": 2.4684761036096036e-05, "loss": 0.7099, "step": 2011 }, { "epoch": 0.4307083032297771, "grad_norm": 0.19617117047536298, "learning_rate": 2.4671549786081615e-05, "loss": 0.7343, "step": 2012 }, { "epoch": 0.4309223729629927, "grad_norm": 0.20369053805829532, "learning_rate": 2.4658336379688366e-05, "loss": 0.7151, "step": 2013 }, { "epoch": 0.43113644269620827, "grad_norm": 0.18934183530174764, "learning_rate": 2.4645120823015572e-05, "loss": 0.7272, "step": 2014 }, { "epoch": 0.4313505124294239, "grad_norm": 0.2001952717059697, "learning_rate": 2.463190312216351e-05, "loss": 0.6889, "step": 2015 }, { "epoch": 0.4315645821626395, "grad_norm": 0.1989442744871271, "learning_rate": 2.461868328323344e-05, "loss": 0.7228, "step": 2016 }, { "epoch": 0.43177865189585507, "grad_norm": 0.21450949569335329, "learning_rate": 2.4605461312327624e-05, "loss": 0.7149, "step": 2017 }, { "epoch": 0.43199272162907065, "grad_norm": 0.20680933027804005, "learning_rate": 2.4592237215549305e-05, "loss": 0.7267, "step": 2018 }, { "epoch": 0.4322067913622863, "grad_norm": 0.20812795097276574, "learning_rate": 2.4579010999002683e-05, "loss": 0.7183, "step": 2019 }, { "epoch": 0.4324208610955019, "grad_norm": 0.24126825216101877, "learning_rate": 2.4565782668792975e-05, "loss": 0.7179, "step": 2020 }, { "epoch": 0.43263493082871746, "grad_norm": 0.1933731221445781, "learning_rate": 2.4552552231026337e-05, "loss": 0.7175, "step": 2021 }, { "epoch": 0.43284900056193304, "grad_norm": 0.23747618529883308, "learning_rate": 2.4539319691809924e-05, "loss": 0.7302, "step": 2022 }, { "epoch": 0.4330630702951486, "grad_norm": 0.21002887072082252, "learning_rate": 2.4526085057251856e-05, "loss": 0.7075, "step": 2023 }, { "epoch": 0.43327714002836426, "grad_norm": 0.2096821368675051, "learning_rate": 2.4512848333461206e-05, "loss": 0.7227, "step": 2024 }, { "epoch": 0.43349120976157984, "grad_norm": 0.24851552961252657, "learning_rate": 2.4499609526548033e-05, "loss": 0.694, "step": 2025 }, { "epoch": 0.4337052794947954, "grad_norm": 0.20719164220240477, "learning_rate": 2.4486368642623327e-05, "loss": 0.7098, "step": 2026 }, { "epoch": 0.433919349228011, "grad_norm": 0.20652034437446656, "learning_rate": 2.447312568779908e-05, "loss": 0.7256, "step": 2027 }, { "epoch": 0.43413341896122665, "grad_norm": 0.22951985548119255, "learning_rate": 2.44598806681882e-05, "loss": 0.7082, "step": 2028 }, { "epoch": 0.43434748869444223, "grad_norm": 0.18357655157498282, "learning_rate": 2.4446633589904564e-05, "loss": 0.6882, "step": 2029 }, { "epoch": 0.4345615584276578, "grad_norm": 0.2221418129967864, "learning_rate": 2.443338445906301e-05, "loss": 0.71, "step": 2030 }, { "epoch": 0.4347756281608734, "grad_norm": 0.19384174925665745, "learning_rate": 2.4420133281779297e-05, "loss": 0.6931, "step": 2031 }, { "epoch": 0.434989697894089, "grad_norm": 0.19453526708516902, "learning_rate": 2.4406880064170156e-05, "loss": 0.7394, "step": 2032 }, { "epoch": 0.4352037676273046, "grad_norm": 0.20168817080497953, "learning_rate": 2.439362481235325e-05, "loss": 0.7099, "step": 2033 }, { "epoch": 0.4354178373605202, "grad_norm": 0.1923342230318402, "learning_rate": 2.4380367532447168e-05, "loss": 0.7287, "step": 2034 }, { "epoch": 0.4356319070937358, "grad_norm": 0.20388412643436957, "learning_rate": 2.4367108230571453e-05, "loss": 0.6853, "step": 2035 }, { "epoch": 0.43584597682695136, "grad_norm": 0.2078287711057543, "learning_rate": 2.4353846912846567e-05, "loss": 0.7216, "step": 2036 }, { "epoch": 0.43606004656016695, "grad_norm": 0.17972524824407618, "learning_rate": 2.4340583585393925e-05, "loss": 0.6891, "step": 2037 }, { "epoch": 0.4362741162933826, "grad_norm": 0.1871383780412847, "learning_rate": 2.4327318254335845e-05, "loss": 0.711, "step": 2038 }, { "epoch": 0.43648818602659817, "grad_norm": 0.1876217730924942, "learning_rate": 2.4314050925795578e-05, "loss": 0.7329, "step": 2039 }, { "epoch": 0.43670225575981375, "grad_norm": 0.18719098254903513, "learning_rate": 2.43007816058973e-05, "loss": 0.7131, "step": 2040 }, { "epoch": 0.43691632549302933, "grad_norm": 0.18356727762805758, "learning_rate": 2.4287510300766107e-05, "loss": 0.6964, "step": 2041 }, { "epoch": 0.43713039522624497, "grad_norm": 0.17536655782284724, "learning_rate": 2.4274237016528e-05, "loss": 0.6883, "step": 2042 }, { "epoch": 0.43734446495946055, "grad_norm": 0.19540611769329763, "learning_rate": 2.426096175930992e-05, "loss": 0.7179, "step": 2043 }, { "epoch": 0.43755853469267614, "grad_norm": 0.1856451253028403, "learning_rate": 2.424768453523969e-05, "loss": 0.7021, "step": 2044 }, { "epoch": 0.4377726044258917, "grad_norm": 0.19665118427903588, "learning_rate": 2.4234405350446055e-05, "loss": 0.7191, "step": 2045 }, { "epoch": 0.4379866741591073, "grad_norm": 0.27210370905867626, "learning_rate": 2.422112421105866e-05, "loss": 0.7391, "step": 2046 }, { "epoch": 0.43820074389232294, "grad_norm": 0.2049765553860846, "learning_rate": 2.4207841123208055e-05, "loss": 0.7298, "step": 2047 }, { "epoch": 0.4384148136255385, "grad_norm": 0.18648693299756902, "learning_rate": 2.419455609302569e-05, "loss": 0.7176, "step": 2048 }, { "epoch": 0.4386288833587541, "grad_norm": 0.1973831653039735, "learning_rate": 2.4181269126643918e-05, "loss": 0.686, "step": 2049 }, { "epoch": 0.4388429530919697, "grad_norm": 0.18632585455591297, "learning_rate": 2.416798023019596e-05, "loss": 0.6905, "step": 2050 }, { "epoch": 0.43905702282518533, "grad_norm": 0.18149024732686886, "learning_rate": 2.4154689409815967e-05, "loss": 0.6879, "step": 2051 }, { "epoch": 0.4392710925584009, "grad_norm": 0.21353439002266103, "learning_rate": 2.414139667163894e-05, "loss": 0.733, "step": 2052 }, { "epoch": 0.4394851622916165, "grad_norm": 0.18791274780190753, "learning_rate": 2.4128102021800794e-05, "loss": 0.7366, "step": 2053 }, { "epoch": 0.4396992320248321, "grad_norm": 0.1896264638812108, "learning_rate": 2.4114805466438315e-05, "loss": 0.7141, "step": 2054 }, { "epoch": 0.43991330175804766, "grad_norm": 0.18831087940307026, "learning_rate": 2.4101507011689162e-05, "loss": 0.711, "step": 2055 }, { "epoch": 0.4401273714912633, "grad_norm": 0.18427173728735963, "learning_rate": 2.408820666369188e-05, "loss": 0.7197, "step": 2056 }, { "epoch": 0.4403414412244789, "grad_norm": 0.19079804512387546, "learning_rate": 2.4074904428585884e-05, "loss": 0.6952, "step": 2057 }, { "epoch": 0.44055551095769446, "grad_norm": 0.1897212969120429, "learning_rate": 2.4061600312511468e-05, "loss": 0.7489, "step": 2058 }, { "epoch": 0.44076958069091005, "grad_norm": 0.19581239342259346, "learning_rate": 2.4048294321609782e-05, "loss": 0.7612, "step": 2059 }, { "epoch": 0.4409836504241257, "grad_norm": 0.19963804135525962, "learning_rate": 2.4034986462022847e-05, "loss": 0.7355, "step": 2060 }, { "epoch": 0.44119772015734127, "grad_norm": 0.18445213879820282, "learning_rate": 2.4021676739893547e-05, "loss": 0.6854, "step": 2061 }, { "epoch": 0.44141178989055685, "grad_norm": 0.19668533263438023, "learning_rate": 2.4008365161365624e-05, "loss": 0.7418, "step": 2062 }, { "epoch": 0.44162585962377243, "grad_norm": 0.20749067960177486, "learning_rate": 2.3995051732583684e-05, "loss": 0.7091, "step": 2063 }, { "epoch": 0.441839929356988, "grad_norm": 0.2081349996777893, "learning_rate": 2.3981736459693172e-05, "loss": 0.7311, "step": 2064 }, { "epoch": 0.44205399909020365, "grad_norm": 0.20498156314794147, "learning_rate": 2.3968419348840403e-05, "loss": 0.7133, "step": 2065 }, { "epoch": 0.44226806882341924, "grad_norm": 0.20214150959688085, "learning_rate": 2.3955100406172533e-05, "loss": 0.7189, "step": 2066 }, { "epoch": 0.4424821385566348, "grad_norm": 0.18175552919557034, "learning_rate": 2.394177963783755e-05, "loss": 0.7188, "step": 2067 }, { "epoch": 0.4426962082898504, "grad_norm": 0.21315627286015912, "learning_rate": 2.3928457049984294e-05, "loss": 0.7003, "step": 2068 }, { "epoch": 0.44291027802306604, "grad_norm": 0.17134304533018946, "learning_rate": 2.391513264876246e-05, "loss": 0.717, "step": 2069 }, { "epoch": 0.4431243477562816, "grad_norm": 0.20499242709853013, "learning_rate": 2.390180644032257e-05, "loss": 0.6917, "step": 2070 }, { "epoch": 0.4433384174894972, "grad_norm": 0.18718900299139493, "learning_rate": 2.3888478430815963e-05, "loss": 0.6969, "step": 2071 }, { "epoch": 0.4435524872227128, "grad_norm": 0.18602463829878715, "learning_rate": 2.387514862639483e-05, "loss": 0.7029, "step": 2072 }, { "epoch": 0.44376655695592837, "grad_norm": 0.22042874220191952, "learning_rate": 2.3861817033212185e-05, "loss": 0.6947, "step": 2073 }, { "epoch": 0.443980626689144, "grad_norm": 0.18009310184104058, "learning_rate": 2.3848483657421868e-05, "loss": 0.7088, "step": 2074 }, { "epoch": 0.4441946964223596, "grad_norm": 0.2006868546139837, "learning_rate": 2.383514850517854e-05, "loss": 0.6993, "step": 2075 }, { "epoch": 0.4444087661555752, "grad_norm": 0.2062353966689945, "learning_rate": 2.3821811582637687e-05, "loss": 0.7176, "step": 2076 }, { "epoch": 0.44462283588879076, "grad_norm": 0.18428277020755152, "learning_rate": 2.38084728959556e-05, "loss": 0.7136, "step": 2077 }, { "epoch": 0.4448369056220064, "grad_norm": 0.19040739557886138, "learning_rate": 2.379513245128939e-05, "loss": 0.7177, "step": 2078 }, { "epoch": 0.445050975355222, "grad_norm": 0.181095972545011, "learning_rate": 2.3781790254796993e-05, "loss": 0.7115, "step": 2079 }, { "epoch": 0.44526504508843756, "grad_norm": 0.213847801473037, "learning_rate": 2.3768446312637137e-05, "loss": 0.7004, "step": 2080 }, { "epoch": 0.44547911482165314, "grad_norm": 0.2064309884570408, "learning_rate": 2.375510063096936e-05, "loss": 0.7197, "step": 2081 }, { "epoch": 0.4456931845548687, "grad_norm": 0.1890483606201992, "learning_rate": 2.374175321595401e-05, "loss": 0.6993, "step": 2082 }, { "epoch": 0.44590725428808436, "grad_norm": 0.21580698521259575, "learning_rate": 2.372840407375222e-05, "loss": 0.7168, "step": 2083 }, { "epoch": 0.44612132402129995, "grad_norm": 0.18883476811571928, "learning_rate": 2.3715053210525937e-05, "loss": 0.7019, "step": 2084 }, { "epoch": 0.44633539375451553, "grad_norm": 0.18565956158490704, "learning_rate": 2.3701700632437892e-05, "loss": 0.725, "step": 2085 }, { "epoch": 0.4465494634877311, "grad_norm": 0.2081209436161066, "learning_rate": 2.3688346345651612e-05, "loss": 0.7163, "step": 2086 }, { "epoch": 0.4467635332209467, "grad_norm": 0.18502706911103697, "learning_rate": 2.367499035633141e-05, "loss": 0.7079, "step": 2087 }, { "epoch": 0.44697760295416233, "grad_norm": 0.19961209619379117, "learning_rate": 2.3661632670642386e-05, "loss": 0.7405, "step": 2088 }, { "epoch": 0.4471916726873779, "grad_norm": 0.18601078708322014, "learning_rate": 2.3648273294750425e-05, "loss": 0.6957, "step": 2089 }, { "epoch": 0.4474057424205935, "grad_norm": 0.20980173919175385, "learning_rate": 2.3634912234822194e-05, "loss": 0.7033, "step": 2090 }, { "epoch": 0.4476198121538091, "grad_norm": 0.19028429850671252, "learning_rate": 2.3621549497025118e-05, "loss": 0.706, "step": 2091 }, { "epoch": 0.4478338818870247, "grad_norm": 0.186215889007293, "learning_rate": 2.3608185087527432e-05, "loss": 0.7038, "step": 2092 }, { "epoch": 0.4480479516202403, "grad_norm": 0.18355418658930112, "learning_rate": 2.3594819012498115e-05, "loss": 0.6964, "step": 2093 }, { "epoch": 0.4482620213534559, "grad_norm": 0.18758532546585963, "learning_rate": 2.3581451278106924e-05, "loss": 0.7057, "step": 2094 }, { "epoch": 0.44847609108667147, "grad_norm": 0.19956411209155378, "learning_rate": 2.356808189052437e-05, "loss": 0.7236, "step": 2095 }, { "epoch": 0.44869016081988705, "grad_norm": 0.1778401001903628, "learning_rate": 2.3554710855921756e-05, "loss": 0.7422, "step": 2096 }, { "epoch": 0.4489042305531027, "grad_norm": 0.21045480952717957, "learning_rate": 2.3541338180471115e-05, "loss": 0.6927, "step": 2097 }, { "epoch": 0.4491183002863183, "grad_norm": 0.19311618494876245, "learning_rate": 2.352796387034525e-05, "loss": 0.7094, "step": 2098 }, { "epoch": 0.44933237001953386, "grad_norm": 0.19975375908131546, "learning_rate": 2.3514587931717724e-05, "loss": 0.723, "step": 2099 }, { "epoch": 0.44954643975274944, "grad_norm": 0.20640555154849616, "learning_rate": 2.350121037076284e-05, "loss": 0.7163, "step": 2100 }, { "epoch": 0.4497605094859651, "grad_norm": 0.1948182699005542, "learning_rate": 2.3487831193655666e-05, "loss": 0.719, "step": 2101 }, { "epoch": 0.44997457921918066, "grad_norm": 0.19614745708909373, "learning_rate": 2.347445040657199e-05, "loss": 0.7032, "step": 2102 }, { "epoch": 0.45018864895239624, "grad_norm": 0.21701631897555446, "learning_rate": 2.3461068015688372e-05, "loss": 0.6824, "step": 2103 }, { "epoch": 0.4504027186856118, "grad_norm": 0.19471397707591673, "learning_rate": 2.344768402718209e-05, "loss": 0.7108, "step": 2104 }, { "epoch": 0.4506167884188274, "grad_norm": 0.3836571163793701, "learning_rate": 2.3434298447231165e-05, "loss": 0.7346, "step": 2105 }, { "epoch": 0.45083085815204305, "grad_norm": 0.18322266761973127, "learning_rate": 2.3420911282014373e-05, "loss": 0.705, "step": 2106 }, { "epoch": 0.45104492788525863, "grad_norm": 0.21850351001536295, "learning_rate": 2.340752253771119e-05, "loss": 0.7049, "step": 2107 }, { "epoch": 0.4512589976184742, "grad_norm": 0.18326116754485203, "learning_rate": 2.339413222050185e-05, "loss": 0.758, "step": 2108 }, { "epoch": 0.4514730673516898, "grad_norm": 0.22517947759114104, "learning_rate": 2.3380740336567285e-05, "loss": 0.7028, "step": 2109 }, { "epoch": 0.45168713708490543, "grad_norm": 0.22960359498581612, "learning_rate": 2.3367346892089166e-05, "loss": 0.7203, "step": 2110 }, { "epoch": 0.451901206818121, "grad_norm": 0.21405634986151675, "learning_rate": 2.335395189324989e-05, "loss": 0.7425, "step": 2111 }, { "epoch": 0.4521152765513366, "grad_norm": 0.20123821408705836, "learning_rate": 2.334055534623256e-05, "loss": 0.7208, "step": 2112 }, { "epoch": 0.4523293462845522, "grad_norm": 0.20140231544872989, "learning_rate": 2.3327157257220994e-05, "loss": 0.7107, "step": 2113 }, { "epoch": 0.45254341601776776, "grad_norm": 0.1804525636336557, "learning_rate": 2.331375763239973e-05, "loss": 0.7075, "step": 2114 }, { "epoch": 0.4527574857509834, "grad_norm": 0.26302564080958823, "learning_rate": 2.3300356477954008e-05, "loss": 0.7043, "step": 2115 }, { "epoch": 0.452971555484199, "grad_norm": 0.19783153830963043, "learning_rate": 2.328695380006978e-05, "loss": 0.7424, "step": 2116 }, { "epoch": 0.45318562521741457, "grad_norm": 0.19999876206622547, "learning_rate": 2.3273549604933693e-05, "loss": 0.7164, "step": 2117 }, { "epoch": 0.45339969495063015, "grad_norm": 0.19216671514409614, "learning_rate": 2.3260143898733106e-05, "loss": 0.7093, "step": 2118 }, { "epoch": 0.4536137646838458, "grad_norm": 0.20994204910084707, "learning_rate": 2.3246736687656055e-05, "loss": 0.7162, "step": 2119 }, { "epoch": 0.45382783441706137, "grad_norm": 0.20692097620296712, "learning_rate": 2.3233327977891295e-05, "loss": 0.7248, "step": 2120 }, { "epoch": 0.45404190415027695, "grad_norm": 0.20359848058700244, "learning_rate": 2.321991777562826e-05, "loss": 0.7529, "step": 2121 }, { "epoch": 0.45425597388349254, "grad_norm": 0.1959436518862538, "learning_rate": 2.3206506087057076e-05, "loss": 0.7134, "step": 2122 }, { "epoch": 0.4544700436167081, "grad_norm": 0.20624523193827454, "learning_rate": 2.319309291836855e-05, "loss": 0.7262, "step": 2123 }, { "epoch": 0.45468411334992376, "grad_norm": 0.1960895968942528, "learning_rate": 2.317967827575418e-05, "loss": 0.7324, "step": 2124 }, { "epoch": 0.45489818308313934, "grad_norm": 0.21014796008763786, "learning_rate": 2.316626216540614e-05, "loss": 0.7394, "step": 2125 }, { "epoch": 0.4551122528163549, "grad_norm": 0.1899205626449186, "learning_rate": 2.315284459351727e-05, "loss": 0.6772, "step": 2126 }, { "epoch": 0.4553263225495705, "grad_norm": 0.20052880229653133, "learning_rate": 2.3139425566281118e-05, "loss": 0.7412, "step": 2127 }, { "epoch": 0.45554039228278614, "grad_norm": 0.19392973954038528, "learning_rate": 2.312600508989187e-05, "loss": 0.7218, "step": 2128 }, { "epoch": 0.4557544620160017, "grad_norm": 0.1892379766232198, "learning_rate": 2.3112583170544395e-05, "loss": 0.7103, "step": 2129 }, { "epoch": 0.4559685317492173, "grad_norm": 0.19748742318767742, "learning_rate": 2.309915981443422e-05, "loss": 0.7146, "step": 2130 }, { "epoch": 0.4561826014824329, "grad_norm": 0.18467300783025856, "learning_rate": 2.3085735027757548e-05, "loss": 0.691, "step": 2131 }, { "epoch": 0.4563966712156485, "grad_norm": 0.20449863566538318, "learning_rate": 2.3072308816711243e-05, "loss": 0.7143, "step": 2132 }, { "epoch": 0.4566107409488641, "grad_norm": 0.18470223636476016, "learning_rate": 2.3058881187492808e-05, "loss": 0.7254, "step": 2133 }, { "epoch": 0.4568248106820797, "grad_norm": 0.21115686334907097, "learning_rate": 2.304545214630042e-05, "loss": 0.6858, "step": 2134 }, { "epoch": 0.4570388804152953, "grad_norm": 0.2070400787293092, "learning_rate": 2.303202169933289e-05, "loss": 0.7223, "step": 2135 }, { "epoch": 0.45725295014851086, "grad_norm": 0.20071912284859644, "learning_rate": 2.30185898527897e-05, "loss": 0.7186, "step": 2136 }, { "epoch": 0.4574670198817265, "grad_norm": 0.20029551412812613, "learning_rate": 2.3005156612870954e-05, "loss": 0.7055, "step": 2137 }, { "epoch": 0.4576810896149421, "grad_norm": 0.19326759988473818, "learning_rate": 2.2991721985777425e-05, "loss": 0.7031, "step": 2138 }, { "epoch": 0.45789515934815767, "grad_norm": 0.18588353186976161, "learning_rate": 2.2978285977710496e-05, "loss": 0.7005, "step": 2139 }, { "epoch": 0.45810922908137325, "grad_norm": 0.18433258134248923, "learning_rate": 2.2964848594872217e-05, "loss": 0.7447, "step": 2140 }, { "epoch": 0.45832329881458883, "grad_norm": 0.19340810567094072, "learning_rate": 2.2951409843465248e-05, "loss": 0.7423, "step": 2141 }, { "epoch": 0.45853736854780447, "grad_norm": 0.18558020866153613, "learning_rate": 2.2937969729692902e-05, "loss": 0.7526, "step": 2142 }, { "epoch": 0.45875143828102005, "grad_norm": 0.2025393114195531, "learning_rate": 2.292452825975911e-05, "loss": 0.7396, "step": 2143 }, { "epoch": 0.45896550801423563, "grad_norm": 0.1827442394547482, "learning_rate": 2.2911085439868425e-05, "loss": 0.7034, "step": 2144 }, { "epoch": 0.4591795777474512, "grad_norm": 0.1925697563862605, "learning_rate": 2.2897641276226028e-05, "loss": 0.7147, "step": 2145 }, { "epoch": 0.4593936474806668, "grad_norm": 0.19874786800278074, "learning_rate": 2.288419577503772e-05, "loss": 0.7111, "step": 2146 }, { "epoch": 0.45960771721388244, "grad_norm": 0.18229270314709667, "learning_rate": 2.2870748942509928e-05, "loss": 0.6978, "step": 2147 }, { "epoch": 0.459821786947098, "grad_norm": 0.20061734811137624, "learning_rate": 2.2857300784849672e-05, "loss": 0.7063, "step": 2148 }, { "epoch": 0.4600358566803136, "grad_norm": 0.19297151527985856, "learning_rate": 2.2843851308264613e-05, "loss": 0.7252, "step": 2149 }, { "epoch": 0.4602499264135292, "grad_norm": 0.19265491238083898, "learning_rate": 2.2830400518962986e-05, "loss": 0.7352, "step": 2150 }, { "epoch": 0.4604639961467448, "grad_norm": 0.20238884594899037, "learning_rate": 2.281694842315367e-05, "loss": 0.7201, "step": 2151 }, { "epoch": 0.4606780658799604, "grad_norm": 0.21753805097857884, "learning_rate": 2.2803495027046113e-05, "loss": 0.7296, "step": 2152 }, { "epoch": 0.460892135613176, "grad_norm": 0.21355056727719166, "learning_rate": 2.2790040336850386e-05, "loss": 0.6886, "step": 2153 }, { "epoch": 0.4611062053463916, "grad_norm": 0.2220846866903324, "learning_rate": 2.2776584358777143e-05, "loss": 0.7268, "step": 2154 }, { "epoch": 0.46132027507960716, "grad_norm": 0.20854007523915405, "learning_rate": 2.2763127099037646e-05, "loss": 0.7246, "step": 2155 }, { "epoch": 0.4615343448128228, "grad_norm": 0.18591572673778503, "learning_rate": 2.274966856384374e-05, "loss": 0.6805, "step": 2156 }, { "epoch": 0.4617484145460384, "grad_norm": 0.21008652628761115, "learning_rate": 2.2736208759407853e-05, "loss": 0.7286, "step": 2157 }, { "epoch": 0.46196248427925396, "grad_norm": 0.17872799128498604, "learning_rate": 2.2722747691943017e-05, "loss": 0.7209, "step": 2158 }, { "epoch": 0.46217655401246954, "grad_norm": 0.1995400707408316, "learning_rate": 2.2709285367662828e-05, "loss": 0.702, "step": 2159 }, { "epoch": 0.4623906237456852, "grad_norm": 0.1963409303381132, "learning_rate": 2.2695821792781474e-05, "loss": 0.716, "step": 2160 }, { "epoch": 0.46260469347890076, "grad_norm": 0.19212834799908093, "learning_rate": 2.2682356973513714e-05, "loss": 0.7265, "step": 2161 }, { "epoch": 0.46281876321211635, "grad_norm": 0.18979304318996393, "learning_rate": 2.2668890916074882e-05, "loss": 0.7194, "step": 2162 }, { "epoch": 0.46303283294533193, "grad_norm": 0.19390822371444827, "learning_rate": 2.2655423626680893e-05, "loss": 0.6937, "step": 2163 }, { "epoch": 0.4632469026785475, "grad_norm": 0.19594228612862122, "learning_rate": 2.2641955111548223e-05, "loss": 0.7165, "step": 2164 }, { "epoch": 0.46346097241176315, "grad_norm": 0.328611684424364, "learning_rate": 2.26284853768939e-05, "loss": 0.7529, "step": 2165 }, { "epoch": 0.46367504214497873, "grad_norm": 0.18678760685275683, "learning_rate": 2.2615014428935548e-05, "loss": 0.7057, "step": 2166 }, { "epoch": 0.4638891118781943, "grad_norm": 0.19678736007739475, "learning_rate": 2.2601542273891317e-05, "loss": 0.7522, "step": 2167 }, { "epoch": 0.4641031816114099, "grad_norm": 0.19321796388205487, "learning_rate": 2.2588068917979933e-05, "loss": 0.7135, "step": 2168 }, { "epoch": 0.46431725134462554, "grad_norm": 0.17534417061737678, "learning_rate": 2.257459436742068e-05, "loss": 0.7165, "step": 2169 }, { "epoch": 0.4645313210778411, "grad_norm": 0.19223165724507915, "learning_rate": 2.2561118628433377e-05, "loss": 0.7146, "step": 2170 }, { "epoch": 0.4647453908110567, "grad_norm": 0.19244694558663425, "learning_rate": 2.2547641707238402e-05, "loss": 0.7336, "step": 2171 }, { "epoch": 0.4649594605442723, "grad_norm": 0.20335622665185274, "learning_rate": 2.253416361005668e-05, "loss": 0.7086, "step": 2172 }, { "epoch": 0.46517353027748787, "grad_norm": 0.5434599869321535, "learning_rate": 2.2520684343109675e-05, "loss": 0.6898, "step": 2173 }, { "epoch": 0.4653876000107035, "grad_norm": 0.1889701876371436, "learning_rate": 2.2507203912619388e-05, "loss": 0.7254, "step": 2174 }, { "epoch": 0.4656016697439191, "grad_norm": 0.18927889582436427, "learning_rate": 2.2493722324808368e-05, "loss": 0.7061, "step": 2175 }, { "epoch": 0.46581573947713467, "grad_norm": 0.19879273062813915, "learning_rate": 2.2480239585899688e-05, "loss": 0.7179, "step": 2176 }, { "epoch": 0.46602980921035025, "grad_norm": 0.21245819806627703, "learning_rate": 2.2466755702116957e-05, "loss": 0.725, "step": 2177 }, { "epoch": 0.4662438789435659, "grad_norm": 0.18089828361663096, "learning_rate": 2.24532706796843e-05, "loss": 0.6809, "step": 2178 }, { "epoch": 0.4664579486767815, "grad_norm": 0.20796703517907206, "learning_rate": 2.24397845248264e-05, "loss": 0.7228, "step": 2179 }, { "epoch": 0.46667201840999706, "grad_norm": 0.18658784740039108, "learning_rate": 2.2426297243768423e-05, "loss": 0.7125, "step": 2180 }, { "epoch": 0.46688608814321264, "grad_norm": 0.22190722914833058, "learning_rate": 2.2412808842736083e-05, "loss": 0.7191, "step": 2181 }, { "epoch": 0.4671001578764282, "grad_norm": 0.18321387426286206, "learning_rate": 2.23993193279556e-05, "loss": 0.7026, "step": 2182 }, { "epoch": 0.46731422760964386, "grad_norm": 0.1930234572929713, "learning_rate": 2.2385828705653697e-05, "loss": 0.7133, "step": 2183 }, { "epoch": 0.46752829734285944, "grad_norm": 0.21405394720514132, "learning_rate": 2.2372336982057644e-05, "loss": 0.7114, "step": 2184 }, { "epoch": 0.467742367076075, "grad_norm": 0.18934323146710155, "learning_rate": 2.2358844163395177e-05, "loss": 0.7249, "step": 2185 }, { "epoch": 0.4679564368092906, "grad_norm": 0.21659338425984373, "learning_rate": 2.2345350255894563e-05, "loss": 0.7295, "step": 2186 }, { "epoch": 0.46817050654250625, "grad_norm": 0.1937781425082282, "learning_rate": 2.2331855265784562e-05, "loss": 0.7068, "step": 2187 }, { "epoch": 0.46838457627572183, "grad_norm": 0.20862195676835077, "learning_rate": 2.2318359199294443e-05, "loss": 0.699, "step": 2188 }, { "epoch": 0.4685986460089374, "grad_norm": 0.19356935963089814, "learning_rate": 2.2304862062653956e-05, "loss": 0.7364, "step": 2189 }, { "epoch": 0.468812715742153, "grad_norm": 0.21522912935697439, "learning_rate": 2.2291363862093363e-05, "loss": 0.7386, "step": 2190 }, { "epoch": 0.4690267854753686, "grad_norm": 0.20601797392840152, "learning_rate": 2.2277864603843405e-05, "loss": 0.7279, "step": 2191 }, { "epoch": 0.4692408552085842, "grad_norm": 0.21339548740533315, "learning_rate": 2.2264364294135316e-05, "loss": 0.7087, "step": 2192 }, { "epoch": 0.4694549249417998, "grad_norm": 0.21461038435187735, "learning_rate": 2.2250862939200815e-05, "loss": 0.7003, "step": 2193 }, { "epoch": 0.4696689946750154, "grad_norm": 0.2063913413177893, "learning_rate": 2.22373605452721e-05, "loss": 0.7064, "step": 2194 }, { "epoch": 0.46988306440823097, "grad_norm": 0.21786751809931207, "learning_rate": 2.2223857118581856e-05, "loss": 0.709, "step": 2195 }, { "epoch": 0.47009713414144655, "grad_norm": 0.20358799992582777, "learning_rate": 2.2210352665363234e-05, "loss": 0.7151, "step": 2196 }, { "epoch": 0.4703112038746622, "grad_norm": 0.2082103443229798, "learning_rate": 2.219684719184987e-05, "loss": 0.7424, "step": 2197 }, { "epoch": 0.47052527360787777, "grad_norm": 0.18248657801484386, "learning_rate": 2.2183340704275862e-05, "loss": 0.6843, "step": 2198 }, { "epoch": 0.47073934334109335, "grad_norm": 0.22323684825994702, "learning_rate": 2.216983320887578e-05, "loss": 0.6894, "step": 2199 }, { "epoch": 0.47095341307430894, "grad_norm": 0.17675028993984335, "learning_rate": 2.2156324711884665e-05, "loss": 0.6725, "step": 2200 }, { "epoch": 0.4711674828075246, "grad_norm": 0.22712610008087614, "learning_rate": 2.2142815219538006e-05, "loss": 0.7295, "step": 2201 }, { "epoch": 0.47138155254074016, "grad_norm": 0.19932839798355986, "learning_rate": 2.212930473807177e-05, "loss": 0.7174, "step": 2202 }, { "epoch": 0.47159562227395574, "grad_norm": 0.2138376063777719, "learning_rate": 2.2115793273722363e-05, "loss": 0.717, "step": 2203 }, { "epoch": 0.4718096920071713, "grad_norm": 0.23804969504798007, "learning_rate": 2.2102280832726644e-05, "loss": 0.7129, "step": 2204 }, { "epoch": 0.4720237617403869, "grad_norm": 0.19630526223385558, "learning_rate": 2.208876742132195e-05, "loss": 0.7186, "step": 2205 }, { "epoch": 0.47223783147360254, "grad_norm": 0.23860936524030293, "learning_rate": 2.207525304574604e-05, "loss": 0.6855, "step": 2206 }, { "epoch": 0.4724519012068181, "grad_norm": 0.18635130539411032, "learning_rate": 2.206173771223712e-05, "loss": 0.722, "step": 2207 }, { "epoch": 0.4726659709400337, "grad_norm": 0.22061354178715856, "learning_rate": 2.204822142703385e-05, "loss": 0.7414, "step": 2208 }, { "epoch": 0.4728800406732493, "grad_norm": 0.17771494882280253, "learning_rate": 2.2034704196375314e-05, "loss": 0.7262, "step": 2209 }, { "epoch": 0.47309411040646493, "grad_norm": 0.22177052502399974, "learning_rate": 2.2021186026501042e-05, "loss": 0.7394, "step": 2210 }, { "epoch": 0.4733081801396805, "grad_norm": 0.2836119350005059, "learning_rate": 2.2007666923651007e-05, "loss": 0.7142, "step": 2211 }, { "epoch": 0.4735222498728961, "grad_norm": 0.20142207277353355, "learning_rate": 2.1994146894065596e-05, "loss": 0.7011, "step": 2212 }, { "epoch": 0.4737363196061117, "grad_norm": 0.2195095836282597, "learning_rate": 2.198062594398562e-05, "loss": 0.6977, "step": 2213 }, { "epoch": 0.47395038933932726, "grad_norm": 0.1843272108922569, "learning_rate": 2.1967104079652342e-05, "loss": 0.6985, "step": 2214 }, { "epoch": 0.4741644590725429, "grad_norm": 0.21025139701485807, "learning_rate": 2.195358130730742e-05, "loss": 0.7259, "step": 2215 }, { "epoch": 0.4743785288057585, "grad_norm": 0.1955009279018932, "learning_rate": 2.1940057633192943e-05, "loss": 0.6931, "step": 2216 }, { "epoch": 0.47459259853897406, "grad_norm": 0.21209494188514003, "learning_rate": 2.192653306355141e-05, "loss": 0.7059, "step": 2217 }, { "epoch": 0.47480666827218965, "grad_norm": 0.1913984807616019, "learning_rate": 2.1913007604625746e-05, "loss": 0.7035, "step": 2218 }, { "epoch": 0.4750207380054053, "grad_norm": 0.20272947480292364, "learning_rate": 2.1899481262659273e-05, "loss": 0.7242, "step": 2219 }, { "epoch": 0.47523480773862087, "grad_norm": 0.18602166579461094, "learning_rate": 2.188595404389572e-05, "loss": 0.7084, "step": 2220 }, { "epoch": 0.47544887747183645, "grad_norm": 0.19229797224622588, "learning_rate": 2.1872425954579243e-05, "loss": 0.7243, "step": 2221 }, { "epoch": 0.47566294720505203, "grad_norm": 0.19772756152008628, "learning_rate": 2.185889700095437e-05, "loss": 0.7004, "step": 2222 }, { "epoch": 0.4758770169382676, "grad_norm": 0.18776691774748028, "learning_rate": 2.184536718926604e-05, "loss": 0.7127, "step": 2223 }, { "epoch": 0.47609108667148325, "grad_norm": 0.19986289483928601, "learning_rate": 2.1831836525759596e-05, "loss": 0.7008, "step": 2224 }, { "epoch": 0.47630515640469884, "grad_norm": 0.17887994580565705, "learning_rate": 2.1818305016680767e-05, "loss": 0.706, "step": 2225 }, { "epoch": 0.4765192261379144, "grad_norm": 0.20062621179748724, "learning_rate": 2.1804772668275668e-05, "loss": 0.7085, "step": 2226 }, { "epoch": 0.47673329587113, "grad_norm": 0.1816060001069782, "learning_rate": 2.179123948679081e-05, "loss": 0.6828, "step": 2227 }, { "epoch": 0.47694736560434564, "grad_norm": 0.20607887352035112, "learning_rate": 2.177770547847309e-05, "loss": 0.7298, "step": 2228 }, { "epoch": 0.4771614353375612, "grad_norm": 0.18395325230022824, "learning_rate": 2.1764170649569766e-05, "loss": 0.7487, "step": 2229 }, { "epoch": 0.4773755050707768, "grad_norm": 0.32829652909794255, "learning_rate": 2.1750635006328506e-05, "loss": 0.7182, "step": 2230 }, { "epoch": 0.4775895748039924, "grad_norm": 0.18739509457265122, "learning_rate": 2.1737098554997322e-05, "loss": 0.726, "step": 2231 }, { "epoch": 0.47780364453720797, "grad_norm": 0.1899002120746162, "learning_rate": 2.1723561301824632e-05, "loss": 0.7011, "step": 2232 }, { "epoch": 0.4780177142704236, "grad_norm": 0.19690319467732328, "learning_rate": 2.17100232530592e-05, "loss": 0.6923, "step": 2233 }, { "epoch": 0.4782317840036392, "grad_norm": 0.19066691168538474, "learning_rate": 2.1696484414950166e-05, "loss": 0.6811, "step": 2234 }, { "epoch": 0.4784458537368548, "grad_norm": 0.17528084411290765, "learning_rate": 2.1682944793747032e-05, "loss": 0.7091, "step": 2235 }, { "epoch": 0.47865992347007036, "grad_norm": 0.18537712986273827, "learning_rate": 2.1669404395699658e-05, "loss": 0.7102, "step": 2236 }, { "epoch": 0.478873993203286, "grad_norm": 0.17236799719458346, "learning_rate": 2.1655863227058273e-05, "loss": 0.7119, "step": 2237 }, { "epoch": 0.4790880629365016, "grad_norm": 0.19124950022431447, "learning_rate": 2.1642321294073456e-05, "loss": 0.7088, "step": 2238 }, { "epoch": 0.47930213266971716, "grad_norm": 0.17534095309799322, "learning_rate": 2.1628778602996133e-05, "loss": 0.7063, "step": 2239 }, { "epoch": 0.47951620240293275, "grad_norm": 0.1760151295715762, "learning_rate": 2.1615235160077594e-05, "loss": 0.6914, "step": 2240 }, { "epoch": 0.47973027213614833, "grad_norm": 0.2018822797808939, "learning_rate": 2.160169097156945e-05, "loss": 0.7299, "step": 2241 }, { "epoch": 0.47994434186936397, "grad_norm": 0.17174582192430415, "learning_rate": 2.158814604372369e-05, "loss": 0.7198, "step": 2242 }, { "epoch": 0.48015841160257955, "grad_norm": 0.20404193653752453, "learning_rate": 2.157460038279263e-05, "loss": 0.6987, "step": 2243 }, { "epoch": 0.48037248133579513, "grad_norm": 0.2022425540689897, "learning_rate": 2.1561053995028916e-05, "loss": 0.7465, "step": 2244 }, { "epoch": 0.4805865510690107, "grad_norm": 0.20972648040376374, "learning_rate": 2.154750688668553e-05, "loss": 0.7049, "step": 2245 }, { "epoch": 0.48080062080222635, "grad_norm": 0.17349165190856564, "learning_rate": 2.1533959064015798e-05, "loss": 0.707, "step": 2246 }, { "epoch": 0.48101469053544194, "grad_norm": 0.19821909126638199, "learning_rate": 2.1520410533273372e-05, "loss": 0.719, "step": 2247 }, { "epoch": 0.4812287602686575, "grad_norm": 0.18990298742636988, "learning_rate": 2.1506861300712223e-05, "loss": 0.6977, "step": 2248 }, { "epoch": 0.4814428300018731, "grad_norm": 0.19449175179036848, "learning_rate": 2.149331137258666e-05, "loss": 0.7216, "step": 2249 }, { "epoch": 0.4816568997350887, "grad_norm": 0.1810279416218013, "learning_rate": 2.1479760755151304e-05, "loss": 0.7056, "step": 2250 }, { "epoch": 0.4818709694683043, "grad_norm": 0.19002562059362754, "learning_rate": 2.1466209454661088e-05, "loss": 0.7318, "step": 2251 }, { "epoch": 0.4820850392015199, "grad_norm": 0.1836020226690981, "learning_rate": 2.1452657477371267e-05, "loss": 0.7032, "step": 2252 }, { "epoch": 0.4822991089347355, "grad_norm": 0.1842724046109878, "learning_rate": 2.143910482953742e-05, "loss": 0.7042, "step": 2253 }, { "epoch": 0.48251317866795107, "grad_norm": 0.18971483550570742, "learning_rate": 2.142555151741542e-05, "loss": 0.7135, "step": 2254 }, { "epoch": 0.48272724840116665, "grad_norm": 0.19163180476199815, "learning_rate": 2.1411997547261444e-05, "loss": 0.6964, "step": 2255 }, { "epoch": 0.4829413181343823, "grad_norm": 0.18818854261034648, "learning_rate": 2.139844292533199e-05, "loss": 0.7184, "step": 2256 }, { "epoch": 0.4831553878675979, "grad_norm": 0.19510093910806356, "learning_rate": 2.1384887657883836e-05, "loss": 0.7217, "step": 2257 }, { "epoch": 0.48336945760081346, "grad_norm": 0.19945217780349028, "learning_rate": 2.1371331751174074e-05, "loss": 0.7079, "step": 2258 }, { "epoch": 0.48358352733402904, "grad_norm": 0.20874140469478644, "learning_rate": 2.1357775211460087e-05, "loss": 0.6922, "step": 2259 }, { "epoch": 0.4837975970672447, "grad_norm": 0.20908910227409855, "learning_rate": 2.1344218044999554e-05, "loss": 0.7048, "step": 2260 }, { "epoch": 0.48401166680046026, "grad_norm": 0.20475202083773375, "learning_rate": 2.1330660258050427e-05, "loss": 0.7144, "step": 2261 }, { "epoch": 0.48422573653367584, "grad_norm": 0.2515990189295899, "learning_rate": 2.131710185687096e-05, "loss": 0.7029, "step": 2262 }, { "epoch": 0.4844398062668914, "grad_norm": 0.19377934490853652, "learning_rate": 2.130354284771969e-05, "loss": 0.6908, "step": 2263 }, { "epoch": 0.484653876000107, "grad_norm": 0.2038222499424192, "learning_rate": 2.1289983236855428e-05, "loss": 0.7045, "step": 2264 }, { "epoch": 0.48486794573332265, "grad_norm": 0.19998878810257695, "learning_rate": 2.127642303053726e-05, "loss": 0.716, "step": 2265 }, { "epoch": 0.48508201546653823, "grad_norm": 0.20571719169026145, "learning_rate": 2.1262862235024567e-05, "loss": 0.715, "step": 2266 }, { "epoch": 0.4852960851997538, "grad_norm": 0.19014981487799368, "learning_rate": 2.1249300856576972e-05, "loss": 0.7337, "step": 2267 }, { "epoch": 0.4855101549329694, "grad_norm": 0.20788350079763343, "learning_rate": 2.1235738901454385e-05, "loss": 0.6961, "step": 2268 }, { "epoch": 0.48572422466618503, "grad_norm": 0.21612197002966596, "learning_rate": 2.122217637591699e-05, "loss": 0.7449, "step": 2269 }, { "epoch": 0.4859382943994006, "grad_norm": 0.19972069186062108, "learning_rate": 2.1208613286225214e-05, "loss": 0.7216, "step": 2270 }, { "epoch": 0.4861523641326162, "grad_norm": 0.18881056958815423, "learning_rate": 2.119504963863976e-05, "loss": 0.7087, "step": 2271 }, { "epoch": 0.4863664338658318, "grad_norm": 0.1925151607037651, "learning_rate": 2.118148543942158e-05, "loss": 0.7383, "step": 2272 }, { "epoch": 0.48658050359904736, "grad_norm": 0.195449876371769, "learning_rate": 2.1167920694831876e-05, "loss": 0.7107, "step": 2273 }, { "epoch": 0.486794573332263, "grad_norm": 0.20005252445025037, "learning_rate": 2.1154355411132122e-05, "loss": 0.6841, "step": 2274 }, { "epoch": 0.4870086430654786, "grad_norm": 0.18416558455168575, "learning_rate": 2.114078959458403e-05, "loss": 0.7289, "step": 2275 }, { "epoch": 0.48722271279869417, "grad_norm": 0.20309618756159076, "learning_rate": 2.1127223251449543e-05, "loss": 0.6937, "step": 2276 }, { "epoch": 0.48743678253190975, "grad_norm": 0.18878437985187504, "learning_rate": 2.111365638799087e-05, "loss": 0.7074, "step": 2277 }, { "epoch": 0.4876508522651254, "grad_norm": 0.20122807405960974, "learning_rate": 2.110008901047044e-05, "loss": 0.6767, "step": 2278 }, { "epoch": 0.487864921998341, "grad_norm": 0.1869339865653749, "learning_rate": 2.108652112515094e-05, "loss": 0.7267, "step": 2279 }, { "epoch": 0.48807899173155656, "grad_norm": 0.18685701646559502, "learning_rate": 2.1072952738295284e-05, "loss": 0.7064, "step": 2280 }, { "epoch": 0.48829306146477214, "grad_norm": 0.18757687185499403, "learning_rate": 2.1059383856166602e-05, "loss": 0.7112, "step": 2281 }, { "epoch": 0.4885071311979877, "grad_norm": 0.18045589237477888, "learning_rate": 2.104581448502827e-05, "loss": 0.7032, "step": 2282 }, { "epoch": 0.48872120093120336, "grad_norm": 0.2243567008518612, "learning_rate": 2.103224463114389e-05, "loss": 0.711, "step": 2283 }, { "epoch": 0.48893527066441894, "grad_norm": 0.17785672479561243, "learning_rate": 2.1018674300777274e-05, "loss": 0.6939, "step": 2284 }, { "epoch": 0.4891493403976345, "grad_norm": 0.18966677078905494, "learning_rate": 2.100510350019247e-05, "loss": 0.7088, "step": 2285 }, { "epoch": 0.4893634101308501, "grad_norm": 0.19997803944261977, "learning_rate": 2.099153223565373e-05, "loss": 0.6697, "step": 2286 }, { "epoch": 0.48957747986406575, "grad_norm": 0.18593898857181565, "learning_rate": 2.0977960513425523e-05, "loss": 0.7045, "step": 2287 }, { "epoch": 0.48979154959728133, "grad_norm": 0.18443672398784963, "learning_rate": 2.096438833977253e-05, "loss": 0.7163, "step": 2288 }, { "epoch": 0.4900056193304969, "grad_norm": 0.18905170424942327, "learning_rate": 2.095081572095965e-05, "loss": 0.6901, "step": 2289 }, { "epoch": 0.4902196890637125, "grad_norm": 0.18465422435741757, "learning_rate": 2.093724266325197e-05, "loss": 0.7215, "step": 2290 }, { "epoch": 0.4904337587969281, "grad_norm": 0.24225053244823289, "learning_rate": 2.0923669172914796e-05, "loss": 0.7064, "step": 2291 }, { "epoch": 0.4906478285301437, "grad_norm": 0.20772301045923566, "learning_rate": 2.0910095256213624e-05, "loss": 0.6744, "step": 2292 }, { "epoch": 0.4908618982633593, "grad_norm": 0.21870061034685065, "learning_rate": 2.0896520919414142e-05, "loss": 0.7351, "step": 2293 }, { "epoch": 0.4910759679965749, "grad_norm": 0.20715028862976584, "learning_rate": 2.0882946168782247e-05, "loss": 0.6835, "step": 2294 }, { "epoch": 0.49129003772979046, "grad_norm": 0.20582083098621823, "learning_rate": 2.0869371010584017e-05, "loss": 0.7072, "step": 2295 }, { "epoch": 0.4915041074630061, "grad_norm": 0.2380438839202891, "learning_rate": 2.085579545108572e-05, "loss": 0.7236, "step": 2296 }, { "epoch": 0.4917181771962217, "grad_norm": 0.2050310270181436, "learning_rate": 2.0842219496553808e-05, "loss": 0.7367, "step": 2297 }, { "epoch": 0.49193224692943727, "grad_norm": 0.20857720028133456, "learning_rate": 2.0828643153254918e-05, "loss": 0.7256, "step": 2298 }, { "epoch": 0.49214631666265285, "grad_norm": 0.1983561651271981, "learning_rate": 2.081506642745587e-05, "loss": 0.6879, "step": 2299 }, { "epoch": 0.49236038639586843, "grad_norm": 0.27889800775756773, "learning_rate": 2.0801489325423642e-05, "loss": 0.7288, "step": 2300 }, { "epoch": 0.49257445612908407, "grad_norm": 0.21067068280653148, "learning_rate": 2.0787911853425418e-05, "loss": 0.7299, "step": 2301 }, { "epoch": 0.49278852586229965, "grad_norm": 0.1901153136259381, "learning_rate": 2.077433401772852e-05, "loss": 0.7047, "step": 2302 }, { "epoch": 0.49300259559551524, "grad_norm": 0.20697270328001657, "learning_rate": 2.0760755824600462e-05, "loss": 0.7041, "step": 2303 }, { "epoch": 0.4932166653287308, "grad_norm": 0.18261106983665978, "learning_rate": 2.0747177280308895e-05, "loss": 0.7081, "step": 2304 }, { "epoch": 0.4934307350619464, "grad_norm": 0.18367551658853998, "learning_rate": 2.073359839112168e-05, "loss": 0.6817, "step": 2305 }, { "epoch": 0.49364480479516204, "grad_norm": 0.19832615632886225, "learning_rate": 2.072001916330678e-05, "loss": 0.7102, "step": 2306 }, { "epoch": 0.4938588745283776, "grad_norm": 0.21393930601194572, "learning_rate": 2.0706439603132357e-05, "loss": 0.732, "step": 2307 }, { "epoch": 0.4940729442615932, "grad_norm": 0.1784007718620903, "learning_rate": 2.069285971686671e-05, "loss": 0.7249, "step": 2308 }, { "epoch": 0.4942870139948088, "grad_norm": 0.21697739711195274, "learning_rate": 2.067927951077828e-05, "loss": 0.7148, "step": 2309 }, { "epoch": 0.4945010837280244, "grad_norm": 0.1983332192416552, "learning_rate": 2.0665698991135666e-05, "loss": 0.7147, "step": 2310 }, { "epoch": 0.49471515346124, "grad_norm": 0.19218358718375153, "learning_rate": 2.0652118164207624e-05, "loss": 0.721, "step": 2311 }, { "epoch": 0.4949292231944556, "grad_norm": 0.19910159640157518, "learning_rate": 2.0638537036263032e-05, "loss": 0.7113, "step": 2312 }, { "epoch": 0.4951432929276712, "grad_norm": 0.19599306878955114, "learning_rate": 2.062495561357091e-05, "loss": 0.7084, "step": 2313 }, { "epoch": 0.49535736266088676, "grad_norm": 0.20615678706193064, "learning_rate": 2.061137390240042e-05, "loss": 0.7087, "step": 2314 }, { "epoch": 0.4955714323941024, "grad_norm": 0.18882100983035943, "learning_rate": 2.059779190902085e-05, "loss": 0.7557, "step": 2315 }, { "epoch": 0.495785502127318, "grad_norm": 0.28029007840889947, "learning_rate": 2.0584209639701643e-05, "loss": 0.6984, "step": 2316 }, { "epoch": 0.49599957186053356, "grad_norm": 0.20861673098181618, "learning_rate": 2.057062710071233e-05, "loss": 0.7229, "step": 2317 }, { "epoch": 0.49621364159374914, "grad_norm": 0.19678723848133275, "learning_rate": 2.055704429832259e-05, "loss": 0.6991, "step": 2318 }, { "epoch": 0.4964277113269648, "grad_norm": 0.18357941824676727, "learning_rate": 2.0543461238802224e-05, "loss": 0.7254, "step": 2319 }, { "epoch": 0.49664178106018037, "grad_norm": 0.184084335224731, "learning_rate": 2.0529877928421136e-05, "loss": 0.685, "step": 2320 }, { "epoch": 0.49685585079339595, "grad_norm": 0.1814205878044885, "learning_rate": 2.0516294373449378e-05, "loss": 0.7062, "step": 2321 }, { "epoch": 0.49706992052661153, "grad_norm": 0.1831755680322796, "learning_rate": 2.050271058015708e-05, "loss": 0.7133, "step": 2322 }, { "epoch": 0.4972839902598271, "grad_norm": 0.17480465497898204, "learning_rate": 2.0489126554814493e-05, "loss": 0.6761, "step": 2323 }, { "epoch": 0.49749805999304275, "grad_norm": 0.1951662502268692, "learning_rate": 2.047554230369199e-05, "loss": 0.7183, "step": 2324 }, { "epoch": 0.49771212972625833, "grad_norm": 0.17915017788063614, "learning_rate": 2.0461957833060025e-05, "loss": 0.6992, "step": 2325 }, { "epoch": 0.4979261994594739, "grad_norm": 0.18937104512906497, "learning_rate": 2.0448373149189172e-05, "loss": 0.7005, "step": 2326 }, { "epoch": 0.4981402691926895, "grad_norm": 0.19891811608452778, "learning_rate": 2.0434788258350094e-05, "loss": 0.7259, "step": 2327 }, { "epoch": 0.49835433892590514, "grad_norm": 0.1741124121040168, "learning_rate": 2.0421203166813552e-05, "loss": 0.6881, "step": 2328 }, { "epoch": 0.4985684086591207, "grad_norm": 0.20268039786935235, "learning_rate": 2.0407617880850403e-05, "loss": 0.706, "step": 2329 }, { "epoch": 0.4987824783923363, "grad_norm": 0.17551348267840383, "learning_rate": 2.039403240673158e-05, "loss": 0.7259, "step": 2330 }, { "epoch": 0.4989965481255519, "grad_norm": 0.2009671644300957, "learning_rate": 2.038044675072812e-05, "loss": 0.7202, "step": 2331 }, { "epoch": 0.49921061785876747, "grad_norm": 0.17093429434405302, "learning_rate": 2.036686091911114e-05, "loss": 0.6964, "step": 2332 }, { "epoch": 0.4994246875919831, "grad_norm": 0.191369264221559, "learning_rate": 2.0353274918151832e-05, "loss": 0.7355, "step": 2333 }, { "epoch": 0.4996387573251987, "grad_norm": 0.18551508301521066, "learning_rate": 2.0339688754121468e-05, "loss": 0.7291, "step": 2334 }, { "epoch": 0.4998528270584143, "grad_norm": 0.17065764579513973, "learning_rate": 2.0326102433291387e-05, "loss": 0.6915, "step": 2335 }, { "epoch": 0.5000668967916299, "grad_norm": 0.1764580327298373, "learning_rate": 2.031251596193303e-05, "loss": 0.715, "step": 2336 }, { "epoch": 0.5002809665248454, "grad_norm": 0.16821877466370244, "learning_rate": 2.0298929346317876e-05, "loss": 0.6839, "step": 2337 }, { "epoch": 0.500495036258061, "grad_norm": 0.17016892606393189, "learning_rate": 2.0285342592717483e-05, "loss": 0.6956, "step": 2338 }, { "epoch": 0.5007091059912766, "grad_norm": 0.5551613601013521, "learning_rate": 2.0271755707403467e-05, "loss": 0.7196, "step": 2339 }, { "epoch": 0.5009231757244923, "grad_norm": 0.1637426430439625, "learning_rate": 2.0258168696647517e-05, "loss": 0.6909, "step": 2340 }, { "epoch": 0.5011372454577079, "grad_norm": 0.16709579796395363, "learning_rate": 2.0244581566721373e-05, "loss": 0.6995, "step": 2341 }, { "epoch": 0.5013513151909235, "grad_norm": 0.1731379826836654, "learning_rate": 2.0230994323896817e-05, "loss": 0.7312, "step": 2342 }, { "epoch": 0.501565384924139, "grad_norm": 0.16601482400831105, "learning_rate": 2.021740697444571e-05, "loss": 0.6862, "step": 2343 }, { "epoch": 0.5017794546573546, "grad_norm": 0.188188415154757, "learning_rate": 2.020381952463994e-05, "loss": 0.7243, "step": 2344 }, { "epoch": 0.5019935243905702, "grad_norm": 0.17006152360939655, "learning_rate": 2.019023198075145e-05, "loss": 0.7431, "step": 2345 }, { "epoch": 0.5022075941237858, "grad_norm": 0.18321705383784387, "learning_rate": 2.0176644349052225e-05, "loss": 0.7106, "step": 2346 }, { "epoch": 0.5024216638570014, "grad_norm": 0.17119446549725556, "learning_rate": 2.0163056635814294e-05, "loss": 0.7076, "step": 2347 }, { "epoch": 0.502635733590217, "grad_norm": 0.1782311070606052, "learning_rate": 2.014946884730972e-05, "loss": 0.7, "step": 2348 }, { "epoch": 0.5028498033234327, "grad_norm": 0.1783066570538049, "learning_rate": 2.01358809898106e-05, "loss": 0.6851, "step": 2349 }, { "epoch": 0.5030638730566482, "grad_norm": 0.1747335542605542, "learning_rate": 2.0122293069589062e-05, "loss": 0.6973, "step": 2350 }, { "epoch": 0.5032779427898638, "grad_norm": 0.17782417520246982, "learning_rate": 2.0108705092917268e-05, "loss": 0.7129, "step": 2351 }, { "epoch": 0.5034920125230794, "grad_norm": 0.17511836005573053, "learning_rate": 2.0095117066067398e-05, "loss": 0.7111, "step": 2352 }, { "epoch": 0.503706082256295, "grad_norm": 0.18274102767963865, "learning_rate": 2.0081528995311666e-05, "loss": 0.6832, "step": 2353 }, { "epoch": 0.5039201519895106, "grad_norm": 0.19232256991867974, "learning_rate": 2.0067940886922305e-05, "loss": 0.6998, "step": 2354 }, { "epoch": 0.5041342217227262, "grad_norm": 0.19133307064488894, "learning_rate": 2.005435274717155e-05, "loss": 0.7169, "step": 2355 }, { "epoch": 0.5043482914559417, "grad_norm": 0.19299429614965022, "learning_rate": 2.0040764582331666e-05, "loss": 0.7222, "step": 2356 }, { "epoch": 0.5045623611891573, "grad_norm": 0.20788566687208576, "learning_rate": 2.002717639867492e-05, "loss": 0.6984, "step": 2357 }, { "epoch": 0.504776430922373, "grad_norm": 0.17868743120764646, "learning_rate": 2.0013588202473605e-05, "loss": 0.685, "step": 2358 }, { "epoch": 0.5049905006555886, "grad_norm": 0.19885115695005554, "learning_rate": 2e-05, "loss": 0.7283, "step": 2359 }, { "epoch": 0.5052045703888042, "grad_norm": 0.18236181566858858, "learning_rate": 1.9986411797526395e-05, "loss": 0.6857, "step": 2360 }, { "epoch": 0.5054186401220198, "grad_norm": 0.18572831251879549, "learning_rate": 1.9972823601325084e-05, "loss": 0.7045, "step": 2361 }, { "epoch": 0.5056327098552353, "grad_norm": 0.19423726834554206, "learning_rate": 1.9959235417668337e-05, "loss": 0.6945, "step": 2362 }, { "epoch": 0.5058467795884509, "grad_norm": 0.16967139105862453, "learning_rate": 1.9945647252828462e-05, "loss": 0.6808, "step": 2363 }, { "epoch": 0.5060608493216665, "grad_norm": 0.18706938255117278, "learning_rate": 1.9932059113077705e-05, "loss": 0.7303, "step": 2364 }, { "epoch": 0.5062749190548821, "grad_norm": 0.18583122159610296, "learning_rate": 1.9918471004688334e-05, "loss": 0.7307, "step": 2365 }, { "epoch": 0.5064889887880977, "grad_norm": 0.18808429291888643, "learning_rate": 1.990488293393261e-05, "loss": 0.7252, "step": 2366 }, { "epoch": 0.5067030585213134, "grad_norm": 0.1726240382474707, "learning_rate": 1.989129490708274e-05, "loss": 0.6967, "step": 2367 }, { "epoch": 0.506917128254529, "grad_norm": 0.18071632718672434, "learning_rate": 1.9877706930410948e-05, "loss": 0.6804, "step": 2368 }, { "epoch": 0.5071311979877445, "grad_norm": 0.19357683267040604, "learning_rate": 1.9864119010189407e-05, "loss": 0.7231, "step": 2369 }, { "epoch": 0.5073452677209601, "grad_norm": 0.17114860137255122, "learning_rate": 1.985053115269028e-05, "loss": 0.6918, "step": 2370 }, { "epoch": 0.5075593374541757, "grad_norm": 0.2572212038646177, "learning_rate": 1.983694336418571e-05, "loss": 0.7033, "step": 2371 }, { "epoch": 0.5077734071873913, "grad_norm": 0.18477427085141998, "learning_rate": 1.9823355650947775e-05, "loss": 0.7125, "step": 2372 }, { "epoch": 0.5079874769206069, "grad_norm": 0.2741212601974107, "learning_rate": 1.9809768019248557e-05, "loss": 0.7322, "step": 2373 }, { "epoch": 0.5082015466538224, "grad_norm": 0.19105586062027488, "learning_rate": 1.9796180475360064e-05, "loss": 0.7245, "step": 2374 }, { "epoch": 0.508415616387038, "grad_norm": 0.19053957467597, "learning_rate": 1.978259302555429e-05, "loss": 0.7181, "step": 2375 }, { "epoch": 0.5086296861202537, "grad_norm": 0.18842262608680713, "learning_rate": 1.976900567610319e-05, "loss": 0.7147, "step": 2376 }, { "epoch": 0.5088437558534693, "grad_norm": 0.17734224520190128, "learning_rate": 1.9755418433278633e-05, "loss": 0.7294, "step": 2377 }, { "epoch": 0.5090578255866849, "grad_norm": 0.18709617782173277, "learning_rate": 1.9741831303352486e-05, "loss": 0.7143, "step": 2378 }, { "epoch": 0.5092718953199005, "grad_norm": 0.18088516800298257, "learning_rate": 1.972824429259654e-05, "loss": 0.7197, "step": 2379 }, { "epoch": 0.509485965053116, "grad_norm": 0.2822352214414661, "learning_rate": 1.9714657407282527e-05, "loss": 0.6969, "step": 2380 }, { "epoch": 0.5097000347863316, "grad_norm": 0.1851516478757197, "learning_rate": 1.970107065368213e-05, "loss": 0.7339, "step": 2381 }, { "epoch": 0.5099141045195472, "grad_norm": 0.19218223083206867, "learning_rate": 1.9687484038066976e-05, "loss": 0.6844, "step": 2382 }, { "epoch": 0.5101281742527628, "grad_norm": 0.17603925537645054, "learning_rate": 1.9673897566708616e-05, "loss": 0.6866, "step": 2383 }, { "epoch": 0.5103422439859784, "grad_norm": 0.19739203187639282, "learning_rate": 1.9660311245878542e-05, "loss": 0.6979, "step": 2384 }, { "epoch": 0.5105563137191941, "grad_norm": 0.19193124684619162, "learning_rate": 1.9646725081848178e-05, "loss": 0.7023, "step": 2385 }, { "epoch": 0.5107703834524097, "grad_norm": 0.17646203306384048, "learning_rate": 1.9633139080888865e-05, "loss": 0.701, "step": 2386 }, { "epoch": 0.5109844531856252, "grad_norm": 0.19494243832438965, "learning_rate": 1.9619553249271882e-05, "loss": 0.701, "step": 2387 }, { "epoch": 0.5111985229188408, "grad_norm": 0.17101539925619594, "learning_rate": 1.9605967593268427e-05, "loss": 0.7008, "step": 2388 }, { "epoch": 0.5114125926520564, "grad_norm": 0.19285906469546862, "learning_rate": 1.9592382119149604e-05, "loss": 0.7182, "step": 2389 }, { "epoch": 0.511626662385272, "grad_norm": 0.17375748525740428, "learning_rate": 1.9578796833186458e-05, "loss": 0.6884, "step": 2390 }, { "epoch": 0.5118407321184876, "grad_norm": 0.18760868495860536, "learning_rate": 1.9565211741649913e-05, "loss": 0.7014, "step": 2391 }, { "epoch": 0.5120548018517032, "grad_norm": 0.17339425461333224, "learning_rate": 1.9551626850810828e-05, "loss": 0.717, "step": 2392 }, { "epoch": 0.5122688715849187, "grad_norm": 0.1951409833582809, "learning_rate": 1.9538042166939982e-05, "loss": 0.7159, "step": 2393 }, { "epoch": 0.5124829413181344, "grad_norm": 0.17417982450498132, "learning_rate": 1.9524457696308017e-05, "loss": 0.7204, "step": 2394 }, { "epoch": 0.51269701105135, "grad_norm": 0.17865834090670113, "learning_rate": 1.9510873445185514e-05, "loss": 0.7355, "step": 2395 }, { "epoch": 0.5129110807845656, "grad_norm": 0.17343813112653667, "learning_rate": 1.949728941984293e-05, "loss": 0.7189, "step": 2396 }, { "epoch": 0.5131251505177812, "grad_norm": 0.1731638562684162, "learning_rate": 1.9483705626550625e-05, "loss": 0.7224, "step": 2397 }, { "epoch": 0.5133392202509968, "grad_norm": 0.17229895338371198, "learning_rate": 1.9470122071578867e-05, "loss": 0.7087, "step": 2398 }, { "epoch": 0.5135532899842123, "grad_norm": 0.1701206650462555, "learning_rate": 1.9456538761197782e-05, "loss": 0.718, "step": 2399 }, { "epoch": 0.5137673597174279, "grad_norm": 0.17781763681545806, "learning_rate": 1.944295570167742e-05, "loss": 0.6886, "step": 2400 }, { "epoch": 0.5139814294506435, "grad_norm": 0.1665046907054677, "learning_rate": 1.9429372899287678e-05, "loss": 0.6856, "step": 2401 }, { "epoch": 0.5141954991838591, "grad_norm": 0.17594804152767285, "learning_rate": 1.941579036029836e-05, "loss": 0.6817, "step": 2402 }, { "epoch": 0.5144095689170748, "grad_norm": 0.17811135886153168, "learning_rate": 1.9402208090979152e-05, "loss": 0.7143, "step": 2403 }, { "epoch": 0.5146236386502904, "grad_norm": 0.16743693823359704, "learning_rate": 1.9388626097599585e-05, "loss": 0.7129, "step": 2404 }, { "epoch": 0.514837708383506, "grad_norm": 0.18181922997233096, "learning_rate": 1.9375044386429103e-05, "loss": 0.7009, "step": 2405 }, { "epoch": 0.5150517781167215, "grad_norm": 0.18291054112383048, "learning_rate": 1.9361462963736978e-05, "loss": 0.705, "step": 2406 }, { "epoch": 0.5152658478499371, "grad_norm": 0.18050104738981726, "learning_rate": 1.934788183579238e-05, "loss": 0.711, "step": 2407 }, { "epoch": 0.5154799175831527, "grad_norm": 0.1665601928447741, "learning_rate": 1.933430100886434e-05, "loss": 0.7117, "step": 2408 }, { "epoch": 0.5156939873163683, "grad_norm": 0.1899409126838694, "learning_rate": 1.9320720489221728e-05, "loss": 0.7109, "step": 2409 }, { "epoch": 0.5159080570495839, "grad_norm": 0.17591987081234786, "learning_rate": 1.9307140283133305e-05, "loss": 0.688, "step": 2410 }, { "epoch": 0.5161221267827995, "grad_norm": 0.19600856341742107, "learning_rate": 1.9293560396867646e-05, "loss": 0.7295, "step": 2411 }, { "epoch": 0.5163361965160151, "grad_norm": 0.18149886765818582, "learning_rate": 1.927998083669322e-05, "loss": 0.7116, "step": 2412 }, { "epoch": 0.5165502662492307, "grad_norm": 0.21087260529720359, "learning_rate": 1.926640160887833e-05, "loss": 0.7403, "step": 2413 }, { "epoch": 0.5167643359824463, "grad_norm": 0.1810112048437299, "learning_rate": 1.92528227196911e-05, "loss": 0.7074, "step": 2414 }, { "epoch": 0.5169784057156619, "grad_norm": 0.1811772106321527, "learning_rate": 1.9239244175399548e-05, "loss": 0.7052, "step": 2415 }, { "epoch": 0.5171924754488775, "grad_norm": 0.18435248533126594, "learning_rate": 1.9225665982271483e-05, "loss": 0.7068, "step": 2416 }, { "epoch": 0.5174065451820931, "grad_norm": 0.17567521068438643, "learning_rate": 1.9212088146574585e-05, "loss": 0.6824, "step": 2417 }, { "epoch": 0.5176206149153086, "grad_norm": 0.19517579534362361, "learning_rate": 1.919851067457636e-05, "loss": 0.7073, "step": 2418 }, { "epoch": 0.5178346846485242, "grad_norm": 0.173956476489934, "learning_rate": 1.918493357254414e-05, "loss": 0.7255, "step": 2419 }, { "epoch": 0.5180487543817398, "grad_norm": 0.19263265915578986, "learning_rate": 1.9171356846745085e-05, "loss": 0.701, "step": 2420 }, { "epoch": 0.5182628241149555, "grad_norm": 0.1771915668668789, "learning_rate": 1.91577805034462e-05, "loss": 0.6748, "step": 2421 }, { "epoch": 0.5184768938481711, "grad_norm": 0.1762121557303122, "learning_rate": 1.914420454891429e-05, "loss": 0.7138, "step": 2422 }, { "epoch": 0.5186909635813867, "grad_norm": 0.17280975868188017, "learning_rate": 1.913062898941599e-05, "loss": 0.695, "step": 2423 }, { "epoch": 0.5189050333146022, "grad_norm": 0.16191149556216228, "learning_rate": 1.911705383121776e-05, "loss": 0.6652, "step": 2424 }, { "epoch": 0.5191191030478178, "grad_norm": 0.18077192547039, "learning_rate": 1.9103479080585868e-05, "loss": 0.7123, "step": 2425 }, { "epoch": 0.5193331727810334, "grad_norm": 0.16549281930608375, "learning_rate": 1.9089904743786383e-05, "loss": 0.7075, "step": 2426 }, { "epoch": 0.519547242514249, "grad_norm": 0.17844717983546715, "learning_rate": 1.9076330827085214e-05, "loss": 0.7379, "step": 2427 }, { "epoch": 0.5197613122474646, "grad_norm": 0.17288216637910517, "learning_rate": 1.9062757336748034e-05, "loss": 0.672, "step": 2428 }, { "epoch": 0.5199753819806802, "grad_norm": 0.17782160290731314, "learning_rate": 1.9049184279040354e-05, "loss": 0.7098, "step": 2429 }, { "epoch": 0.5201894517138959, "grad_norm": 0.17626367258519576, "learning_rate": 1.9035611660227476e-05, "loss": 0.6914, "step": 2430 }, { "epoch": 0.5204035214471114, "grad_norm": 0.1766203569094171, "learning_rate": 1.9022039486574484e-05, "loss": 0.6912, "step": 2431 }, { "epoch": 0.520617591180327, "grad_norm": 0.16746964356194322, "learning_rate": 1.900846776434628e-05, "loss": 0.684, "step": 2432 }, { "epoch": 0.5208316609135426, "grad_norm": 0.17332127773247646, "learning_rate": 1.8994896499807534e-05, "loss": 0.6955, "step": 2433 }, { "epoch": 0.5210457306467582, "grad_norm": 0.1854011416630557, "learning_rate": 1.8981325699222726e-05, "loss": 0.6895, "step": 2434 }, { "epoch": 0.5212598003799738, "grad_norm": 0.16844781247433485, "learning_rate": 1.8967755368856118e-05, "loss": 0.6974, "step": 2435 }, { "epoch": 0.5214738701131894, "grad_norm": 0.17433800797431942, "learning_rate": 1.8954185514971733e-05, "loss": 0.7107, "step": 2436 }, { "epoch": 0.5216879398464049, "grad_norm": 0.16382371927723935, "learning_rate": 1.8940616143833408e-05, "loss": 0.6465, "step": 2437 }, { "epoch": 0.5219020095796205, "grad_norm": 0.16725203105456918, "learning_rate": 1.8927047261704723e-05, "loss": 0.7091, "step": 2438 }, { "epoch": 0.5221160793128361, "grad_norm": 0.17193985324127656, "learning_rate": 1.891347887484906e-05, "loss": 0.7233, "step": 2439 }, { "epoch": 0.5223301490460518, "grad_norm": 0.17006006807904392, "learning_rate": 1.8899910989529567e-05, "loss": 0.7243, "step": 2440 }, { "epoch": 0.5225442187792674, "grad_norm": 0.17500226871993238, "learning_rate": 1.8886343612009138e-05, "loss": 0.7263, "step": 2441 }, { "epoch": 0.522758288512483, "grad_norm": 0.1693394120196878, "learning_rate": 1.8872776748550467e-05, "loss": 0.738, "step": 2442 }, { "epoch": 0.5229723582456985, "grad_norm": 0.17241829192872127, "learning_rate": 1.8859210405415977e-05, "loss": 0.718, "step": 2443 }, { "epoch": 0.5231864279789141, "grad_norm": 0.16618010241416503, "learning_rate": 1.8845644588867878e-05, "loss": 0.6773, "step": 2444 }, { "epoch": 0.5234004977121297, "grad_norm": 0.18077908505361287, "learning_rate": 1.883207930516813e-05, "loss": 0.7226, "step": 2445 }, { "epoch": 0.5236145674453453, "grad_norm": 0.17848471786808595, "learning_rate": 1.881851456057843e-05, "loss": 0.7342, "step": 2446 }, { "epoch": 0.5238286371785609, "grad_norm": 0.180198603087818, "learning_rate": 1.880495036136025e-05, "loss": 0.7081, "step": 2447 }, { "epoch": 0.5240427069117765, "grad_norm": 0.18003694994380037, "learning_rate": 1.8791386713774793e-05, "loss": 0.6921, "step": 2448 }, { "epoch": 0.5242567766449922, "grad_norm": 0.18435628537362891, "learning_rate": 1.8777823624083014e-05, "loss": 0.6965, "step": 2449 }, { "epoch": 0.5244708463782077, "grad_norm": 0.1822405965776421, "learning_rate": 1.876426109854562e-05, "loss": 0.7269, "step": 2450 }, { "epoch": 0.5246849161114233, "grad_norm": 0.1791863797660689, "learning_rate": 1.8750699143423034e-05, "loss": 0.7164, "step": 2451 }, { "epoch": 0.5248989858446389, "grad_norm": 0.18568060559199578, "learning_rate": 1.8737137764975446e-05, "loss": 0.7336, "step": 2452 }, { "epoch": 0.5251130555778545, "grad_norm": 0.1740890741721583, "learning_rate": 1.8723576969462743e-05, "loss": 0.6711, "step": 2453 }, { "epoch": 0.5253271253110701, "grad_norm": 0.18135128756071045, "learning_rate": 1.8710016763144575e-05, "loss": 0.7166, "step": 2454 }, { "epoch": 0.5255411950442856, "grad_norm": 0.1771530160603431, "learning_rate": 1.8696457152280317e-05, "loss": 0.6871, "step": 2455 }, { "epoch": 0.5257552647775012, "grad_norm": 0.17283987479795945, "learning_rate": 1.8682898143129044e-05, "loss": 0.7281, "step": 2456 }, { "epoch": 0.5259693345107168, "grad_norm": 0.17865973348635084, "learning_rate": 1.8669339741949577e-05, "loss": 0.7419, "step": 2457 }, { "epoch": 0.5261834042439325, "grad_norm": 0.1860920436859969, "learning_rate": 1.8655781955000452e-05, "loss": 0.6974, "step": 2458 }, { "epoch": 0.5263974739771481, "grad_norm": 0.18102381730441347, "learning_rate": 1.864222478853991e-05, "loss": 0.6928, "step": 2459 }, { "epoch": 0.5266115437103637, "grad_norm": 0.18503940273221842, "learning_rate": 1.8628668248825933e-05, "loss": 0.7144, "step": 2460 }, { "epoch": 0.5268256134435793, "grad_norm": 0.16673122364110576, "learning_rate": 1.861511234211617e-05, "loss": 0.7049, "step": 2461 }, { "epoch": 0.5270396831767948, "grad_norm": 0.17903155801938797, "learning_rate": 1.8601557074668018e-05, "loss": 0.6836, "step": 2462 }, { "epoch": 0.5272537529100104, "grad_norm": 0.1696083896982828, "learning_rate": 1.8588002452738562e-05, "loss": 0.7278, "step": 2463 }, { "epoch": 0.527467822643226, "grad_norm": 0.18973019059171162, "learning_rate": 1.857444848258459e-05, "loss": 0.7013, "step": 2464 }, { "epoch": 0.5276818923764416, "grad_norm": 0.17079550591332204, "learning_rate": 1.8560895170462582e-05, "loss": 0.7046, "step": 2465 }, { "epoch": 0.5278959621096572, "grad_norm": 0.3044465689473129, "learning_rate": 1.8547342522628737e-05, "loss": 0.7344, "step": 2466 }, { "epoch": 0.5281100318428729, "grad_norm": 0.16982640971868748, "learning_rate": 1.8533790545338922e-05, "loss": 0.7013, "step": 2467 }, { "epoch": 0.5283241015760884, "grad_norm": 0.17904394406706206, "learning_rate": 1.8520239244848703e-05, "loss": 0.6943, "step": 2468 }, { "epoch": 0.528538171309304, "grad_norm": 0.17778436166215425, "learning_rate": 1.8506688627413348e-05, "loss": 0.7192, "step": 2469 }, { "epoch": 0.5287522410425196, "grad_norm": 0.1785608464525092, "learning_rate": 1.849313869928778e-05, "loss": 0.6952, "step": 2470 }, { "epoch": 0.5289663107757352, "grad_norm": 0.1905136133771529, "learning_rate": 1.847958946672663e-05, "loss": 0.6875, "step": 2471 }, { "epoch": 0.5291803805089508, "grad_norm": 0.17887130244242824, "learning_rate": 1.8466040935984212e-05, "loss": 0.7104, "step": 2472 }, { "epoch": 0.5293944502421664, "grad_norm": 0.19150569144067042, "learning_rate": 1.8452493113314476e-05, "loss": 0.7318, "step": 2473 }, { "epoch": 0.5296085199753819, "grad_norm": 0.18285341693850285, "learning_rate": 1.8438946004971097e-05, "loss": 0.7137, "step": 2474 }, { "epoch": 0.5298225897085975, "grad_norm": 0.19699083379961962, "learning_rate": 1.8425399617207374e-05, "loss": 0.7144, "step": 2475 }, { "epoch": 0.5300366594418132, "grad_norm": 0.1800731314307582, "learning_rate": 1.8411853956276308e-05, "loss": 0.7416, "step": 2476 }, { "epoch": 0.5302507291750288, "grad_norm": 0.19874803726187476, "learning_rate": 1.8398309028430553e-05, "loss": 0.6787, "step": 2477 }, { "epoch": 0.5304647989082444, "grad_norm": 0.1788852593300529, "learning_rate": 1.8384764839922416e-05, "loss": 0.7287, "step": 2478 }, { "epoch": 0.53067886864146, "grad_norm": 0.19454786239054195, "learning_rate": 1.8371221397003877e-05, "loss": 0.7153, "step": 2479 }, { "epoch": 0.5308929383746755, "grad_norm": 0.1903782778511923, "learning_rate": 1.835767870592655e-05, "loss": 0.7272, "step": 2480 }, { "epoch": 0.5311070081078911, "grad_norm": 0.19530298402538088, "learning_rate": 1.8344136772941726e-05, "loss": 0.7331, "step": 2481 }, { "epoch": 0.5313210778411067, "grad_norm": 0.1852973371593222, "learning_rate": 1.833059560430035e-05, "loss": 0.7076, "step": 2482 }, { "epoch": 0.5315351475743223, "grad_norm": 0.18426494349686592, "learning_rate": 1.831705520625297e-05, "loss": 0.6885, "step": 2483 }, { "epoch": 0.5317492173075379, "grad_norm": 0.17152523280607498, "learning_rate": 1.8303515585049844e-05, "loss": 0.7142, "step": 2484 }, { "epoch": 0.5319632870407536, "grad_norm": 0.19455230656334577, "learning_rate": 1.8289976746940802e-05, "loss": 0.7088, "step": 2485 }, { "epoch": 0.5321773567739692, "grad_norm": 0.17678173349173643, "learning_rate": 1.8276438698175368e-05, "loss": 0.6899, "step": 2486 }, { "epoch": 0.5323914265071847, "grad_norm": 0.1825525586582506, "learning_rate": 1.826290144500268e-05, "loss": 0.7143, "step": 2487 }, { "epoch": 0.5326054962404003, "grad_norm": 0.1786219845029516, "learning_rate": 1.82493649936715e-05, "loss": 0.6967, "step": 2488 }, { "epoch": 0.5328195659736159, "grad_norm": 0.1732376213961871, "learning_rate": 1.8235829350430244e-05, "loss": 0.7377, "step": 2489 }, { "epoch": 0.5330336357068315, "grad_norm": 0.1730755691180236, "learning_rate": 1.822229452152692e-05, "loss": 0.7046, "step": 2490 }, { "epoch": 0.5332477054400471, "grad_norm": 0.16994482017550197, "learning_rate": 1.820876051320919e-05, "loss": 0.704, "step": 2491 }, { "epoch": 0.5334617751732627, "grad_norm": 0.3308067603674807, "learning_rate": 1.8195227331724335e-05, "loss": 0.721, "step": 2492 }, { "epoch": 0.5336758449064782, "grad_norm": 0.27628204100007286, "learning_rate": 1.8181694983319237e-05, "loss": 0.7047, "step": 2493 }, { "epoch": 0.5338899146396939, "grad_norm": 0.17224375740682724, "learning_rate": 1.816816347424041e-05, "loss": 0.7094, "step": 2494 }, { "epoch": 0.5341039843729095, "grad_norm": 0.18466956821318758, "learning_rate": 1.815463281073396e-05, "loss": 0.7179, "step": 2495 }, { "epoch": 0.5343180541061251, "grad_norm": 0.21228352619403165, "learning_rate": 1.814110299904563e-05, "loss": 0.6739, "step": 2496 }, { "epoch": 0.5345321238393407, "grad_norm": 0.17250103128322827, "learning_rate": 1.8127574045420764e-05, "loss": 0.6992, "step": 2497 }, { "epoch": 0.5347461935725563, "grad_norm": 0.2009493652652341, "learning_rate": 1.8114045956104278e-05, "loss": 0.7052, "step": 2498 }, { "epoch": 0.5349602633057718, "grad_norm": 0.17549417277338095, "learning_rate": 1.8100518737340734e-05, "loss": 0.6988, "step": 2499 }, { "epoch": 0.5351743330389874, "grad_norm": 0.19766238626801938, "learning_rate": 1.8086992395374258e-05, "loss": 0.7033, "step": 2500 }, { "epoch": 0.535388402772203, "grad_norm": 0.16937998352030223, "learning_rate": 1.807346693644859e-05, "loss": 0.7295, "step": 2501 }, { "epoch": 0.5356024725054186, "grad_norm": 0.19666489403075987, "learning_rate": 1.805994236680706e-05, "loss": 0.6958, "step": 2502 }, { "epoch": 0.5358165422386343, "grad_norm": 0.17901559768882327, "learning_rate": 1.8046418692692587e-05, "loss": 0.7262, "step": 2503 }, { "epoch": 0.5360306119718499, "grad_norm": 0.1871379220935042, "learning_rate": 1.8032895920347665e-05, "loss": 0.6701, "step": 2504 }, { "epoch": 0.5362446817050655, "grad_norm": 0.16836786587567446, "learning_rate": 1.8019374056014385e-05, "loss": 0.6858, "step": 2505 }, { "epoch": 0.536458751438281, "grad_norm": 0.20209169038728814, "learning_rate": 1.8005853105934417e-05, "loss": 0.7006, "step": 2506 }, { "epoch": 0.5366728211714966, "grad_norm": 0.17277068183601035, "learning_rate": 1.7992333076349e-05, "loss": 0.7348, "step": 2507 }, { "epoch": 0.5368868909047122, "grad_norm": 0.1882623040160267, "learning_rate": 1.7978813973498965e-05, "loss": 0.7129, "step": 2508 }, { "epoch": 0.5371009606379278, "grad_norm": 0.16952614277498532, "learning_rate": 1.7965295803624696e-05, "loss": 0.7196, "step": 2509 }, { "epoch": 0.5373150303711434, "grad_norm": 0.182894354867448, "learning_rate": 1.795177857296616e-05, "loss": 0.7146, "step": 2510 }, { "epoch": 0.537529100104359, "grad_norm": 0.17701931790939363, "learning_rate": 1.793826228776289e-05, "loss": 0.7213, "step": 2511 }, { "epoch": 0.5377431698375746, "grad_norm": 0.18513538550607694, "learning_rate": 1.7924746954253966e-05, "loss": 0.6996, "step": 2512 }, { "epoch": 0.5379572395707902, "grad_norm": 0.17918502491815952, "learning_rate": 1.791123257867805e-05, "loss": 0.7212, "step": 2513 }, { "epoch": 0.5381713093040058, "grad_norm": 0.19801345031052334, "learning_rate": 1.789771916727336e-05, "loss": 0.7277, "step": 2514 }, { "epoch": 0.5383853790372214, "grad_norm": 0.17546767434987479, "learning_rate": 1.7884206726277647e-05, "loss": 0.6851, "step": 2515 }, { "epoch": 0.538599448770437, "grad_norm": 0.18694259453156425, "learning_rate": 1.787069526192824e-05, "loss": 0.6869, "step": 2516 }, { "epoch": 0.5388135185036526, "grad_norm": 0.18277557945745132, "learning_rate": 1.7857184780461997e-05, "loss": 0.7258, "step": 2517 }, { "epoch": 0.5390275882368681, "grad_norm": 0.19117307738056796, "learning_rate": 1.7843675288115338e-05, "loss": 0.6986, "step": 2518 }, { "epoch": 0.5392416579700837, "grad_norm": 0.17177527905827172, "learning_rate": 1.7830166791124227e-05, "loss": 0.6963, "step": 2519 }, { "epoch": 0.5394557277032993, "grad_norm": 0.1923204926492419, "learning_rate": 1.7816659295724145e-05, "loss": 0.7134, "step": 2520 }, { "epoch": 0.539669797436515, "grad_norm": 0.1677900634007861, "learning_rate": 1.780315280815014e-05, "loss": 0.6986, "step": 2521 }, { "epoch": 0.5398838671697306, "grad_norm": 0.1939207425932993, "learning_rate": 1.7789647334636773e-05, "loss": 0.6829, "step": 2522 }, { "epoch": 0.5400979369029462, "grad_norm": 0.21773054703624947, "learning_rate": 1.7776142881418147e-05, "loss": 0.682, "step": 2523 }, { "epoch": 0.5403120066361617, "grad_norm": 0.18946022567408957, "learning_rate": 1.7762639454727905e-05, "loss": 0.7201, "step": 2524 }, { "epoch": 0.5405260763693773, "grad_norm": 0.1780260453553282, "learning_rate": 1.774913706079919e-05, "loss": 0.7285, "step": 2525 }, { "epoch": 0.5407401461025929, "grad_norm": 0.1831236711113459, "learning_rate": 1.7735635705864694e-05, "loss": 0.7015, "step": 2526 }, { "epoch": 0.5409542158358085, "grad_norm": 0.1947019346245163, "learning_rate": 1.77221353961566e-05, "loss": 0.7054, "step": 2527 }, { "epoch": 0.5411682855690241, "grad_norm": 0.17259788856167682, "learning_rate": 1.770863613790664e-05, "loss": 0.7167, "step": 2528 }, { "epoch": 0.5413823553022397, "grad_norm": 0.18353811467179074, "learning_rate": 1.769513793734605e-05, "loss": 0.7043, "step": 2529 }, { "epoch": 0.5415964250354554, "grad_norm": 0.17218651591518727, "learning_rate": 1.7681640800705564e-05, "loss": 0.7058, "step": 2530 }, { "epoch": 0.5418104947686709, "grad_norm": 0.18030673359814622, "learning_rate": 1.7668144734215448e-05, "loss": 0.6836, "step": 2531 }, { "epoch": 0.5420245645018865, "grad_norm": 0.17340359072259717, "learning_rate": 1.7654649744105447e-05, "loss": 0.724, "step": 2532 }, { "epoch": 0.5422386342351021, "grad_norm": 0.18179653224184358, "learning_rate": 1.7641155836604826e-05, "loss": 0.6939, "step": 2533 }, { "epoch": 0.5424527039683177, "grad_norm": 0.16520294440466055, "learning_rate": 1.7627663017942366e-05, "loss": 0.7026, "step": 2534 }, { "epoch": 0.5426667737015333, "grad_norm": 0.1941746797113647, "learning_rate": 1.7614171294346303e-05, "loss": 0.6831, "step": 2535 }, { "epoch": 0.5428808434347488, "grad_norm": 0.1764436024799967, "learning_rate": 1.7600680672044412e-05, "loss": 0.7112, "step": 2536 }, { "epoch": 0.5430949131679644, "grad_norm": 0.17581816549404258, "learning_rate": 1.758719115726392e-05, "loss": 0.6996, "step": 2537 }, { "epoch": 0.54330898290118, "grad_norm": 0.19685010722831256, "learning_rate": 1.7573702756231577e-05, "loss": 0.6726, "step": 2538 }, { "epoch": 0.5435230526343957, "grad_norm": 0.16555053117582616, "learning_rate": 1.7560215475173607e-05, "loss": 0.6879, "step": 2539 }, { "epoch": 0.5437371223676113, "grad_norm": 0.2000040365903853, "learning_rate": 1.75467293203157e-05, "loss": 0.7563, "step": 2540 }, { "epoch": 0.5439511921008269, "grad_norm": 0.1777753976061916, "learning_rate": 1.753324429788305e-05, "loss": 0.7281, "step": 2541 }, { "epoch": 0.5441652618340425, "grad_norm": 0.1746452388649787, "learning_rate": 1.751976041410032e-05, "loss": 0.7215, "step": 2542 }, { "epoch": 0.544379331567258, "grad_norm": 0.178911481431613, "learning_rate": 1.7506277675191635e-05, "loss": 0.696, "step": 2543 }, { "epoch": 0.5445934013004736, "grad_norm": 0.16467846087276522, "learning_rate": 1.7492796087380615e-05, "loss": 0.708, "step": 2544 }, { "epoch": 0.5448074710336892, "grad_norm": 0.18767225162608395, "learning_rate": 1.7479315656890332e-05, "loss": 0.7018, "step": 2545 }, { "epoch": 0.5450215407669048, "grad_norm": 0.15993321565973773, "learning_rate": 1.7465836389943327e-05, "loss": 0.6808, "step": 2546 }, { "epoch": 0.5452356105001204, "grad_norm": 0.17624590328709866, "learning_rate": 1.74523582927616e-05, "loss": 0.6877, "step": 2547 }, { "epoch": 0.545449680233336, "grad_norm": 0.15943500774359257, "learning_rate": 1.7438881371566633e-05, "loss": 0.6834, "step": 2548 }, { "epoch": 0.5456637499665516, "grad_norm": 0.1740527866314105, "learning_rate": 1.7425405632579328e-05, "loss": 0.6873, "step": 2549 }, { "epoch": 0.5458778196997672, "grad_norm": 0.17160190918333196, "learning_rate": 1.741193108202007e-05, "loss": 0.7255, "step": 2550 }, { "epoch": 0.5460918894329828, "grad_norm": 0.172909564738029, "learning_rate": 1.739845772610869e-05, "loss": 0.6853, "step": 2551 }, { "epoch": 0.5463059591661984, "grad_norm": 0.17533365700687084, "learning_rate": 1.738498557106446e-05, "loss": 0.715, "step": 2552 }, { "epoch": 0.546520028899414, "grad_norm": 0.1769648455907618, "learning_rate": 1.7371514623106106e-05, "loss": 0.6963, "step": 2553 }, { "epoch": 0.5467340986326296, "grad_norm": 0.1712616404508207, "learning_rate": 1.7358044888451787e-05, "loss": 0.7238, "step": 2554 }, { "epoch": 0.5469481683658451, "grad_norm": 0.1744943811449071, "learning_rate": 1.734457637331911e-05, "loss": 0.6942, "step": 2555 }, { "epoch": 0.5471622380990607, "grad_norm": 0.17051226930599167, "learning_rate": 1.7331109083925124e-05, "loss": 0.7011, "step": 2556 }, { "epoch": 0.5473763078322763, "grad_norm": 0.17398364127373958, "learning_rate": 1.731764302648629e-05, "loss": 0.7145, "step": 2557 }, { "epoch": 0.547590377565492, "grad_norm": 0.16602616685694127, "learning_rate": 1.7304178207218536e-05, "loss": 0.6965, "step": 2558 }, { "epoch": 0.5478044472987076, "grad_norm": 0.17487757201427861, "learning_rate": 1.729071463233718e-05, "loss": 0.6809, "step": 2559 }, { "epoch": 0.5480185170319232, "grad_norm": 0.17426771543368935, "learning_rate": 1.7277252308056986e-05, "loss": 0.7233, "step": 2560 }, { "epoch": 0.5482325867651388, "grad_norm": 0.1666560575533171, "learning_rate": 1.726379124059215e-05, "loss": 0.7168, "step": 2561 }, { "epoch": 0.5484466564983543, "grad_norm": 0.18332265522648047, "learning_rate": 1.7250331436156263e-05, "loss": 0.6603, "step": 2562 }, { "epoch": 0.5486607262315699, "grad_norm": 0.16905805750435804, "learning_rate": 1.7236872900962364e-05, "loss": 0.7031, "step": 2563 }, { "epoch": 0.5488747959647855, "grad_norm": 0.1743030892947747, "learning_rate": 1.722341564122286e-05, "loss": 0.6987, "step": 2564 }, { "epoch": 0.5490888656980011, "grad_norm": 0.17335530962757986, "learning_rate": 1.7209959663149617e-05, "loss": 0.6898, "step": 2565 }, { "epoch": 0.5493029354312167, "grad_norm": 0.17775522104816577, "learning_rate": 1.7196504972953897e-05, "loss": 0.7169, "step": 2566 }, { "epoch": 0.5495170051644324, "grad_norm": 0.17867472568886555, "learning_rate": 1.7183051576846335e-05, "loss": 0.6835, "step": 2567 }, { "epoch": 0.5497310748976479, "grad_norm": 0.17642125315462062, "learning_rate": 1.716959948103702e-05, "loss": 0.6756, "step": 2568 }, { "epoch": 0.5499451446308635, "grad_norm": 0.1763120193474745, "learning_rate": 1.7156148691735394e-05, "loss": 0.6895, "step": 2569 }, { "epoch": 0.5501592143640791, "grad_norm": 0.16548942879504808, "learning_rate": 1.7142699215150328e-05, "loss": 0.7236, "step": 2570 }, { "epoch": 0.5503732840972947, "grad_norm": 0.19127343155067736, "learning_rate": 1.7129251057490083e-05, "loss": 0.7185, "step": 2571 }, { "epoch": 0.5505873538305103, "grad_norm": 0.1686480442065763, "learning_rate": 1.711580422496228e-05, "loss": 0.6881, "step": 2572 }, { "epoch": 0.5508014235637259, "grad_norm": 0.1895178577464964, "learning_rate": 1.7102358723773983e-05, "loss": 0.7061, "step": 2573 }, { "epoch": 0.5510154932969414, "grad_norm": 0.18006601362226637, "learning_rate": 1.7088914560131582e-05, "loss": 0.734, "step": 2574 }, { "epoch": 0.551229563030157, "grad_norm": 0.18119314894926422, "learning_rate": 1.7075471740240893e-05, "loss": 0.7235, "step": 2575 }, { "epoch": 0.5514436327633727, "grad_norm": 0.18492238624793897, "learning_rate": 1.70620302703071e-05, "loss": 0.6922, "step": 2576 }, { "epoch": 0.5516577024965883, "grad_norm": 0.18838903479435018, "learning_rate": 1.7048590156534752e-05, "loss": 0.716, "step": 2577 }, { "epoch": 0.5518717722298039, "grad_norm": 0.1806492899153125, "learning_rate": 1.7035151405127793e-05, "loss": 0.733, "step": 2578 }, { "epoch": 0.5520858419630195, "grad_norm": 0.21034240583069322, "learning_rate": 1.7021714022289508e-05, "loss": 0.7136, "step": 2579 }, { "epoch": 0.552299911696235, "grad_norm": 0.16257297871228057, "learning_rate": 1.700827801422258e-05, "loss": 0.7046, "step": 2580 }, { "epoch": 0.5525139814294506, "grad_norm": 0.19590110820251028, "learning_rate": 1.699484338712905e-05, "loss": 0.7197, "step": 2581 }, { "epoch": 0.5527280511626662, "grad_norm": 0.17367892373826405, "learning_rate": 1.6981410147210305e-05, "loss": 0.688, "step": 2582 }, { "epoch": 0.5529421208958818, "grad_norm": 0.18599098998833577, "learning_rate": 1.6967978300667112e-05, "loss": 0.6871, "step": 2583 }, { "epoch": 0.5531561906290974, "grad_norm": 0.17739346439819054, "learning_rate": 1.6954547853699588e-05, "loss": 0.7012, "step": 2584 }, { "epoch": 0.5533702603623131, "grad_norm": 0.18558321718696819, "learning_rate": 1.6941118812507192e-05, "loss": 0.7022, "step": 2585 }, { "epoch": 0.5535843300955287, "grad_norm": 0.1791576529627214, "learning_rate": 1.692769118328876e-05, "loss": 0.6882, "step": 2586 }, { "epoch": 0.5537983998287442, "grad_norm": 0.17295047952605672, "learning_rate": 1.6914264972242455e-05, "loss": 0.7063, "step": 2587 }, { "epoch": 0.5540124695619598, "grad_norm": 0.1831145624108626, "learning_rate": 1.6900840185565788e-05, "loss": 0.6885, "step": 2588 }, { "epoch": 0.5542265392951754, "grad_norm": 0.17348722888698428, "learning_rate": 1.6887416829455615e-05, "loss": 0.7033, "step": 2589 }, { "epoch": 0.554440609028391, "grad_norm": 0.1698163045766598, "learning_rate": 1.687399491010814e-05, "loss": 0.7207, "step": 2590 }, { "epoch": 0.5546546787616066, "grad_norm": 0.19242643053915104, "learning_rate": 1.686057443371889e-05, "loss": 0.7041, "step": 2591 }, { "epoch": 0.5548687484948222, "grad_norm": 0.16853852755838822, "learning_rate": 1.684715540648273e-05, "loss": 0.6866, "step": 2592 }, { "epoch": 0.5550828182280377, "grad_norm": 0.171034883093637, "learning_rate": 1.6833737834593874e-05, "loss": 0.6867, "step": 2593 }, { "epoch": 0.5552968879612534, "grad_norm": 0.16969581032873562, "learning_rate": 1.6820321724245824e-05, "loss": 0.7015, "step": 2594 }, { "epoch": 0.555510957694469, "grad_norm": 0.17473056491589362, "learning_rate": 1.6806907081631458e-05, "loss": 0.6934, "step": 2595 }, { "epoch": 0.5557250274276846, "grad_norm": 0.165962716375049, "learning_rate": 1.6793493912942927e-05, "loss": 0.7182, "step": 2596 }, { "epoch": 0.5559390971609002, "grad_norm": 0.1632106660159136, "learning_rate": 1.678008222437174e-05, "loss": 0.6909, "step": 2597 }, { "epoch": 0.5561531668941158, "grad_norm": 0.17493867782339634, "learning_rate": 1.6766672022108712e-05, "loss": 0.6894, "step": 2598 }, { "epoch": 0.5563672366273313, "grad_norm": 0.15575514788163156, "learning_rate": 1.6753263312343948e-05, "loss": 0.6832, "step": 2599 }, { "epoch": 0.5565813063605469, "grad_norm": 0.18760416784486758, "learning_rate": 1.6739856101266907e-05, "loss": 0.6981, "step": 2600 }, { "epoch": 0.5567953760937625, "grad_norm": 0.1562575177121708, "learning_rate": 1.672645039506631e-05, "loss": 0.6472, "step": 2601 }, { "epoch": 0.5570094458269781, "grad_norm": 0.17204576736151733, "learning_rate": 1.671304619993022e-05, "loss": 0.7119, "step": 2602 }, { "epoch": 0.5572235155601938, "grad_norm": 0.17453972682224395, "learning_rate": 1.6699643522046e-05, "loss": 0.6771, "step": 2603 }, { "epoch": 0.5574375852934094, "grad_norm": 0.1746598260501258, "learning_rate": 1.6686242367600272e-05, "loss": 0.6948, "step": 2604 }, { "epoch": 0.557651655026625, "grad_norm": 0.17631254753360737, "learning_rate": 1.6672842742779013e-05, "loss": 0.7102, "step": 2605 }, { "epoch": 0.5578657247598405, "grad_norm": 0.16830374020009, "learning_rate": 1.6659444653767448e-05, "loss": 0.7043, "step": 2606 }, { "epoch": 0.5580797944930561, "grad_norm": 0.1753334674085283, "learning_rate": 1.6646048106750113e-05, "loss": 0.7314, "step": 2607 }, { "epoch": 0.5582938642262717, "grad_norm": 0.17564178531192245, "learning_rate": 1.663265310791084e-05, "loss": 0.6912, "step": 2608 }, { "epoch": 0.5585079339594873, "grad_norm": 0.17138112941188524, "learning_rate": 1.661925966343272e-05, "loss": 0.7065, "step": 2609 }, { "epoch": 0.5587220036927029, "grad_norm": 0.16152491010415923, "learning_rate": 1.6605867779498163e-05, "loss": 0.6636, "step": 2610 }, { "epoch": 0.5589360734259184, "grad_norm": 0.17239711920107642, "learning_rate": 1.6592477462288812e-05, "loss": 0.6798, "step": 2611 }, { "epoch": 0.5591501431591341, "grad_norm": 0.1779910584696381, "learning_rate": 1.6579088717985627e-05, "loss": 0.6925, "step": 2612 }, { "epoch": 0.5593642128923497, "grad_norm": 0.17606720772471096, "learning_rate": 1.656570155276884e-05, "loss": 0.7108, "step": 2613 }, { "epoch": 0.5595782826255653, "grad_norm": 0.1680597618729834, "learning_rate": 1.6552315972817918e-05, "loss": 0.6745, "step": 2614 }, { "epoch": 0.5597923523587809, "grad_norm": 0.158575936679841, "learning_rate": 1.653893198431164e-05, "loss": 0.6854, "step": 2615 }, { "epoch": 0.5600064220919965, "grad_norm": 0.18133138312663244, "learning_rate": 1.6525549593428017e-05, "loss": 0.6982, "step": 2616 }, { "epoch": 0.560220491825212, "grad_norm": 0.16530422178113097, "learning_rate": 1.6512168806344337e-05, "loss": 0.702, "step": 2617 }, { "epoch": 0.5604345615584276, "grad_norm": 0.17993673783077352, "learning_rate": 1.6498789629237163e-05, "loss": 0.7126, "step": 2618 }, { "epoch": 0.5606486312916432, "grad_norm": 0.17628390841629152, "learning_rate": 1.648541206828228e-05, "loss": 0.6928, "step": 2619 }, { "epoch": 0.5608627010248588, "grad_norm": 0.1644657133198672, "learning_rate": 1.6472036129654757e-05, "loss": 0.6985, "step": 2620 }, { "epoch": 0.5610767707580745, "grad_norm": 0.19042556331669824, "learning_rate": 1.645866181952889e-05, "loss": 0.6843, "step": 2621 }, { "epoch": 0.5612908404912901, "grad_norm": 0.16715886766047117, "learning_rate": 1.6445289144078244e-05, "loss": 0.6817, "step": 2622 }, { "epoch": 0.5615049102245057, "grad_norm": 0.18078979944140733, "learning_rate": 1.6431918109475634e-05, "loss": 0.6983, "step": 2623 }, { "epoch": 0.5617189799577212, "grad_norm": 0.16609052584351036, "learning_rate": 1.6418548721893082e-05, "loss": 0.7092, "step": 2624 }, { "epoch": 0.5619330496909368, "grad_norm": 0.17347879809534777, "learning_rate": 1.6405180987501888e-05, "loss": 0.6927, "step": 2625 }, { "epoch": 0.5621471194241524, "grad_norm": 0.17637115524575825, "learning_rate": 1.639181491247257e-05, "loss": 0.7165, "step": 2626 }, { "epoch": 0.562361189157368, "grad_norm": 0.1628063507107413, "learning_rate": 1.6378450502974882e-05, "loss": 0.7085, "step": 2627 }, { "epoch": 0.5625752588905836, "grad_norm": 0.1802493672573469, "learning_rate": 1.6365087765177812e-05, "loss": 0.7201, "step": 2628 }, { "epoch": 0.5627893286237992, "grad_norm": 0.16543058426564483, "learning_rate": 1.635172670524958e-05, "loss": 0.6728, "step": 2629 }, { "epoch": 0.5630033983570149, "grad_norm": 0.17102790589571118, "learning_rate": 1.633836732935762e-05, "loss": 0.6901, "step": 2630 }, { "epoch": 0.5632174680902304, "grad_norm": 0.1730735339285674, "learning_rate": 1.6325009643668592e-05, "loss": 0.7195, "step": 2631 }, { "epoch": 0.563431537823446, "grad_norm": 0.17142689767208405, "learning_rate": 1.6311653654348395e-05, "loss": 0.6971, "step": 2632 }, { "epoch": 0.5636456075566616, "grad_norm": 0.16529788965033945, "learning_rate": 1.6298299367562114e-05, "loss": 0.6927, "step": 2633 }, { "epoch": 0.5638596772898772, "grad_norm": 0.18094645166343623, "learning_rate": 1.6284946789474066e-05, "loss": 0.6952, "step": 2634 }, { "epoch": 0.5640737470230928, "grad_norm": 0.17062995761548738, "learning_rate": 1.627159592624779e-05, "loss": 0.6907, "step": 2635 }, { "epoch": 0.5642878167563083, "grad_norm": 0.17924096999215827, "learning_rate": 1.6258246784045994e-05, "loss": 0.6946, "step": 2636 }, { "epoch": 0.5645018864895239, "grad_norm": 0.17725927375921324, "learning_rate": 1.6244899369030647e-05, "loss": 0.6766, "step": 2637 }, { "epoch": 0.5647159562227395, "grad_norm": 0.1792954319475049, "learning_rate": 1.623155368736287e-05, "loss": 0.7019, "step": 2638 }, { "epoch": 0.5649300259559552, "grad_norm": 0.17573567680536856, "learning_rate": 1.621820974520301e-05, "loss": 0.6866, "step": 2639 }, { "epoch": 0.5651440956891708, "grad_norm": 0.17827040307814296, "learning_rate": 1.6204867548710618e-05, "loss": 0.7164, "step": 2640 }, { "epoch": 0.5653581654223864, "grad_norm": 0.1823574922633907, "learning_rate": 1.6191527104044407e-05, "loss": 0.6762, "step": 2641 }, { "epoch": 0.565572235155602, "grad_norm": 0.17409638027917007, "learning_rate": 1.6178188417362326e-05, "loss": 0.6839, "step": 2642 }, { "epoch": 0.5657863048888175, "grad_norm": 0.17981781509710126, "learning_rate": 1.6164851494821463e-05, "loss": 0.7023, "step": 2643 }, { "epoch": 0.5660003746220331, "grad_norm": 0.17012733163495317, "learning_rate": 1.6151516342578132e-05, "loss": 0.6899, "step": 2644 }, { "epoch": 0.5662144443552487, "grad_norm": 0.16773900252249282, "learning_rate": 1.6138182966787822e-05, "loss": 0.6975, "step": 2645 }, { "epoch": 0.5664285140884643, "grad_norm": 0.1597815913860277, "learning_rate": 1.6124851373605174e-05, "loss": 0.6907, "step": 2646 }, { "epoch": 0.5666425838216799, "grad_norm": 0.2552136725855513, "learning_rate": 1.6111521569184047e-05, "loss": 0.7129, "step": 2647 }, { "epoch": 0.5668566535548956, "grad_norm": 0.16858063026817874, "learning_rate": 1.609819355967744e-05, "loss": 0.6801, "step": 2648 }, { "epoch": 0.5670707232881111, "grad_norm": 0.25403964328041073, "learning_rate": 1.6084867351237538e-05, "loss": 0.721, "step": 2649 }, { "epoch": 0.5672847930213267, "grad_norm": 0.168614949343269, "learning_rate": 1.6071542950015713e-05, "loss": 0.7157, "step": 2650 }, { "epoch": 0.5674988627545423, "grad_norm": 0.1697398856475484, "learning_rate": 1.605822036216246e-05, "loss": 0.7337, "step": 2651 }, { "epoch": 0.5677129324877579, "grad_norm": 0.16600190026545894, "learning_rate": 1.604489959382748e-05, "loss": 0.695, "step": 2652 }, { "epoch": 0.5679270022209735, "grad_norm": 0.1757455401999051, "learning_rate": 1.60315806511596e-05, "loss": 0.7007, "step": 2653 }, { "epoch": 0.5681410719541891, "grad_norm": 0.1656177914157262, "learning_rate": 1.6018263540306827e-05, "loss": 0.6779, "step": 2654 }, { "epoch": 0.5683551416874046, "grad_norm": 0.1704239067581863, "learning_rate": 1.6004948267416326e-05, "loss": 0.6823, "step": 2655 }, { "epoch": 0.5685692114206202, "grad_norm": 8.912496396978549, "learning_rate": 1.599163483863438e-05, "loss": 0.7275, "step": 2656 }, { "epoch": 0.5687832811538358, "grad_norm": 0.1855300823528869, "learning_rate": 1.5978323260106463e-05, "loss": 0.6995, "step": 2657 }, { "epoch": 0.5689973508870515, "grad_norm": 0.16269820285223507, "learning_rate": 1.596501353797716e-05, "loss": 0.6987, "step": 2658 }, { "epoch": 0.5692114206202671, "grad_norm": 0.23223472693764194, "learning_rate": 1.595170567839022e-05, "loss": 0.7094, "step": 2659 }, { "epoch": 0.5694254903534827, "grad_norm": 0.17086118303863418, "learning_rate": 1.5938399687488536e-05, "loss": 0.7075, "step": 2660 }, { "epoch": 0.5696395600866982, "grad_norm": 0.18525578051426128, "learning_rate": 1.5925095571414116e-05, "loss": 0.6659, "step": 2661 }, { "epoch": 0.5698536298199138, "grad_norm": 0.1697856717238186, "learning_rate": 1.5911793336308126e-05, "loss": 0.6949, "step": 2662 }, { "epoch": 0.5700676995531294, "grad_norm": 0.17264177768579375, "learning_rate": 1.589849298831084e-05, "loss": 0.7093, "step": 2663 }, { "epoch": 0.570281769286345, "grad_norm": 0.17456447740166398, "learning_rate": 1.5885194533561688e-05, "loss": 0.6846, "step": 2664 }, { "epoch": 0.5704958390195606, "grad_norm": 0.1660651117548356, "learning_rate": 1.5871897978199213e-05, "loss": 0.7231, "step": 2665 }, { "epoch": 0.5707099087527762, "grad_norm": 0.16830182730304524, "learning_rate": 1.5858603328361062e-05, "loss": 0.6791, "step": 2666 }, { "epoch": 0.5709239784859919, "grad_norm": 0.17009361623168645, "learning_rate": 1.584531059018404e-05, "loss": 0.6895, "step": 2667 }, { "epoch": 0.5711380482192074, "grad_norm": 0.1636000034657648, "learning_rate": 1.5832019769804046e-05, "loss": 0.6814, "step": 2668 }, { "epoch": 0.571352117952423, "grad_norm": 0.16430727377195042, "learning_rate": 1.5818730873356096e-05, "loss": 0.7202, "step": 2669 }, { "epoch": 0.5715661876856386, "grad_norm": 0.1751136044026341, "learning_rate": 1.580544390697431e-05, "loss": 0.6912, "step": 2670 }, { "epoch": 0.5717802574188542, "grad_norm": 0.16778141925927165, "learning_rate": 1.579215887679195e-05, "loss": 0.7356, "step": 2671 }, { "epoch": 0.5719943271520698, "grad_norm": 0.1748980080116598, "learning_rate": 1.5778875788941348e-05, "loss": 0.6911, "step": 2672 }, { "epoch": 0.5722083968852854, "grad_norm": 0.16807866825292672, "learning_rate": 1.576559464955395e-05, "loss": 0.7164, "step": 2673 }, { "epoch": 0.5724224666185009, "grad_norm": 0.22042166108926137, "learning_rate": 1.5752315464760316e-05, "loss": 0.7545, "step": 2674 }, { "epoch": 0.5726365363517165, "grad_norm": 0.16454085108082656, "learning_rate": 1.5739038240690084e-05, "loss": 0.6824, "step": 2675 }, { "epoch": 0.5728506060849322, "grad_norm": 0.17524623135800574, "learning_rate": 1.5725762983472e-05, "loss": 0.7217, "step": 2676 }, { "epoch": 0.5730646758181478, "grad_norm": 0.17727082057636445, "learning_rate": 1.5712489699233903e-05, "loss": 0.7177, "step": 2677 }, { "epoch": 0.5732787455513634, "grad_norm": 0.1756032630367442, "learning_rate": 1.5699218394102705e-05, "loss": 0.7081, "step": 2678 }, { "epoch": 0.573492815284579, "grad_norm": 0.18349669376757125, "learning_rate": 1.5685949074204436e-05, "loss": 0.7097, "step": 2679 }, { "epoch": 0.5737068850177945, "grad_norm": 0.17347757811077608, "learning_rate": 1.5672681745664162e-05, "loss": 0.7062, "step": 2680 }, { "epoch": 0.5739209547510101, "grad_norm": 0.17591544246657548, "learning_rate": 1.5659416414606075e-05, "loss": 0.6869, "step": 2681 }, { "epoch": 0.5741350244842257, "grad_norm": 0.1808924848994917, "learning_rate": 1.5646153087153437e-05, "loss": 0.6883, "step": 2682 }, { "epoch": 0.5743490942174413, "grad_norm": 0.17121195399124406, "learning_rate": 1.5632891769428554e-05, "loss": 0.6745, "step": 2683 }, { "epoch": 0.5745631639506569, "grad_norm": 0.1769316701709479, "learning_rate": 1.5619632467552842e-05, "loss": 0.701, "step": 2684 }, { "epoch": 0.5747772336838726, "grad_norm": 0.1722387619036292, "learning_rate": 1.5606375187646755e-05, "loss": 0.7072, "step": 2685 }, { "epoch": 0.5749913034170882, "grad_norm": 0.2063465496069471, "learning_rate": 1.5593119935829844e-05, "loss": 0.7169, "step": 2686 }, { "epoch": 0.5752053731503037, "grad_norm": 0.16092048876232048, "learning_rate": 1.557986671822071e-05, "loss": 0.6831, "step": 2687 }, { "epoch": 0.5754194428835193, "grad_norm": 0.17672007983765237, "learning_rate": 1.5566615540936994e-05, "loss": 0.6648, "step": 2688 }, { "epoch": 0.5756335126167349, "grad_norm": 0.16833732076961716, "learning_rate": 1.5553366410095443e-05, "loss": 0.6987, "step": 2689 }, { "epoch": 0.5758475823499505, "grad_norm": 0.16961713857908878, "learning_rate": 1.5540119331811807e-05, "loss": 0.7092, "step": 2690 }, { "epoch": 0.5760616520831661, "grad_norm": 0.1694528421671422, "learning_rate": 1.5526874312200923e-05, "loss": 0.7058, "step": 2691 }, { "epoch": 0.5762757218163816, "grad_norm": 0.17170558961931925, "learning_rate": 1.5513631357376677e-05, "loss": 0.682, "step": 2692 }, { "epoch": 0.5764897915495972, "grad_norm": 0.16871501912314427, "learning_rate": 1.5500390473451977e-05, "loss": 0.6945, "step": 2693 }, { "epoch": 0.5767038612828129, "grad_norm": 0.17504256833279142, "learning_rate": 1.54871516665388e-05, "loss": 0.7149, "step": 2694 }, { "epoch": 0.5769179310160285, "grad_norm": 0.1633022256535095, "learning_rate": 1.547391494274815e-05, "loss": 0.6902, "step": 2695 }, { "epoch": 0.5771320007492441, "grad_norm": 0.1655482773500039, "learning_rate": 1.5460680308190076e-05, "loss": 0.713, "step": 2696 }, { "epoch": 0.5773460704824597, "grad_norm": 0.16413844359398785, "learning_rate": 1.544744776897367e-05, "loss": 0.7046, "step": 2697 }, { "epoch": 0.5775601402156753, "grad_norm": 0.1629869752071672, "learning_rate": 1.543421733120703e-05, "loss": 0.7139, "step": 2698 }, { "epoch": 0.5777742099488908, "grad_norm": 0.15968617666908208, "learning_rate": 1.5420989000997324e-05, "loss": 0.6876, "step": 2699 }, { "epoch": 0.5779882796821064, "grad_norm": 0.16300519938956157, "learning_rate": 1.5407762784450705e-05, "loss": 0.6885, "step": 2700 }, { "epoch": 0.578202349415322, "grad_norm": 0.16071963520224197, "learning_rate": 1.5394538687672372e-05, "loss": 0.7394, "step": 2701 }, { "epoch": 0.5784164191485376, "grad_norm": 0.17296846367692134, "learning_rate": 1.5381316716766566e-05, "loss": 0.6864, "step": 2702 }, { "epoch": 0.5786304888817533, "grad_norm": 0.16966570424573013, "learning_rate": 1.5368096877836495e-05, "loss": 0.6946, "step": 2703 }, { "epoch": 0.5788445586149689, "grad_norm": 0.16801847811970264, "learning_rate": 1.5354879176984438e-05, "loss": 0.7012, "step": 2704 }, { "epoch": 0.5790586283481844, "grad_norm": 0.16345224853796655, "learning_rate": 1.5341663620311638e-05, "loss": 0.6963, "step": 2705 }, { "epoch": 0.5792726980814, "grad_norm": 0.16318160860193356, "learning_rate": 1.5328450213918388e-05, "loss": 0.7224, "step": 2706 }, { "epoch": 0.5794867678146156, "grad_norm": 0.16574054622915096, "learning_rate": 1.531523896390397e-05, "loss": 0.73, "step": 2707 }, { "epoch": 0.5797008375478312, "grad_norm": 0.166160445983208, "learning_rate": 1.5302029876366667e-05, "loss": 0.7126, "step": 2708 }, { "epoch": 0.5799149072810468, "grad_norm": 0.15654746520301857, "learning_rate": 1.5288822957403775e-05, "loss": 0.6844, "step": 2709 }, { "epoch": 0.5801289770142624, "grad_norm": 0.17873582425754628, "learning_rate": 1.527561821311158e-05, "loss": 0.7014, "step": 2710 }, { "epoch": 0.5803430467474779, "grad_norm": 0.1640892628615522, "learning_rate": 1.5262415649585375e-05, "loss": 0.7246, "step": 2711 }, { "epoch": 0.5805571164806936, "grad_norm": 0.16525787461111266, "learning_rate": 1.5249215272919424e-05, "loss": 0.7177, "step": 2712 }, { "epoch": 0.5807711862139092, "grad_norm": 0.1650028056548862, "learning_rate": 1.5236017089207014e-05, "loss": 0.6935, "step": 2713 }, { "epoch": 0.5809852559471248, "grad_norm": 0.16254064172306787, "learning_rate": 1.5222821104540393e-05, "loss": 0.6909, "step": 2714 }, { "epoch": 0.5811993256803404, "grad_norm": 0.15840833276929261, "learning_rate": 1.52096273250108e-05, "loss": 0.662, "step": 2715 }, { "epoch": 0.581413395413556, "grad_norm": 0.16580998810550387, "learning_rate": 1.5196435756708474e-05, "loss": 0.6955, "step": 2716 }, { "epoch": 0.5816274651467715, "grad_norm": 0.16274111320780607, "learning_rate": 1.5183246405722603e-05, "loss": 0.7137, "step": 2717 }, { "epoch": 0.5818415348799871, "grad_norm": 0.16765261211098964, "learning_rate": 1.5170059278141371e-05, "loss": 0.6955, "step": 2718 }, { "epoch": 0.5820556046132027, "grad_norm": 0.1702200327390562, "learning_rate": 1.5156874380051945e-05, "loss": 0.7213, "step": 2719 }, { "epoch": 0.5822696743464183, "grad_norm": 0.1854837920569842, "learning_rate": 1.514369171754043e-05, "loss": 0.7008, "step": 2720 }, { "epoch": 0.582483744079634, "grad_norm": 0.18219078464002583, "learning_rate": 1.5130511296691937e-05, "loss": 0.697, "step": 2721 }, { "epoch": 0.5826978138128496, "grad_norm": 0.1788694770784065, "learning_rate": 1.5117333123590508e-05, "loss": 0.7069, "step": 2722 }, { "epoch": 0.5829118835460652, "grad_norm": 0.1746042320161375, "learning_rate": 1.5104157204319169e-05, "loss": 0.7078, "step": 2723 }, { "epoch": 0.5831259532792807, "grad_norm": 0.17680237787055583, "learning_rate": 1.5090983544959914e-05, "loss": 0.6968, "step": 2724 }, { "epoch": 0.5833400230124963, "grad_norm": 0.17766671131807185, "learning_rate": 1.5077812151593655e-05, "loss": 0.7207, "step": 2725 }, { "epoch": 0.5835540927457119, "grad_norm": 0.17263375304450626, "learning_rate": 1.506464303030031e-05, "loss": 0.6882, "step": 2726 }, { "epoch": 0.5837681624789275, "grad_norm": 0.1773322650045857, "learning_rate": 1.5051476187158698e-05, "loss": 0.7228, "step": 2727 }, { "epoch": 0.5839822322121431, "grad_norm": 0.16299003445156277, "learning_rate": 1.5038311628246619e-05, "loss": 0.7027, "step": 2728 }, { "epoch": 0.5841963019453587, "grad_norm": 0.1865519783868536, "learning_rate": 1.5025149359640816e-05, "loss": 0.7296, "step": 2729 }, { "epoch": 0.5844103716785743, "grad_norm": 0.1627341280597865, "learning_rate": 1.5011989387416954e-05, "loss": 0.6691, "step": 2730 }, { "epoch": 0.5846244414117899, "grad_norm": 0.16657588961357114, "learning_rate": 1.4998831717649668e-05, "loss": 0.6908, "step": 2731 }, { "epoch": 0.5848385111450055, "grad_norm": 0.20490407719349335, "learning_rate": 1.49856763564125e-05, "loss": 0.7106, "step": 2732 }, { "epoch": 0.5850525808782211, "grad_norm": 0.16340349893692208, "learning_rate": 1.4972523309777947e-05, "loss": 0.7386, "step": 2733 }, { "epoch": 0.5852666506114367, "grad_norm": 0.17609615496676675, "learning_rate": 1.4959372583817438e-05, "loss": 0.6765, "step": 2734 }, { "epoch": 0.5854807203446523, "grad_norm": 0.16286203130168111, "learning_rate": 1.4946224184601308e-05, "loss": 0.6992, "step": 2735 }, { "epoch": 0.5856947900778678, "grad_norm": 0.16676236941421238, "learning_rate": 1.4933078118198851e-05, "loss": 0.7078, "step": 2736 }, { "epoch": 0.5859088598110834, "grad_norm": 0.15477849951635472, "learning_rate": 1.4919934390678252e-05, "loss": 0.6895, "step": 2737 }, { "epoch": 0.586122929544299, "grad_norm": 0.1704130964499219, "learning_rate": 1.490679300810664e-05, "loss": 0.7007, "step": 2738 }, { "epoch": 0.5863369992775147, "grad_norm": 0.15630732815757206, "learning_rate": 1.4893653976550057e-05, "loss": 0.6827, "step": 2739 }, { "epoch": 0.5865510690107303, "grad_norm": 0.16091649855807497, "learning_rate": 1.4880517302073447e-05, "loss": 0.6951, "step": 2740 }, { "epoch": 0.5867651387439459, "grad_norm": 0.15774658223729976, "learning_rate": 1.4867382990740686e-05, "loss": 0.6812, "step": 2741 }, { "epoch": 0.5869792084771615, "grad_norm": 0.1646144254437179, "learning_rate": 1.4854251048614531e-05, "loss": 0.7227, "step": 2742 }, { "epoch": 0.587193278210377, "grad_norm": 0.15827064039679917, "learning_rate": 1.484112148175667e-05, "loss": 0.7189, "step": 2743 }, { "epoch": 0.5874073479435926, "grad_norm": 0.16587505835067617, "learning_rate": 1.4827994296227704e-05, "loss": 0.6933, "step": 2744 }, { "epoch": 0.5876214176768082, "grad_norm": 0.1576660131843526, "learning_rate": 1.481486949808709e-05, "loss": 0.7036, "step": 2745 }, { "epoch": 0.5878354874100238, "grad_norm": 0.1618836470440896, "learning_rate": 1.4801747093393229e-05, "loss": 0.7007, "step": 2746 }, { "epoch": 0.5880495571432394, "grad_norm": 0.16452588982706243, "learning_rate": 1.4788627088203394e-05, "loss": 0.6932, "step": 2747 }, { "epoch": 0.5882636268764551, "grad_norm": 0.16883568099615812, "learning_rate": 1.4775509488573751e-05, "loss": 0.7159, "step": 2748 }, { "epoch": 0.5884776966096706, "grad_norm": 0.16325082793248, "learning_rate": 1.4762394300559373e-05, "loss": 0.6863, "step": 2749 }, { "epoch": 0.5886917663428862, "grad_norm": 0.16526408761461683, "learning_rate": 1.474928153021419e-05, "loss": 0.7169, "step": 2750 }, { "epoch": 0.5889058360761018, "grad_norm": 0.1689393345787796, "learning_rate": 1.4736171183591044e-05, "loss": 0.6992, "step": 2751 }, { "epoch": 0.5891199058093174, "grad_norm": 0.5073757563284773, "learning_rate": 1.4723063266741644e-05, "loss": 0.7031, "step": 2752 }, { "epoch": 0.589333975542533, "grad_norm": 0.16192856735154973, "learning_rate": 1.4709957785716582e-05, "loss": 0.6941, "step": 2753 }, { "epoch": 0.5895480452757486, "grad_norm": 0.16701122262533197, "learning_rate": 1.4696854746565316e-05, "loss": 0.6918, "step": 2754 }, { "epoch": 0.5897621150089641, "grad_norm": 0.1796473393516434, "learning_rate": 1.4683754155336196e-05, "loss": 0.7097, "step": 2755 }, { "epoch": 0.5899761847421797, "grad_norm": 0.16193738971829216, "learning_rate": 1.4670656018076428e-05, "loss": 0.7207, "step": 2756 }, { "epoch": 0.5901902544753954, "grad_norm": 0.17590962510084385, "learning_rate": 1.4657560340832078e-05, "loss": 0.6936, "step": 2757 }, { "epoch": 0.590404324208611, "grad_norm": 0.30933642674275985, "learning_rate": 1.4644467129648106e-05, "loss": 0.7, "step": 2758 }, { "epoch": 0.5906183939418266, "grad_norm": 0.178397385660487, "learning_rate": 1.4631376390568294e-05, "loss": 0.7173, "step": 2759 }, { "epoch": 0.5908324636750422, "grad_norm": 0.19148816228214477, "learning_rate": 1.4618288129635314e-05, "loss": 0.6957, "step": 2760 }, { "epoch": 0.5910465334082577, "grad_norm": 0.17337939676446942, "learning_rate": 1.4605202352890691e-05, "loss": 0.687, "step": 2761 }, { "epoch": 0.5912606031414733, "grad_norm": 0.16812102940261198, "learning_rate": 1.4592119066374775e-05, "loss": 0.715, "step": 2762 }, { "epoch": 0.5914746728746889, "grad_norm": 0.19026483539071762, "learning_rate": 1.4579038276126806e-05, "loss": 0.6858, "step": 2763 }, { "epoch": 0.5916887426079045, "grad_norm": 0.1630638900718737, "learning_rate": 1.456595998818484e-05, "loss": 0.704, "step": 2764 }, { "epoch": 0.5919028123411201, "grad_norm": 0.16555163385873223, "learning_rate": 1.4552884208585796e-05, "loss": 0.6952, "step": 2765 }, { "epoch": 0.5921168820743357, "grad_norm": 0.1668396705760013, "learning_rate": 1.4539810943365438e-05, "loss": 0.6782, "step": 2766 }, { "epoch": 0.5923309518075514, "grad_norm": 0.21188407067690096, "learning_rate": 1.4526740198558345e-05, "loss": 0.7056, "step": 2767 }, { "epoch": 0.5925450215407669, "grad_norm": 0.1756583186916101, "learning_rate": 1.4513671980197964e-05, "loss": 0.7094, "step": 2768 }, { "epoch": 0.5927590912739825, "grad_norm": 0.16932947661767148, "learning_rate": 1.4500606294316545e-05, "loss": 0.678, "step": 2769 }, { "epoch": 0.5929731610071981, "grad_norm": 0.1680398258976508, "learning_rate": 1.4487543146945196e-05, "loss": 0.7007, "step": 2770 }, { "epoch": 0.5931872307404137, "grad_norm": 0.17438446698121887, "learning_rate": 1.4474482544113846e-05, "loss": 0.7141, "step": 2771 }, { "epoch": 0.5934013004736293, "grad_norm": 0.16077979710309473, "learning_rate": 1.446142449185123e-05, "loss": 0.6817, "step": 2772 }, { "epoch": 0.5936153702068449, "grad_norm": 0.16452070105806216, "learning_rate": 1.444836899618494e-05, "loss": 0.7056, "step": 2773 }, { "epoch": 0.5938294399400604, "grad_norm": 0.17937973900146656, "learning_rate": 1.4435316063141347e-05, "loss": 0.7024, "step": 2774 }, { "epoch": 0.594043509673276, "grad_norm": 0.16968575232261435, "learning_rate": 1.4422265698745676e-05, "loss": 0.6885, "step": 2775 }, { "epoch": 0.5942575794064917, "grad_norm": 0.16299774524171282, "learning_rate": 1.4409217909021958e-05, "loss": 0.7112, "step": 2776 }, { "epoch": 0.5944716491397073, "grad_norm": 0.16631355701424153, "learning_rate": 1.4396172699993004e-05, "loss": 0.7231, "step": 2777 }, { "epoch": 0.5946857188729229, "grad_norm": 0.23119997682126242, "learning_rate": 1.4383130077680489e-05, "loss": 0.7195, "step": 2778 }, { "epoch": 0.5948997886061385, "grad_norm": 3.138037986720887, "learning_rate": 1.4370090048104835e-05, "loss": 0.7344, "step": 2779 }, { "epoch": 0.595113858339354, "grad_norm": 0.20002142315450172, "learning_rate": 1.435705261728531e-05, "loss": 0.7108, "step": 2780 }, { "epoch": 0.5953279280725696, "grad_norm": 0.16604958792986207, "learning_rate": 1.4344017791239976e-05, "loss": 0.6958, "step": 2781 }, { "epoch": 0.5955419978057852, "grad_norm": 0.1700742915211355, "learning_rate": 1.4330985575985668e-05, "loss": 0.7043, "step": 2782 }, { "epoch": 0.5957560675390008, "grad_norm": 0.1705322217778988, "learning_rate": 1.4317955977538047e-05, "loss": 0.7016, "step": 2783 }, { "epoch": 0.5959701372722164, "grad_norm": 0.1695689380361238, "learning_rate": 1.4304929001911538e-05, "loss": 0.7111, "step": 2784 }, { "epoch": 0.5961842070054321, "grad_norm": 0.16783008074734213, "learning_rate": 1.4291904655119378e-05, "loss": 0.7157, "step": 2785 }, { "epoch": 0.5963982767386476, "grad_norm": 0.20520283635870903, "learning_rate": 1.4278882943173586e-05, "loss": 0.7137, "step": 2786 }, { "epoch": 0.5966123464718632, "grad_norm": 0.18463803305822762, "learning_rate": 1.4265863872084947e-05, "loss": 0.6937, "step": 2787 }, { "epoch": 0.5968264162050788, "grad_norm": 0.166916375414561, "learning_rate": 1.4252847447863052e-05, "loss": 0.679, "step": 2788 }, { "epoch": 0.5970404859382944, "grad_norm": 0.16078833682187244, "learning_rate": 1.4239833676516254e-05, "loss": 0.7056, "step": 2789 }, { "epoch": 0.59725455567151, "grad_norm": 0.16483393450802797, "learning_rate": 1.4226822564051685e-05, "loss": 0.7233, "step": 2790 }, { "epoch": 0.5974686254047256, "grad_norm": 0.17327600889177555, "learning_rate": 1.4213814116475253e-05, "loss": 0.7395, "step": 2791 }, { "epoch": 0.5976826951379411, "grad_norm": 0.169055947008346, "learning_rate": 1.4200808339791636e-05, "loss": 0.6527, "step": 2792 }, { "epoch": 0.5978967648711567, "grad_norm": 0.19770003659598154, "learning_rate": 1.418780524000427e-05, "loss": 0.6789, "step": 2793 }, { "epoch": 0.5981108346043724, "grad_norm": 0.16378956125142946, "learning_rate": 1.4174804823115369e-05, "loss": 0.683, "step": 2794 }, { "epoch": 0.598324904337588, "grad_norm": 0.1749418451018798, "learning_rate": 1.4161807095125898e-05, "loss": 0.7052, "step": 2795 }, { "epoch": 0.5985389740708036, "grad_norm": 0.1655426744522246, "learning_rate": 1.4148812062035577e-05, "loss": 0.7057, "step": 2796 }, { "epoch": 0.5987530438040192, "grad_norm": 0.16307665483456588, "learning_rate": 1.4135819729842903e-05, "loss": 0.7071, "step": 2797 }, { "epoch": 0.5989671135372348, "grad_norm": 0.1676887388934184, "learning_rate": 1.412283010454511e-05, "loss": 0.713, "step": 2798 }, { "epoch": 0.5991811832704503, "grad_norm": 0.1707732594147557, "learning_rate": 1.4109843192138173e-05, "loss": 0.696, "step": 2799 }, { "epoch": 0.5993952530036659, "grad_norm": 0.1592237316422158, "learning_rate": 1.409685899861685e-05, "loss": 0.6917, "step": 2800 }, { "epoch": 0.5996093227368815, "grad_norm": 0.164504155732762, "learning_rate": 1.4083877529974594e-05, "loss": 0.6847, "step": 2801 }, { "epoch": 0.5998233924700971, "grad_norm": 0.16961189605041485, "learning_rate": 1.4070898792203643e-05, "loss": 0.7063, "step": 2802 }, { "epoch": 0.6000374622033128, "grad_norm": 0.16920821739088487, "learning_rate": 1.405792279129496e-05, "loss": 0.675, "step": 2803 }, { "epoch": 0.6002515319365284, "grad_norm": 0.1639359750351902, "learning_rate": 1.4044949533238237e-05, "loss": 0.7148, "step": 2804 }, { "epoch": 0.6004656016697439, "grad_norm": 0.16622171843395203, "learning_rate": 1.4031979024021913e-05, "loss": 0.6685, "step": 2805 }, { "epoch": 0.6006796714029595, "grad_norm": 0.16717356812615172, "learning_rate": 1.4019011269633138e-05, "loss": 0.6911, "step": 2806 }, { "epoch": 0.6008937411361751, "grad_norm": 0.16108208873293745, "learning_rate": 1.4006046276057813e-05, "loss": 0.6939, "step": 2807 }, { "epoch": 0.6011078108693907, "grad_norm": 0.16295024827925517, "learning_rate": 1.3993084049280563e-05, "loss": 0.6948, "step": 2808 }, { "epoch": 0.6013218806026063, "grad_norm": 0.15832520855149404, "learning_rate": 1.398012459528471e-05, "loss": 0.7063, "step": 2809 }, { "epoch": 0.6015359503358219, "grad_norm": 0.16825850548728402, "learning_rate": 1.3967167920052336e-05, "loss": 0.7239, "step": 2810 }, { "epoch": 0.6017500200690374, "grad_norm": 0.15987064419423355, "learning_rate": 1.3954214029564195e-05, "loss": 0.7036, "step": 2811 }, { "epoch": 0.6019640898022531, "grad_norm": 0.1680518196512475, "learning_rate": 1.394126292979979e-05, "loss": 0.7191, "step": 2812 }, { "epoch": 0.6021781595354687, "grad_norm": 0.16920494731449956, "learning_rate": 1.3928314626737338e-05, "loss": 0.7226, "step": 2813 }, { "epoch": 0.6023922292686843, "grad_norm": 0.15814715156032466, "learning_rate": 1.3915369126353728e-05, "loss": 0.6922, "step": 2814 }, { "epoch": 0.6026062990018999, "grad_norm": 0.18116438775219987, "learning_rate": 1.3902426434624601e-05, "loss": 0.7135, "step": 2815 }, { "epoch": 0.6028203687351155, "grad_norm": 0.15802119074031223, "learning_rate": 1.3889486557524258e-05, "loss": 0.6722, "step": 2816 }, { "epoch": 0.603034438468331, "grad_norm": 0.17503587907093565, "learning_rate": 1.387654950102574e-05, "loss": 0.7054, "step": 2817 }, { "epoch": 0.6032485082015466, "grad_norm": 0.16905747935656973, "learning_rate": 1.3863615271100767e-05, "loss": 0.6916, "step": 2818 }, { "epoch": 0.6034625779347622, "grad_norm": 0.16820107102584045, "learning_rate": 1.3850683873719746e-05, "loss": 0.6971, "step": 2819 }, { "epoch": 0.6036766476679778, "grad_norm": 0.16442523150197488, "learning_rate": 1.38377553148518e-05, "loss": 0.6819, "step": 2820 }, { "epoch": 0.6038907174011935, "grad_norm": 0.16279692335300602, "learning_rate": 1.3824829600464709e-05, "loss": 0.708, "step": 2821 }, { "epoch": 0.6041047871344091, "grad_norm": 0.17084118113946872, "learning_rate": 1.3811906736524972e-05, "loss": 0.6849, "step": 2822 }, { "epoch": 0.6043188568676247, "grad_norm": 0.16452064589010315, "learning_rate": 1.3798986728997767e-05, "loss": 0.702, "step": 2823 }, { "epoch": 0.6045329266008402, "grad_norm": 0.16673368973665786, "learning_rate": 1.3786069583846926e-05, "loss": 0.6938, "step": 2824 }, { "epoch": 0.6047469963340558, "grad_norm": 0.1693800605040336, "learning_rate": 1.3773155307035002e-05, "loss": 0.6875, "step": 2825 }, { "epoch": 0.6049610660672714, "grad_norm": 0.16765783009207916, "learning_rate": 1.376024390452318e-05, "loss": 0.7236, "step": 2826 }, { "epoch": 0.605175135800487, "grad_norm": 0.17137048506079616, "learning_rate": 1.3747335382271345e-05, "loss": 0.7096, "step": 2827 }, { "epoch": 0.6053892055337026, "grad_norm": 0.17064714750208643, "learning_rate": 1.3734429746238066e-05, "loss": 0.7192, "step": 2828 }, { "epoch": 0.6056032752669182, "grad_norm": 0.1780840211898227, "learning_rate": 1.3721527002380535e-05, "loss": 0.7098, "step": 2829 }, { "epoch": 0.6058173450001338, "grad_norm": 0.1733188902182287, "learning_rate": 1.370862715665465e-05, "loss": 0.7256, "step": 2830 }, { "epoch": 0.6060314147333494, "grad_norm": 0.16486273290790035, "learning_rate": 1.3695730215014955e-05, "loss": 0.7036, "step": 2831 }, { "epoch": 0.606245484466565, "grad_norm": 0.17658189792895368, "learning_rate": 1.3682836183414647e-05, "loss": 0.679, "step": 2832 }, { "epoch": 0.6064595541997806, "grad_norm": 0.16447354117646157, "learning_rate": 1.3669945067805596e-05, "loss": 0.6766, "step": 2833 }, { "epoch": 0.6066736239329962, "grad_norm": 0.17992482061826953, "learning_rate": 1.3657056874138315e-05, "loss": 0.7033, "step": 2834 }, { "epoch": 0.6068876936662118, "grad_norm": 0.16356972374040374, "learning_rate": 1.364417160836197e-05, "loss": 0.7154, "step": 2835 }, { "epoch": 0.6071017633994273, "grad_norm": 0.17107500255646887, "learning_rate": 1.3631289276424374e-05, "loss": 0.7159, "step": 2836 }, { "epoch": 0.6073158331326429, "grad_norm": 0.1785890302760153, "learning_rate": 1.3618409884271993e-05, "loss": 0.7215, "step": 2837 }, { "epoch": 0.6075299028658585, "grad_norm": 0.16887974727734947, "learning_rate": 1.360553343784992e-05, "loss": 0.6903, "step": 2838 }, { "epoch": 0.6077439725990742, "grad_norm": 0.17943646777936517, "learning_rate": 1.3592659943101914e-05, "loss": 0.6875, "step": 2839 }, { "epoch": 0.6079580423322898, "grad_norm": 0.15998224774645078, "learning_rate": 1.3579789405970347e-05, "loss": 0.689, "step": 2840 }, { "epoch": 0.6081721120655054, "grad_norm": 0.17218073051440003, "learning_rate": 1.3566921832396234e-05, "loss": 0.7032, "step": 2841 }, { "epoch": 0.608386181798721, "grad_norm": 0.1651642459289121, "learning_rate": 1.3554057228319236e-05, "loss": 0.6949, "step": 2842 }, { "epoch": 0.6086002515319365, "grad_norm": 0.1671213785682561, "learning_rate": 1.354119559967761e-05, "loss": 0.7063, "step": 2843 }, { "epoch": 0.6088143212651521, "grad_norm": 0.1780644297694708, "learning_rate": 1.3528336952408277e-05, "loss": 0.7025, "step": 2844 }, { "epoch": 0.6090283909983677, "grad_norm": 0.1680704473732953, "learning_rate": 1.3515481292446762e-05, "loss": 0.7032, "step": 2845 }, { "epoch": 0.6092424607315833, "grad_norm": 0.17020266357413008, "learning_rate": 1.3502628625727208e-05, "loss": 0.6782, "step": 2846 }, { "epoch": 0.6094565304647989, "grad_norm": 0.245010912957747, "learning_rate": 1.3489778958182393e-05, "loss": 0.7111, "step": 2847 }, { "epoch": 0.6096706001980146, "grad_norm": 0.1641355606340915, "learning_rate": 1.3476932295743685e-05, "loss": 0.6618, "step": 2848 }, { "epoch": 0.6098846699312301, "grad_norm": 0.17186124968937724, "learning_rate": 1.3464088644341091e-05, "loss": 0.6968, "step": 2849 }, { "epoch": 0.6100987396644457, "grad_norm": 0.17715077313952188, "learning_rate": 1.3451248009903222e-05, "loss": 0.7078, "step": 2850 }, { "epoch": 0.6103128093976613, "grad_norm": 0.15681559309428414, "learning_rate": 1.3438410398357273e-05, "loss": 0.676, "step": 2851 }, { "epoch": 0.6105268791308769, "grad_norm": 0.1745691176010804, "learning_rate": 1.3425575815629084e-05, "loss": 0.6734, "step": 2852 }, { "epoch": 0.6107409488640925, "grad_norm": 0.17127724325606236, "learning_rate": 1.3412744267643051e-05, "loss": 0.7271, "step": 2853 }, { "epoch": 0.610955018597308, "grad_norm": 0.16361323942944692, "learning_rate": 1.3399915760322211e-05, "loss": 0.6776, "step": 2854 }, { "epoch": 0.6111690883305236, "grad_norm": 0.17723093582319852, "learning_rate": 1.338709029958818e-05, "loss": 0.6939, "step": 2855 }, { "epoch": 0.6113831580637392, "grad_norm": 0.16608857951563702, "learning_rate": 1.337426789136115e-05, "loss": 0.6909, "step": 2856 }, { "epoch": 0.6115972277969549, "grad_norm": 0.18475664294283847, "learning_rate": 1.3361448541559944e-05, "loss": 0.7132, "step": 2857 }, { "epoch": 0.6118112975301705, "grad_norm": 0.18309689093673312, "learning_rate": 1.334863225610193e-05, "loss": 0.7389, "step": 2858 }, { "epoch": 0.6120253672633861, "grad_norm": 0.20160900986128505, "learning_rate": 1.3335819040903091e-05, "loss": 0.7195, "step": 2859 }, { "epoch": 0.6122394369966017, "grad_norm": 0.18710150081023777, "learning_rate": 1.3323008901877991e-05, "loss": 0.6991, "step": 2860 }, { "epoch": 0.6124535067298172, "grad_norm": 0.17717707834980048, "learning_rate": 1.331020184493975e-05, "loss": 0.7325, "step": 2861 }, { "epoch": 0.6126675764630328, "grad_norm": 0.16784155215696425, "learning_rate": 1.3297397876000103e-05, "loss": 0.711, "step": 2862 }, { "epoch": 0.6128816461962484, "grad_norm": 0.1725802521420512, "learning_rate": 1.3284597000969314e-05, "loss": 0.7224, "step": 2863 }, { "epoch": 0.613095715929464, "grad_norm": 1.5390276737933177, "learning_rate": 1.3271799225756259e-05, "loss": 0.7148, "step": 2864 }, { "epoch": 0.6133097856626796, "grad_norm": 0.17687346419640848, "learning_rate": 1.3259004556268374e-05, "loss": 0.6831, "step": 2865 }, { "epoch": 0.6135238553958953, "grad_norm": 0.18585619933688044, "learning_rate": 1.3246212998411636e-05, "loss": 0.704, "step": 2866 }, { "epoch": 0.6137379251291109, "grad_norm": 0.18105808829446995, "learning_rate": 1.3233424558090624e-05, "loss": 0.7002, "step": 2867 }, { "epoch": 0.6139519948623264, "grad_norm": 0.17855654258899187, "learning_rate": 1.322063924120844e-05, "loss": 0.695, "step": 2868 }, { "epoch": 0.614166064595542, "grad_norm": 0.18355776755397543, "learning_rate": 1.3207857053666773e-05, "loss": 0.6831, "step": 2869 }, { "epoch": 0.6143801343287576, "grad_norm": 0.1902327712386683, "learning_rate": 1.3195078001365864e-05, "loss": 0.6831, "step": 2870 }, { "epoch": 0.6145942040619732, "grad_norm": 0.17204234073682728, "learning_rate": 1.3182302090204484e-05, "loss": 0.6969, "step": 2871 }, { "epoch": 0.6148082737951888, "grad_norm": 0.17094098670129862, "learning_rate": 1.3169529326079984e-05, "loss": 0.7152, "step": 2872 }, { "epoch": 0.6150223435284043, "grad_norm": 0.2117693784640695, "learning_rate": 1.3156759714888244e-05, "loss": 0.7198, "step": 2873 }, { "epoch": 0.6152364132616199, "grad_norm": 0.17009886451328768, "learning_rate": 1.3143993262523687e-05, "loss": 0.6989, "step": 2874 }, { "epoch": 0.6154504829948355, "grad_norm": 0.1779596592890098, "learning_rate": 1.3131229974879296e-05, "loss": 0.6734, "step": 2875 }, { "epoch": 0.6156645527280512, "grad_norm": 0.17921322382485147, "learning_rate": 1.3118469857846571e-05, "loss": 0.6743, "step": 2876 }, { "epoch": 0.6158786224612668, "grad_norm": 0.17691893997151076, "learning_rate": 1.3105712917315565e-05, "loss": 0.703, "step": 2877 }, { "epoch": 0.6160926921944824, "grad_norm": 0.16694540052827558, "learning_rate": 1.3092959159174851e-05, "loss": 0.6896, "step": 2878 }, { "epoch": 0.616306761927698, "grad_norm": 0.1790079753898222, "learning_rate": 1.3080208589311556e-05, "loss": 0.6901, "step": 2879 }, { "epoch": 0.6165208316609135, "grad_norm": 0.17878494361234837, "learning_rate": 1.3067461213611297e-05, "loss": 0.6518, "step": 2880 }, { "epoch": 0.6167349013941291, "grad_norm": 0.18703174230007547, "learning_rate": 1.3054717037958254e-05, "loss": 0.7004, "step": 2881 }, { "epoch": 0.6169489711273447, "grad_norm": 0.17219382822652507, "learning_rate": 1.3041976068235118e-05, "loss": 0.6819, "step": 2882 }, { "epoch": 0.6171630408605603, "grad_norm": 0.19117048808355613, "learning_rate": 1.3029238310323086e-05, "loss": 0.6767, "step": 2883 }, { "epoch": 0.6173771105937759, "grad_norm": 0.17508629085109303, "learning_rate": 1.3016503770101898e-05, "loss": 0.7224, "step": 2884 }, { "epoch": 0.6175911803269916, "grad_norm": 0.18228936156076614, "learning_rate": 1.3003772453449775e-05, "loss": 0.6842, "step": 2885 }, { "epoch": 0.6178052500602071, "grad_norm": 0.1767314759678005, "learning_rate": 1.2991044366243482e-05, "loss": 0.7206, "step": 2886 }, { "epoch": 0.6180193197934227, "grad_norm": 0.1928813647012719, "learning_rate": 1.2978319514358288e-05, "loss": 0.7231, "step": 2887 }, { "epoch": 0.6182333895266383, "grad_norm": 0.17987899265982177, "learning_rate": 1.2965597903667942e-05, "loss": 0.7104, "step": 2888 }, { "epoch": 0.6184474592598539, "grad_norm": 0.17763217163885667, "learning_rate": 1.2952879540044738e-05, "loss": 0.6771, "step": 2889 }, { "epoch": 0.6186615289930695, "grad_norm": 0.17860682345936435, "learning_rate": 1.2940164429359427e-05, "loss": 0.7033, "step": 2890 }, { "epoch": 0.6188755987262851, "grad_norm": 0.1888543358700002, "learning_rate": 1.2927452577481291e-05, "loss": 0.6885, "step": 2891 }, { "epoch": 0.6190896684595006, "grad_norm": 0.17864857931412279, "learning_rate": 1.2914743990278112e-05, "loss": 0.7343, "step": 2892 }, { "epoch": 0.6193037381927162, "grad_norm": 0.172214273624621, "learning_rate": 1.2902038673616124e-05, "loss": 0.6857, "step": 2893 }, { "epoch": 0.6195178079259319, "grad_norm": 0.18740147605806642, "learning_rate": 1.2889336633360101e-05, "loss": 0.7228, "step": 2894 }, { "epoch": 0.6197318776591475, "grad_norm": 0.17032178588734434, "learning_rate": 1.2876637875373263e-05, "loss": 0.6954, "step": 2895 }, { "epoch": 0.6199459473923631, "grad_norm": 0.2258489905632264, "learning_rate": 1.2863942405517342e-05, "loss": 0.6721, "step": 2896 }, { "epoch": 0.6201600171255787, "grad_norm": 0.17617785733416969, "learning_rate": 1.2851250229652552e-05, "loss": 0.7157, "step": 2897 }, { "epoch": 0.6203740868587942, "grad_norm": 0.18391050906383133, "learning_rate": 1.283856135363756e-05, "loss": 0.7062, "step": 2898 }, { "epoch": 0.6205881565920098, "grad_norm": 0.1691362509596803, "learning_rate": 1.282587578332955e-05, "loss": 0.7037, "step": 2899 }, { "epoch": 0.6208022263252254, "grad_norm": 0.18639845336118688, "learning_rate": 1.281319352458413e-05, "loss": 0.7055, "step": 2900 }, { "epoch": 0.621016296058441, "grad_norm": 0.17607930458619322, "learning_rate": 1.280051458325543e-05, "loss": 0.6917, "step": 2901 }, { "epoch": 0.6212303657916566, "grad_norm": 0.1738789578756451, "learning_rate": 1.2787838965196024e-05, "loss": 0.7173, "step": 2902 }, { "epoch": 0.6214444355248723, "grad_norm": 0.1801430032805143, "learning_rate": 1.2775166676256942e-05, "loss": 0.7097, "step": 2903 }, { "epoch": 0.6216585052580879, "grad_norm": 0.16793363576521397, "learning_rate": 1.2762497722287705e-05, "loss": 0.7335, "step": 2904 }, { "epoch": 0.6218725749913034, "grad_norm": 0.18048841789581735, "learning_rate": 1.2749832109136262e-05, "loss": 0.6932, "step": 2905 }, { "epoch": 0.622086644724519, "grad_norm": 0.16611617081444183, "learning_rate": 1.2737169842649046e-05, "loss": 0.7228, "step": 2906 }, { "epoch": 0.6223007144577346, "grad_norm": 0.17939443935567653, "learning_rate": 1.2724510928670944e-05, "loss": 0.7221, "step": 2907 }, { "epoch": 0.6225147841909502, "grad_norm": 0.1539017358556344, "learning_rate": 1.271185537304527e-05, "loss": 0.7059, "step": 2908 }, { "epoch": 0.6227288539241658, "grad_norm": 0.17268144585332496, "learning_rate": 1.2699203181613822e-05, "loss": 0.6663, "step": 2909 }, { "epoch": 0.6229429236573814, "grad_norm": 0.30648407676549616, "learning_rate": 1.2686554360216814e-05, "loss": 0.71, "step": 2910 }, { "epoch": 0.6231569933905969, "grad_norm": 0.16047629951681336, "learning_rate": 1.2673908914692925e-05, "loss": 0.7051, "step": 2911 }, { "epoch": 0.6233710631238126, "grad_norm": 0.17246879244750765, "learning_rate": 1.2661266850879277e-05, "loss": 0.6907, "step": 2912 }, { "epoch": 0.6235851328570282, "grad_norm": 0.15638788280326743, "learning_rate": 1.264862817461141e-05, "loss": 0.7258, "step": 2913 }, { "epoch": 0.6237992025902438, "grad_norm": 0.17085846723621334, "learning_rate": 1.2635992891723322e-05, "loss": 0.6643, "step": 2914 }, { "epoch": 0.6240132723234594, "grad_norm": 0.16726585249472334, "learning_rate": 1.2623361008047437e-05, "loss": 0.6735, "step": 2915 }, { "epoch": 0.624227342056675, "grad_norm": 0.16704252457088484, "learning_rate": 1.2610732529414605e-05, "loss": 0.6826, "step": 2916 }, { "epoch": 0.6244414117898905, "grad_norm": 0.1763544082611917, "learning_rate": 1.2598107461654111e-05, "loss": 0.726, "step": 2917 }, { "epoch": 0.6246554815231061, "grad_norm": 0.16341085542734637, "learning_rate": 1.2585485810593665e-05, "loss": 0.6876, "step": 2918 }, { "epoch": 0.6248695512563217, "grad_norm": 0.17757472243715544, "learning_rate": 1.2572867582059396e-05, "loss": 0.7041, "step": 2919 }, { "epoch": 0.6250836209895373, "grad_norm": 0.16358487378286496, "learning_rate": 1.256025278187585e-05, "loss": 0.6879, "step": 2920 }, { "epoch": 0.625297690722753, "grad_norm": 0.17619618742073248, "learning_rate": 1.254764141586601e-05, "loss": 0.6778, "step": 2921 }, { "epoch": 0.6255117604559686, "grad_norm": 0.16199914831698128, "learning_rate": 1.2535033489851242e-05, "loss": 0.6679, "step": 2922 }, { "epoch": 0.6257258301891842, "grad_norm": 0.16538948362297123, "learning_rate": 1.2522429009651349e-05, "loss": 0.6995, "step": 2923 }, { "epoch": 0.6259398999223997, "grad_norm": 0.17833038921739666, "learning_rate": 1.2509827981084546e-05, "loss": 0.6696, "step": 2924 }, { "epoch": 0.6261539696556153, "grad_norm": 0.17068080819660977, "learning_rate": 1.249723040996743e-05, "loss": 0.7211, "step": 2925 }, { "epoch": 0.6263680393888309, "grad_norm": 0.18804925573823228, "learning_rate": 1.2484636302115027e-05, "loss": 0.684, "step": 2926 }, { "epoch": 0.6265821091220465, "grad_norm": 0.16950128218439603, "learning_rate": 1.2472045663340744e-05, "loss": 0.7075, "step": 2927 }, { "epoch": 0.6267961788552621, "grad_norm": 0.17381394632953662, "learning_rate": 1.2459458499456401e-05, "loss": 0.6687, "step": 2928 }, { "epoch": 0.6270102485884776, "grad_norm": 0.1596582427830223, "learning_rate": 1.2446874816272216e-05, "loss": 0.7093, "step": 2929 }, { "epoch": 0.6272243183216933, "grad_norm": 0.18923414202347508, "learning_rate": 1.2434294619596785e-05, "loss": 0.7368, "step": 2930 }, { "epoch": 0.6274383880549089, "grad_norm": 0.16394900734918538, "learning_rate": 1.2421717915237114e-05, "loss": 0.7145, "step": 2931 }, { "epoch": 0.6276524577881245, "grad_norm": 0.1719139295670378, "learning_rate": 1.2409144708998574e-05, "loss": 0.6892, "step": 2932 }, { "epoch": 0.6278665275213401, "grad_norm": 0.16953471095261508, "learning_rate": 1.239657500668494e-05, "loss": 0.6911, "step": 2933 }, { "epoch": 0.6280805972545557, "grad_norm": 0.1710169225060896, "learning_rate": 1.2384008814098376e-05, "loss": 0.7124, "step": 2934 }, { "epoch": 0.6282946669877713, "grad_norm": 0.17020357873357828, "learning_rate": 1.2371446137039391e-05, "loss": 0.692, "step": 2935 }, { "epoch": 0.6285087367209868, "grad_norm": 0.15469496832129448, "learning_rate": 1.2358886981306912e-05, "loss": 0.6875, "step": 2936 }, { "epoch": 0.6287228064542024, "grad_norm": 0.17390880002807188, "learning_rate": 1.2346331352698206e-05, "loss": 0.7002, "step": 2937 }, { "epoch": 0.628936876187418, "grad_norm": 0.1642273622259631, "learning_rate": 1.2333779257008937e-05, "loss": 0.7126, "step": 2938 }, { "epoch": 0.6291509459206337, "grad_norm": 0.16678568279818254, "learning_rate": 1.232123070003314e-05, "loss": 0.7264, "step": 2939 }, { "epoch": 0.6293650156538493, "grad_norm": 0.16718697862885099, "learning_rate": 1.2308685687563186e-05, "loss": 0.7394, "step": 2940 }, { "epoch": 0.6295790853870649, "grad_norm": 0.1524015290174883, "learning_rate": 1.2296144225389847e-05, "loss": 0.6749, "step": 2941 }, { "epoch": 0.6297931551202804, "grad_norm": 0.1850826223936878, "learning_rate": 1.2283606319302224e-05, "loss": 0.7031, "step": 2942 }, { "epoch": 0.630007224853496, "grad_norm": 0.2534360107397994, "learning_rate": 1.2271071975087799e-05, "loss": 0.6736, "step": 2943 }, { "epoch": 0.6302212945867116, "grad_norm": 0.17441563231800425, "learning_rate": 1.2258541198532407e-05, "loss": 0.7029, "step": 2944 }, { "epoch": 0.6304353643199272, "grad_norm": 0.23075740781320633, "learning_rate": 1.2246013995420221e-05, "loss": 0.6882, "step": 2945 }, { "epoch": 0.6306494340531428, "grad_norm": 0.16999008530809995, "learning_rate": 1.2233490371533786e-05, "loss": 0.7079, "step": 2946 }, { "epoch": 0.6308635037863584, "grad_norm": 0.2866628942833885, "learning_rate": 1.2220970332653972e-05, "loss": 0.716, "step": 2947 }, { "epoch": 0.631077573519574, "grad_norm": 0.18768464303026294, "learning_rate": 1.2208453884560012e-05, "loss": 0.7176, "step": 2948 }, { "epoch": 0.6312916432527896, "grad_norm": 0.16261756239700065, "learning_rate": 1.2195941033029484e-05, "loss": 0.7224, "step": 2949 }, { "epoch": 0.6315057129860052, "grad_norm": 0.16951721449406248, "learning_rate": 1.2183431783838281e-05, "loss": 0.7019, "step": 2950 }, { "epoch": 0.6317197827192208, "grad_norm": 0.18069610584991522, "learning_rate": 1.2170926142760666e-05, "loss": 0.7204, "step": 2951 }, { "epoch": 0.6319338524524364, "grad_norm": 0.16441193668221984, "learning_rate": 1.2158424115569205e-05, "loss": 0.6933, "step": 2952 }, { "epoch": 0.632147922185652, "grad_norm": 0.9628811658647918, "learning_rate": 1.2145925708034815e-05, "loss": 0.692, "step": 2953 }, { "epoch": 0.6323619919188675, "grad_norm": 0.16391576680482073, "learning_rate": 1.2133430925926753e-05, "loss": 0.6858, "step": 2954 }, { "epoch": 0.6325760616520831, "grad_norm": 0.17916529443289175, "learning_rate": 1.2120939775012564e-05, "loss": 0.6964, "step": 2955 }, { "epoch": 0.6327901313852987, "grad_norm": 0.1605899428782057, "learning_rate": 1.2108452261058156e-05, "loss": 0.6797, "step": 2956 }, { "epoch": 0.6330042011185144, "grad_norm": 0.16429996074127887, "learning_rate": 1.2095968389827739e-05, "loss": 0.7115, "step": 2957 }, { "epoch": 0.63321827085173, "grad_norm": 0.1670154397729422, "learning_rate": 1.2083488167083843e-05, "loss": 0.6816, "step": 2958 }, { "epoch": 0.6334323405849456, "grad_norm": 0.1666621364478631, "learning_rate": 1.2071011598587315e-05, "loss": 0.7101, "step": 2959 }, { "epoch": 0.6336464103181612, "grad_norm": 0.16588852427192485, "learning_rate": 1.2058538690097321e-05, "loss": 0.6677, "step": 2960 }, { "epoch": 0.6338604800513767, "grad_norm": 0.17186574899635917, "learning_rate": 1.2046069447371332e-05, "loss": 0.7184, "step": 2961 }, { "epoch": 0.6340745497845923, "grad_norm": 0.1673490981243338, "learning_rate": 1.203360387616512e-05, "loss": 0.717, "step": 2962 }, { "epoch": 0.6342886195178079, "grad_norm": 0.16743151579505755, "learning_rate": 1.2021141982232785e-05, "loss": 0.6991, "step": 2963 }, { "epoch": 0.6345026892510235, "grad_norm": 0.16168715457808672, "learning_rate": 1.2008683771326697e-05, "loss": 0.69, "step": 2964 }, { "epoch": 0.6347167589842391, "grad_norm": 0.18168044103232656, "learning_rate": 1.199622924919755e-05, "loss": 0.6986, "step": 2965 }, { "epoch": 0.6349308287174548, "grad_norm": 0.16381373979373812, "learning_rate": 1.1983778421594341e-05, "loss": 0.7132, "step": 2966 }, { "epoch": 0.6351448984506703, "grad_norm": 0.16316974024076497, "learning_rate": 1.1971331294264328e-05, "loss": 0.6968, "step": 2967 }, { "epoch": 0.6353589681838859, "grad_norm": 0.16193383262435015, "learning_rate": 1.19588878729531e-05, "loss": 0.6855, "step": 2968 }, { "epoch": 0.6355730379171015, "grad_norm": 0.15944271823824435, "learning_rate": 1.1946448163404503e-05, "loss": 0.6831, "step": 2969 }, { "epoch": 0.6357871076503171, "grad_norm": 0.16073582155028612, "learning_rate": 1.1934012171360692e-05, "loss": 0.7037, "step": 2970 }, { "epoch": 0.6360011773835327, "grad_norm": 0.17182927970614129, "learning_rate": 1.1921579902562103e-05, "loss": 0.7215, "step": 2971 }, { "epoch": 0.6362152471167483, "grad_norm": 0.15885051786833096, "learning_rate": 1.1909151362747437e-05, "loss": 0.7016, "step": 2972 }, { "epoch": 0.6364293168499638, "grad_norm": 0.18147169309209343, "learning_rate": 1.1896726557653699e-05, "loss": 0.7137, "step": 2973 }, { "epoch": 0.6366433865831794, "grad_norm": 0.1671580602611548, "learning_rate": 1.188430549301614e-05, "loss": 0.6932, "step": 2974 }, { "epoch": 0.6368574563163951, "grad_norm": 0.1704553591271745, "learning_rate": 1.187188817456831e-05, "loss": 0.6844, "step": 2975 }, { "epoch": 0.6370715260496107, "grad_norm": 0.16752180285730586, "learning_rate": 1.1859474608042025e-05, "loss": 0.6948, "step": 2976 }, { "epoch": 0.6372855957828263, "grad_norm": 0.1607453595825214, "learning_rate": 1.1847064799167351e-05, "loss": 0.7071, "step": 2977 }, { "epoch": 0.6374996655160419, "grad_norm": 0.1601945790617932, "learning_rate": 1.1834658753672653e-05, "loss": 0.6875, "step": 2978 }, { "epoch": 0.6377137352492575, "grad_norm": 0.16138230498616143, "learning_rate": 1.1822256477284517e-05, "loss": 0.7072, "step": 2979 }, { "epoch": 0.637927804982473, "grad_norm": 0.15835635684726207, "learning_rate": 1.1809857975727819e-05, "loss": 0.6952, "step": 2980 }, { "epoch": 0.6381418747156886, "grad_norm": 0.16026940319647917, "learning_rate": 1.1797463254725696e-05, "loss": 0.689, "step": 2981 }, { "epoch": 0.6383559444489042, "grad_norm": 0.16442983720923277, "learning_rate": 1.1785072319999513e-05, "loss": 0.6809, "step": 2982 }, { "epoch": 0.6385700141821198, "grad_norm": 0.16612737878971637, "learning_rate": 1.1772685177268916e-05, "loss": 0.6945, "step": 2983 }, { "epoch": 0.6387840839153354, "grad_norm": 0.17809310058237487, "learning_rate": 1.1760301832251773e-05, "loss": 0.7226, "step": 2984 }, { "epoch": 0.6389981536485511, "grad_norm": 0.16395607933048745, "learning_rate": 1.174792229066422e-05, "loss": 0.6691, "step": 2985 }, { "epoch": 0.6392122233817666, "grad_norm": 0.1637606976705473, "learning_rate": 1.173554655822064e-05, "loss": 0.6909, "step": 2986 }, { "epoch": 0.6394262931149822, "grad_norm": 0.163746544450305, "learning_rate": 1.172317464063363e-05, "loss": 0.695, "step": 2987 }, { "epoch": 0.6396403628481978, "grad_norm": 0.1854282152064345, "learning_rate": 1.1710806543614066e-05, "loss": 0.705, "step": 2988 }, { "epoch": 0.6398544325814134, "grad_norm": 0.17679673871559604, "learning_rate": 1.1698442272871018e-05, "loss": 0.7063, "step": 2989 }, { "epoch": 0.640068502314629, "grad_norm": 0.19045542334904472, "learning_rate": 1.168608183411182e-05, "loss": 0.684, "step": 2990 }, { "epoch": 0.6402825720478446, "grad_norm": 0.16123138168999393, "learning_rate": 1.1673725233042033e-05, "loss": 0.6965, "step": 2991 }, { "epoch": 0.6404966417810601, "grad_norm": 0.1714294989618222, "learning_rate": 1.166137247536543e-05, "loss": 0.7443, "step": 2992 }, { "epoch": 0.6407107115142757, "grad_norm": 0.18048674301266115, "learning_rate": 1.1649023566784039e-05, "loss": 0.7048, "step": 2993 }, { "epoch": 0.6409247812474914, "grad_norm": 0.1560685331579729, "learning_rate": 1.1636678512998074e-05, "loss": 0.6938, "step": 2994 }, { "epoch": 0.641138850980707, "grad_norm": 0.18494732591236804, "learning_rate": 1.1624337319705995e-05, "loss": 0.6826, "step": 2995 }, { "epoch": 0.6413529207139226, "grad_norm": 0.16380366820778122, "learning_rate": 1.1611999992604491e-05, "loss": 0.7013, "step": 2996 }, { "epoch": 0.6415669904471382, "grad_norm": 0.1651962122147673, "learning_rate": 1.159966653738842e-05, "loss": 0.7049, "step": 2997 }, { "epoch": 0.6417810601803537, "grad_norm": 0.1662693680476804, "learning_rate": 1.1587336959750912e-05, "loss": 0.7223, "step": 2998 }, { "epoch": 0.6419951299135693, "grad_norm": 0.23710229772752486, "learning_rate": 1.1575011265383251e-05, "loss": 0.7146, "step": 2999 }, { "epoch": 0.6422091996467849, "grad_norm": 0.16185101214389352, "learning_rate": 1.156268945997498e-05, "loss": 0.7379, "step": 3000 }, { "epoch": 0.6424232693800005, "grad_norm": 0.16935471526485132, "learning_rate": 1.1550371549213797e-05, "loss": 0.7042, "step": 3001 }, { "epoch": 0.6426373391132161, "grad_norm": 0.17068356974370424, "learning_rate": 1.1538057538785638e-05, "loss": 0.7292, "step": 3002 }, { "epoch": 0.6428514088464318, "grad_norm": 0.16082713244281638, "learning_rate": 1.152574743437464e-05, "loss": 0.6771, "step": 3003 }, { "epoch": 0.6430654785796474, "grad_norm": 0.17464502471767457, "learning_rate": 1.1513441241663105e-05, "loss": 0.6896, "step": 3004 }, { "epoch": 0.6432795483128629, "grad_norm": 0.16351997860068648, "learning_rate": 1.150113896633157e-05, "loss": 0.7032, "step": 3005 }, { "epoch": 0.6434936180460785, "grad_norm": 0.1690926534684481, "learning_rate": 1.1488840614058716e-05, "loss": 0.6733, "step": 3006 }, { "epoch": 0.6437076877792941, "grad_norm": 0.1784838317003333, "learning_rate": 1.1476546190521456e-05, "loss": 0.7136, "step": 3007 }, { "epoch": 0.6439217575125097, "grad_norm": 0.1785785968046288, "learning_rate": 1.146425570139488e-05, "loss": 0.7067, "step": 3008 }, { "epoch": 0.6441358272457253, "grad_norm": 0.1713124320675924, "learning_rate": 1.145196915235224e-05, "loss": 0.694, "step": 3009 }, { "epoch": 0.6443498969789409, "grad_norm": 0.15675533806258324, "learning_rate": 1.1439686549064996e-05, "loss": 0.6652, "step": 3010 }, { "epoch": 0.6445639667121564, "grad_norm": 0.1696309691171974, "learning_rate": 1.1427407897202767e-05, "loss": 0.7052, "step": 3011 }, { "epoch": 0.6447780364453721, "grad_norm": 0.16317109100786786, "learning_rate": 1.1415133202433357e-05, "loss": 0.6714, "step": 3012 }, { "epoch": 0.6449921061785877, "grad_norm": 0.16253351295127938, "learning_rate": 1.1402862470422753e-05, "loss": 0.6907, "step": 3013 }, { "epoch": 0.6452061759118033, "grad_norm": 0.1729193360632724, "learning_rate": 1.139059570683509e-05, "loss": 0.7118, "step": 3014 }, { "epoch": 0.6454202456450189, "grad_norm": 0.1702780987278593, "learning_rate": 1.1378332917332696e-05, "loss": 0.6995, "step": 3015 }, { "epoch": 0.6456343153782345, "grad_norm": 0.17896418896916347, "learning_rate": 1.1366074107576035e-05, "loss": 0.7024, "step": 3016 }, { "epoch": 0.64584838511145, "grad_norm": 0.16240683657758037, "learning_rate": 1.1353819283223762e-05, "loss": 0.7202, "step": 3017 }, { "epoch": 0.6460624548446656, "grad_norm": 0.16927526374792506, "learning_rate": 1.1341568449932688e-05, "loss": 0.7099, "step": 3018 }, { "epoch": 0.6462765245778812, "grad_norm": 0.1600543302235835, "learning_rate": 1.132932161335776e-05, "loss": 0.72, "step": 3019 }, { "epoch": 0.6464905943110968, "grad_norm": 0.16347278785357336, "learning_rate": 1.131707877915211e-05, "loss": 0.7024, "step": 3020 }, { "epoch": 0.6467046640443125, "grad_norm": 0.17352557510729236, "learning_rate": 1.1304839952966993e-05, "loss": 0.7082, "step": 3021 }, { "epoch": 0.6469187337775281, "grad_norm": 0.20927375224275424, "learning_rate": 1.1292605140451838e-05, "loss": 0.6843, "step": 3022 }, { "epoch": 0.6471328035107436, "grad_norm": 0.1704525197454697, "learning_rate": 1.128037434725422e-05, "loss": 0.6987, "step": 3023 }, { "epoch": 0.6473468732439592, "grad_norm": 0.23404052924423965, "learning_rate": 1.126814757901983e-05, "loss": 0.703, "step": 3024 }, { "epoch": 0.6475609429771748, "grad_norm": 0.16378192029244704, "learning_rate": 1.1255924841392542e-05, "loss": 0.6913, "step": 3025 }, { "epoch": 0.6477750127103904, "grad_norm": 0.17260696387702695, "learning_rate": 1.1243706140014333e-05, "loss": 0.7071, "step": 3026 }, { "epoch": 0.647989082443606, "grad_norm": 0.15840289277180297, "learning_rate": 1.1231491480525341e-05, "loss": 0.7295, "step": 3027 }, { "epoch": 0.6482031521768216, "grad_norm": 0.1710246421139846, "learning_rate": 1.1219280868563838e-05, "loss": 0.7092, "step": 3028 }, { "epoch": 0.6484172219100371, "grad_norm": 0.16019620902751636, "learning_rate": 1.1207074309766204e-05, "loss": 0.7031, "step": 3029 }, { "epoch": 0.6486312916432528, "grad_norm": 0.15480753153701207, "learning_rate": 1.1194871809766981e-05, "loss": 0.6942, "step": 3030 }, { "epoch": 0.6488453613764684, "grad_norm": 0.15943952059365776, "learning_rate": 1.1182673374198805e-05, "loss": 0.7083, "step": 3031 }, { "epoch": 0.649059431109684, "grad_norm": 0.16123658316260847, "learning_rate": 1.1170479008692457e-05, "loss": 0.7095, "step": 3032 }, { "epoch": 0.6492735008428996, "grad_norm": 0.15275844768832486, "learning_rate": 1.1158288718876844e-05, "loss": 0.6771, "step": 3033 }, { "epoch": 0.6494875705761152, "grad_norm": 0.15629234469138292, "learning_rate": 1.1146102510378964e-05, "loss": 0.705, "step": 3034 }, { "epoch": 0.6497016403093308, "grad_norm": 0.1527165321738287, "learning_rate": 1.1133920388823967e-05, "loss": 0.6864, "step": 3035 }, { "epoch": 0.6499157100425463, "grad_norm": 0.15532618501401466, "learning_rate": 1.1121742359835079e-05, "loss": 0.6703, "step": 3036 }, { "epoch": 0.6501297797757619, "grad_norm": 0.14565902138468276, "learning_rate": 1.1109568429033669e-05, "loss": 0.6715, "step": 3037 }, { "epoch": 0.6503438495089775, "grad_norm": 0.16350793160863714, "learning_rate": 1.1097398602039202e-05, "loss": 0.6857, "step": 3038 }, { "epoch": 0.6505579192421932, "grad_norm": 0.15971597977022928, "learning_rate": 1.1085232884469236e-05, "loss": 0.7233, "step": 3039 }, { "epoch": 0.6507719889754088, "grad_norm": 0.16056666146955634, "learning_rate": 1.107307128193946e-05, "loss": 0.7156, "step": 3040 }, { "epoch": 0.6509860587086244, "grad_norm": 0.15708115566962028, "learning_rate": 1.106091380006363e-05, "loss": 0.6877, "step": 3041 }, { "epoch": 0.6512001284418399, "grad_norm": 0.16039488460755236, "learning_rate": 1.1048760444453636e-05, "loss": 0.7052, "step": 3042 }, { "epoch": 0.6514141981750555, "grad_norm": 0.16257983769300854, "learning_rate": 1.1036611220719426e-05, "loss": 0.7038, "step": 3043 }, { "epoch": 0.6516282679082711, "grad_norm": 0.15917878917205924, "learning_rate": 1.102446613446907e-05, "loss": 0.6955, "step": 3044 }, { "epoch": 0.6518423376414867, "grad_norm": 0.15910433212899805, "learning_rate": 1.1012325191308721e-05, "loss": 0.7029, "step": 3045 }, { "epoch": 0.6520564073747023, "grad_norm": 0.16049647340918968, "learning_rate": 1.1000188396842604e-05, "loss": 0.6945, "step": 3046 }, { "epoch": 0.6522704771079179, "grad_norm": 0.1574848396997355, "learning_rate": 1.0988055756673057e-05, "loss": 0.7204, "step": 3047 }, { "epoch": 0.6524845468411336, "grad_norm": 0.17062483486919586, "learning_rate": 1.0975927276400466e-05, "loss": 0.6952, "step": 3048 }, { "epoch": 0.6526986165743491, "grad_norm": 0.15999655958826292, "learning_rate": 1.0963802961623329e-05, "loss": 0.7188, "step": 3049 }, { "epoch": 0.6529126863075647, "grad_norm": 0.16372948371536275, "learning_rate": 1.0951682817938209e-05, "loss": 0.7047, "step": 3050 }, { "epoch": 0.6531267560407803, "grad_norm": 0.16804253558519006, "learning_rate": 1.0939566850939727e-05, "loss": 0.7231, "step": 3051 }, { "epoch": 0.6533408257739959, "grad_norm": 0.1637492977611271, "learning_rate": 1.092745506622061e-05, "loss": 0.6955, "step": 3052 }, { "epoch": 0.6535548955072115, "grad_norm": 0.15823061897236976, "learning_rate": 1.091534746937162e-05, "loss": 0.7004, "step": 3053 }, { "epoch": 0.653768965240427, "grad_norm": 0.16666509369899177, "learning_rate": 1.0903244065981608e-05, "loss": 0.6903, "step": 3054 }, { "epoch": 0.6539830349736426, "grad_norm": 0.160025184887067, "learning_rate": 1.0891144861637488e-05, "loss": 0.6899, "step": 3055 }, { "epoch": 0.6541971047068582, "grad_norm": 0.15938454296733964, "learning_rate": 1.087904986192422e-05, "loss": 0.7026, "step": 3056 }, { "epoch": 0.6544111744400739, "grad_norm": 0.16917273847620276, "learning_rate": 1.0866959072424838e-05, "loss": 0.6996, "step": 3057 }, { "epoch": 0.6546252441732895, "grad_norm": 0.1533162588092453, "learning_rate": 1.0854872498720436e-05, "loss": 0.6947, "step": 3058 }, { "epoch": 0.6548393139065051, "grad_norm": 0.15399321658021684, "learning_rate": 1.0842790146390144e-05, "loss": 0.7034, "step": 3059 }, { "epoch": 0.6550533836397207, "grad_norm": 0.16290717373727154, "learning_rate": 1.0830712021011154e-05, "loss": 0.6889, "step": 3060 }, { "epoch": 0.6552674533729362, "grad_norm": 0.15982311858370116, "learning_rate": 1.081863812815872e-05, "loss": 0.6897, "step": 3061 }, { "epoch": 0.6554815231061518, "grad_norm": 0.15789580043324297, "learning_rate": 1.080656847340611e-05, "loss": 0.6998, "step": 3062 }, { "epoch": 0.6556955928393674, "grad_norm": 0.1673228222261171, "learning_rate": 1.0794503062324664e-05, "loss": 0.6905, "step": 3063 }, { "epoch": 0.655909662572583, "grad_norm": 0.16043079916395062, "learning_rate": 1.078244190048376e-05, "loss": 0.7073, "step": 3064 }, { "epoch": 0.6561237323057986, "grad_norm": 0.15737028797569128, "learning_rate": 1.0770384993450796e-05, "loss": 0.6915, "step": 3065 }, { "epoch": 0.6563378020390143, "grad_norm": 0.15954350175409163, "learning_rate": 1.0758332346791219e-05, "loss": 0.6979, "step": 3066 }, { "epoch": 0.6565518717722298, "grad_norm": 0.15346727175746847, "learning_rate": 1.0746283966068525e-05, "loss": 0.6764, "step": 3067 }, { "epoch": 0.6567659415054454, "grad_norm": 0.15789737841353488, "learning_rate": 1.0734239856844204e-05, "loss": 0.685, "step": 3068 }, { "epoch": 0.656980011238661, "grad_norm": 0.16314731016209819, "learning_rate": 1.07222000246778e-05, "loss": 0.7213, "step": 3069 }, { "epoch": 0.6571940809718766, "grad_norm": 0.1560279985031777, "learning_rate": 1.0710164475126894e-05, "loss": 0.6879, "step": 3070 }, { "epoch": 0.6574081507050922, "grad_norm": 0.16051110359534035, "learning_rate": 1.069813321374705e-05, "loss": 0.6985, "step": 3071 }, { "epoch": 0.6576222204383078, "grad_norm": 0.16599332379590576, "learning_rate": 1.0686106246091895e-05, "loss": 0.7206, "step": 3072 }, { "epoch": 0.6578362901715233, "grad_norm": 0.161663267904669, "learning_rate": 1.0674083577713037e-05, "loss": 0.666, "step": 3073 }, { "epoch": 0.6580503599047389, "grad_norm": 0.16341807962206745, "learning_rate": 1.0662065214160131e-05, "loss": 0.6873, "step": 3074 }, { "epoch": 0.6582644296379546, "grad_norm": 0.16331829109326712, "learning_rate": 1.0650051160980835e-05, "loss": 0.6894, "step": 3075 }, { "epoch": 0.6584784993711702, "grad_norm": 0.15664775482017015, "learning_rate": 1.06380414237208e-05, "loss": 0.6825, "step": 3076 }, { "epoch": 0.6586925691043858, "grad_norm": 0.15899760291435164, "learning_rate": 1.0626036007923712e-05, "loss": 0.6679, "step": 3077 }, { "epoch": 0.6589066388376014, "grad_norm": 0.16818363978877052, "learning_rate": 1.061403491913124e-05, "loss": 0.7008, "step": 3078 }, { "epoch": 0.659120708570817, "grad_norm": 0.15140120050036712, "learning_rate": 1.0602038162883064e-05, "loss": 0.7001, "step": 3079 }, { "epoch": 0.6593347783040325, "grad_norm": 0.16143170483265978, "learning_rate": 1.0590045744716875e-05, "loss": 0.686, "step": 3080 }, { "epoch": 0.6595488480372481, "grad_norm": 0.15927642747166015, "learning_rate": 1.0578057670168338e-05, "loss": 0.6738, "step": 3081 }, { "epoch": 0.6597629177704637, "grad_norm": 0.15323202609648254, "learning_rate": 1.0566073944771142e-05, "loss": 0.6865, "step": 3082 }, { "epoch": 0.6599769875036793, "grad_norm": 0.17252273612411162, "learning_rate": 1.0554094574056935e-05, "loss": 0.689, "step": 3083 }, { "epoch": 0.660191057236895, "grad_norm": 0.16141619589301429, "learning_rate": 1.0542119563555388e-05, "loss": 0.6969, "step": 3084 }, { "epoch": 0.6604051269701106, "grad_norm": 0.1756548012941864, "learning_rate": 1.0530148918794131e-05, "loss": 0.6843, "step": 3085 }, { "epoch": 0.6606191967033261, "grad_norm": 0.15636751889672348, "learning_rate": 1.0518182645298798e-05, "loss": 0.7057, "step": 3086 }, { "epoch": 0.6608332664365417, "grad_norm": 0.16599761739994906, "learning_rate": 1.0506220748593003e-05, "loss": 0.7073, "step": 3087 }, { "epoch": 0.6610473361697573, "grad_norm": 0.16266530492319733, "learning_rate": 1.0494263234198328e-05, "loss": 0.7037, "step": 3088 }, { "epoch": 0.6612614059029729, "grad_norm": 0.18291412199766832, "learning_rate": 1.0482310107634349e-05, "loss": 0.7001, "step": 3089 }, { "epoch": 0.6614754756361885, "grad_norm": 0.15793773921931004, "learning_rate": 1.0470361374418592e-05, "loss": 0.6884, "step": 3090 }, { "epoch": 0.661689545369404, "grad_norm": 0.16229554901334134, "learning_rate": 1.0458417040066582e-05, "loss": 0.7033, "step": 3091 }, { "epoch": 0.6619036151026196, "grad_norm": 0.18626607373816426, "learning_rate": 1.0446477110091809e-05, "loss": 0.679, "step": 3092 }, { "epoch": 0.6621176848358352, "grad_norm": 0.15838847604553272, "learning_rate": 1.0434541590005702e-05, "loss": 0.7191, "step": 3093 }, { "epoch": 0.6623317545690509, "grad_norm": 0.16304191574001972, "learning_rate": 1.0422610485317696e-05, "loss": 0.6702, "step": 3094 }, { "epoch": 0.6625458243022665, "grad_norm": 0.16061600809628618, "learning_rate": 1.041068380153515e-05, "loss": 0.6856, "step": 3095 }, { "epoch": 0.6627598940354821, "grad_norm": 0.14957425317487621, "learning_rate": 1.0398761544163411e-05, "loss": 0.6799, "step": 3096 }, { "epoch": 0.6629739637686977, "grad_norm": 0.19783111977513954, "learning_rate": 1.038684371870577e-05, "loss": 0.7037, "step": 3097 }, { "epoch": 0.6631880335019132, "grad_norm": 0.16150087091595297, "learning_rate": 1.0374930330663467e-05, "loss": 0.7072, "step": 3098 }, { "epoch": 0.6634021032351288, "grad_norm": 0.16981422418322475, "learning_rate": 1.0363021385535709e-05, "loss": 0.7223, "step": 3099 }, { "epoch": 0.6636161729683444, "grad_norm": 0.28908513988228524, "learning_rate": 1.0351116888819632e-05, "loss": 0.6844, "step": 3100 }, { "epoch": 0.66383024270156, "grad_norm": 0.1607792246483595, "learning_rate": 1.0339216846010336e-05, "loss": 0.6907, "step": 3101 }, { "epoch": 0.6640443124347756, "grad_norm": 0.16236528757651575, "learning_rate": 1.0327321262600867e-05, "loss": 0.7155, "step": 3102 }, { "epoch": 0.6642583821679913, "grad_norm": 0.16303089783476565, "learning_rate": 1.0315430144082188e-05, "loss": 0.7112, "step": 3103 }, { "epoch": 0.6644724519012069, "grad_norm": 0.16116795157622021, "learning_rate": 1.0303543495943233e-05, "loss": 0.6892, "step": 3104 }, { "epoch": 0.6646865216344224, "grad_norm": 0.1623009662404361, "learning_rate": 1.0291661323670845e-05, "loss": 0.6864, "step": 3105 }, { "epoch": 0.664900591367638, "grad_norm": 0.1574929762562193, "learning_rate": 1.0279783632749818e-05, "loss": 0.6661, "step": 3106 }, { "epoch": 0.6651146611008536, "grad_norm": 0.16442713608472861, "learning_rate": 1.0267910428662878e-05, "loss": 0.7152, "step": 3107 }, { "epoch": 0.6653287308340692, "grad_norm": 0.1611659903693138, "learning_rate": 1.0256041716890662e-05, "loss": 0.6974, "step": 3108 }, { "epoch": 0.6655428005672848, "grad_norm": 0.15415718884072935, "learning_rate": 1.0244177502911762e-05, "loss": 0.7233, "step": 3109 }, { "epoch": 0.6657568703005003, "grad_norm": 0.15663701645788064, "learning_rate": 1.0232317792202658e-05, "loss": 0.7062, "step": 3110 }, { "epoch": 0.6659709400337159, "grad_norm": 0.1660121352485925, "learning_rate": 1.0220462590237781e-05, "loss": 0.7041, "step": 3111 }, { "epoch": 0.6661850097669316, "grad_norm": 0.1493054827494839, "learning_rate": 1.0208611902489478e-05, "loss": 0.684, "step": 3112 }, { "epoch": 0.6663990795001472, "grad_norm": 0.16242277806578512, "learning_rate": 1.0196765734427992e-05, "loss": 0.6799, "step": 3113 }, { "epoch": 0.6666131492333628, "grad_norm": 0.15561334950737316, "learning_rate": 1.0184924091521502e-05, "loss": 0.703, "step": 3114 }, { "epoch": 0.6668272189665784, "grad_norm": 0.1579829671750343, "learning_rate": 1.0173086979236077e-05, "loss": 0.7197, "step": 3115 }, { "epoch": 0.667041288699794, "grad_norm": 0.1533501087623317, "learning_rate": 1.0161254403035711e-05, "loss": 0.6914, "step": 3116 }, { "epoch": 0.6672553584330095, "grad_norm": 0.15860615118073362, "learning_rate": 1.0149426368382316e-05, "loss": 0.7257, "step": 3117 }, { "epoch": 0.6674694281662251, "grad_norm": 0.15062396088380706, "learning_rate": 1.0137602880735665e-05, "loss": 0.6871, "step": 3118 }, { "epoch": 0.6676834978994407, "grad_norm": 0.15271449812538404, "learning_rate": 1.0125783945553478e-05, "loss": 0.6857, "step": 3119 }, { "epoch": 0.6678975676326563, "grad_norm": 0.1550816402235058, "learning_rate": 1.011396956829134e-05, "loss": 0.6688, "step": 3120 }, { "epoch": 0.668111637365872, "grad_norm": 0.1569722840211998, "learning_rate": 1.0102159754402751e-05, "loss": 0.6725, "step": 3121 }, { "epoch": 0.6683257070990876, "grad_norm": 0.15539662546724703, "learning_rate": 1.009035450933911e-05, "loss": 0.7149, "step": 3122 }, { "epoch": 0.6685397768323031, "grad_norm": 0.1607548003730955, "learning_rate": 1.0078553838549679e-05, "loss": 0.6999, "step": 3123 }, { "epoch": 0.6687538465655187, "grad_norm": 0.15180709022372793, "learning_rate": 1.006675774748164e-05, "loss": 0.6639, "step": 3124 }, { "epoch": 0.6689679162987343, "grad_norm": 0.15921001860779627, "learning_rate": 1.0054966241580036e-05, "loss": 0.6822, "step": 3125 }, { "epoch": 0.6691819860319499, "grad_norm": 0.1681690814035098, "learning_rate": 1.0043179326287818e-05, "loss": 0.7023, "step": 3126 }, { "epoch": 0.6693960557651655, "grad_norm": 0.15606853472344315, "learning_rate": 1.0031397007045785e-05, "loss": 0.7039, "step": 3127 }, { "epoch": 0.6696101254983811, "grad_norm": 0.16079448082099368, "learning_rate": 1.0019619289292648e-05, "loss": 0.7082, "step": 3128 }, { "epoch": 0.6698241952315966, "grad_norm": 0.15619147163508657, "learning_rate": 1.0007846178464984e-05, "loss": 0.6797, "step": 3129 }, { "epoch": 0.6700382649648123, "grad_norm": 0.16050429543267425, "learning_rate": 9.996077679997225e-06, "loss": 0.6909, "step": 3130 }, { "epoch": 0.6702523346980279, "grad_norm": 0.16513716623223665, "learning_rate": 9.984313799321705e-06, "loss": 0.7146, "step": 3131 }, { "epoch": 0.6704664044312435, "grad_norm": 0.16168181479241397, "learning_rate": 9.97255454186859e-06, "loss": 0.673, "step": 3132 }, { "epoch": 0.6706804741644591, "grad_norm": 0.1644963549644342, "learning_rate": 9.960799913065945e-06, "loss": 0.6998, "step": 3133 }, { "epoch": 0.6708945438976747, "grad_norm": 0.1685040716482276, "learning_rate": 9.94904991833969e-06, "loss": 0.6878, "step": 3134 }, { "epoch": 0.6711086136308902, "grad_norm": 0.1769403242524765, "learning_rate": 9.937304563113588e-06, "loss": 0.7104, "step": 3135 }, { "epoch": 0.6713226833641058, "grad_norm": 0.15623683196959876, "learning_rate": 9.925563852809277e-06, "loss": 0.6956, "step": 3136 }, { "epoch": 0.6715367530973214, "grad_norm": 0.17605621966527873, "learning_rate": 9.913827792846256e-06, "loss": 0.7108, "step": 3137 }, { "epoch": 0.671750822830537, "grad_norm": 0.16534408460196132, "learning_rate": 9.902096388641857e-06, "loss": 0.6905, "step": 3138 }, { "epoch": 0.6719648925637527, "grad_norm": 0.1522216941277003, "learning_rate": 9.890369645611287e-06, "loss": 0.6907, "step": 3139 }, { "epoch": 0.6721789622969683, "grad_norm": 0.17269849774579715, "learning_rate": 9.878647569167574e-06, "loss": 0.7322, "step": 3140 }, { "epoch": 0.6723930320301839, "grad_norm": 0.1663999999530521, "learning_rate": 9.866930164721615e-06, "loss": 0.7128, "step": 3141 }, { "epoch": 0.6726071017633994, "grad_norm": 0.15884796574635146, "learning_rate": 9.855217437682153e-06, "loss": 0.7037, "step": 3142 }, { "epoch": 0.672821171496615, "grad_norm": 0.1618128699996525, "learning_rate": 9.84350939345574e-06, "loss": 0.6869, "step": 3143 }, { "epoch": 0.6730352412298306, "grad_norm": 0.18494989120822972, "learning_rate": 9.831806037446799e-06, "loss": 0.6954, "step": 3144 }, { "epoch": 0.6732493109630462, "grad_norm": 0.16320847668955887, "learning_rate": 9.820107375057587e-06, "loss": 0.6853, "step": 3145 }, { "epoch": 0.6734633806962618, "grad_norm": 0.15345569238811857, "learning_rate": 9.808413411688166e-06, "loss": 0.7107, "step": 3146 }, { "epoch": 0.6736774504294774, "grad_norm": 0.17216020080991495, "learning_rate": 9.796724152736459e-06, "loss": 0.7337, "step": 3147 }, { "epoch": 0.673891520162693, "grad_norm": 0.15632077929741134, "learning_rate": 9.785039603598211e-06, "loss": 0.739, "step": 3148 }, { "epoch": 0.6741055898959086, "grad_norm": 0.15844641185229077, "learning_rate": 9.773359769666979e-06, "loss": 0.7148, "step": 3149 }, { "epoch": 0.6743196596291242, "grad_norm": 0.1863008428946735, "learning_rate": 9.761684656334153e-06, "loss": 0.6896, "step": 3150 }, { "epoch": 0.6745337293623398, "grad_norm": 0.15964610498381, "learning_rate": 9.75001426898896e-06, "loss": 0.6856, "step": 3151 }, { "epoch": 0.6747477990955554, "grad_norm": 0.16358660961328506, "learning_rate": 9.738348613018404e-06, "loss": 0.7097, "step": 3152 }, { "epoch": 0.674961868828771, "grad_norm": 0.37476799614030293, "learning_rate": 9.726687693807346e-06, "loss": 0.6836, "step": 3153 }, { "epoch": 0.6751759385619865, "grad_norm": 0.1561915002792, "learning_rate": 9.715031516738449e-06, "loss": 0.7144, "step": 3154 }, { "epoch": 0.6753900082952021, "grad_norm": 0.15804131303086497, "learning_rate": 9.703380087192172e-06, "loss": 0.664, "step": 3155 }, { "epoch": 0.6756040780284177, "grad_norm": 0.1642980645109622, "learning_rate": 9.691733410546803e-06, "loss": 0.673, "step": 3156 }, { "epoch": 0.6758181477616334, "grad_norm": 0.15965839899632348, "learning_rate": 9.680091492178414e-06, "loss": 0.6993, "step": 3157 }, { "epoch": 0.676032217494849, "grad_norm": 0.17361081052545546, "learning_rate": 9.668454337460903e-06, "loss": 0.6821, "step": 3158 }, { "epoch": 0.6762462872280646, "grad_norm": 0.15442536683044944, "learning_rate": 9.65682195176596e-06, "loss": 0.6816, "step": 3159 }, { "epoch": 0.6764603569612802, "grad_norm": 0.2751371714063181, "learning_rate": 9.645194340463066e-06, "loss": 0.7192, "step": 3160 }, { "epoch": 0.6766744266944957, "grad_norm": 0.16031316581342847, "learning_rate": 9.633571508919517e-06, "loss": 0.6663, "step": 3161 }, { "epoch": 0.6768884964277113, "grad_norm": 0.1605092561481887, "learning_rate": 9.621953462500376e-06, "loss": 0.7064, "step": 3162 }, { "epoch": 0.6771025661609269, "grad_norm": 0.15314851988646203, "learning_rate": 9.610340206568516e-06, "loss": 0.6978, "step": 3163 }, { "epoch": 0.6773166358941425, "grad_norm": 0.16377340468062307, "learning_rate": 9.598731746484609e-06, "loss": 0.708, "step": 3164 }, { "epoch": 0.6775307056273581, "grad_norm": 0.155316449530693, "learning_rate": 9.587128087607076e-06, "loss": 0.6815, "step": 3165 }, { "epoch": 0.6777447753605738, "grad_norm": 0.16467278791151196, "learning_rate": 9.575529235292167e-06, "loss": 0.6884, "step": 3166 }, { "epoch": 0.6779588450937893, "grad_norm": 0.1551656837635727, "learning_rate": 9.563935194893873e-06, "loss": 0.6763, "step": 3167 }, { "epoch": 0.6781729148270049, "grad_norm": 0.1626814061488549, "learning_rate": 9.552345971763995e-06, "loss": 0.6747, "step": 3168 }, { "epoch": 0.6783869845602205, "grad_norm": 0.1584305609053603, "learning_rate": 9.540761571252081e-06, "loss": 0.6853, "step": 3169 }, { "epoch": 0.6786010542934361, "grad_norm": 0.1627757079442043, "learning_rate": 9.529181998705484e-06, "loss": 0.6885, "step": 3170 }, { "epoch": 0.6788151240266517, "grad_norm": 0.15702841227085676, "learning_rate": 9.517607259469315e-06, "loss": 0.6986, "step": 3171 }, { "epoch": 0.6790291937598673, "grad_norm": 0.15514903257969306, "learning_rate": 9.506037358886438e-06, "loss": 0.7051, "step": 3172 }, { "epoch": 0.6792432634930828, "grad_norm": 0.1588871422175614, "learning_rate": 9.494472302297513e-06, "loss": 0.6797, "step": 3173 }, { "epoch": 0.6794573332262984, "grad_norm": 0.15465620509642436, "learning_rate": 9.482912095040935e-06, "loss": 0.7042, "step": 3174 }, { "epoch": 0.6796714029595141, "grad_norm": 0.15477902193079654, "learning_rate": 9.471356742452881e-06, "loss": 0.7312, "step": 3175 }, { "epoch": 0.6798854726927297, "grad_norm": 0.154888446804333, "learning_rate": 9.459806249867291e-06, "loss": 0.6874, "step": 3176 }, { "epoch": 0.6800995424259453, "grad_norm": 0.15403428259120724, "learning_rate": 9.448260622615833e-06, "loss": 0.6899, "step": 3177 }, { "epoch": 0.6803136121591609, "grad_norm": 0.15486401377898562, "learning_rate": 9.436719866027964e-06, "loss": 0.7176, "step": 3178 }, { "epoch": 0.6805276818923764, "grad_norm": 0.16917034306985979, "learning_rate": 9.42518398543086e-06, "loss": 0.692, "step": 3179 }, { "epoch": 0.680741751625592, "grad_norm": 0.15405636111575113, "learning_rate": 9.413652986149469e-06, "loss": 0.7086, "step": 3180 }, { "epoch": 0.6809558213588076, "grad_norm": 0.16976708114096917, "learning_rate": 9.40212687350649e-06, "loss": 0.6965, "step": 3181 }, { "epoch": 0.6811698910920232, "grad_norm": 0.15729070246954535, "learning_rate": 9.390605652822338e-06, "loss": 0.6783, "step": 3182 }, { "epoch": 0.6813839608252388, "grad_norm": 0.15539637254515223, "learning_rate": 9.3790893294152e-06, "loss": 0.7329, "step": 3183 }, { "epoch": 0.6815980305584545, "grad_norm": 0.15200131940661787, "learning_rate": 9.367577908600982e-06, "loss": 0.7162, "step": 3184 }, { "epoch": 0.68181210029167, "grad_norm": 0.15464749938535283, "learning_rate": 9.356071395693336e-06, "loss": 0.6939, "step": 3185 }, { "epoch": 0.6820261700248856, "grad_norm": 0.15238123162748352, "learning_rate": 9.344569796003658e-06, "loss": 0.7004, "step": 3186 }, { "epoch": 0.6822402397581012, "grad_norm": 0.14639624950887936, "learning_rate": 9.333073114841047e-06, "loss": 0.6769, "step": 3187 }, { "epoch": 0.6824543094913168, "grad_norm": 0.15527939673346694, "learning_rate": 9.321581357512368e-06, "loss": 0.6919, "step": 3188 }, { "epoch": 0.6826683792245324, "grad_norm": 0.15415946644278458, "learning_rate": 9.31009452932218e-06, "loss": 0.6889, "step": 3189 }, { "epoch": 0.682882448957748, "grad_norm": 0.14968837572851434, "learning_rate": 9.298612635572789e-06, "loss": 0.6933, "step": 3190 }, { "epoch": 0.6830965186909636, "grad_norm": 0.16013721408530462, "learning_rate": 9.287135681564221e-06, "loss": 0.6753, "step": 3191 }, { "epoch": 0.6833105884241791, "grad_norm": 0.17109248256134058, "learning_rate": 9.275663672594207e-06, "loss": 0.6925, "step": 3192 }, { "epoch": 0.6835246581573948, "grad_norm": 0.2166480431249797, "learning_rate": 9.264196613958214e-06, "loss": 0.6956, "step": 3193 }, { "epoch": 0.6837387278906104, "grad_norm": 0.15860227185907372, "learning_rate": 9.252734510949407e-06, "loss": 0.6835, "step": 3194 }, { "epoch": 0.683952797623826, "grad_norm": 0.1605212642264318, "learning_rate": 9.241277368858674e-06, "loss": 0.7096, "step": 3195 }, { "epoch": 0.6841668673570416, "grad_norm": 0.15730764500498864, "learning_rate": 9.229825192974622e-06, "loss": 0.6816, "step": 3196 }, { "epoch": 0.6843809370902572, "grad_norm": 0.15379061456916696, "learning_rate": 9.218377988583537e-06, "loss": 0.6991, "step": 3197 }, { "epoch": 0.6845950068234727, "grad_norm": 0.15275554232726055, "learning_rate": 9.206935760969444e-06, "loss": 0.669, "step": 3198 }, { "epoch": 0.6848090765566883, "grad_norm": 0.16113244165672033, "learning_rate": 9.195498515414039e-06, "loss": 0.6919, "step": 3199 }, { "epoch": 0.6850231462899039, "grad_norm": 0.1520911816424395, "learning_rate": 9.18406625719674e-06, "loss": 0.689, "step": 3200 }, { "epoch": 0.6852372160231195, "grad_norm": 0.16296861432524246, "learning_rate": 9.172638991594664e-06, "loss": 0.7292, "step": 3201 }, { "epoch": 0.6854512857563351, "grad_norm": 0.1556187840342968, "learning_rate": 9.161216723882602e-06, "loss": 0.6927, "step": 3202 }, { "epoch": 0.6856653554895508, "grad_norm": 0.14984107826594323, "learning_rate": 9.14979945933307e-06, "loss": 0.6939, "step": 3203 }, { "epoch": 0.6858794252227663, "grad_norm": 0.15465119761763227, "learning_rate": 9.138387203216235e-06, "loss": 0.6731, "step": 3204 }, { "epoch": 0.6860934949559819, "grad_norm": 0.1538868912566821, "learning_rate": 9.126979960799984e-06, "loss": 0.6888, "step": 3205 }, { "epoch": 0.6863075646891975, "grad_norm": 0.15184321224933015, "learning_rate": 9.115577737349887e-06, "loss": 0.705, "step": 3206 }, { "epoch": 0.6865216344224131, "grad_norm": 0.16052072893154717, "learning_rate": 9.104180538129175e-06, "loss": 0.7003, "step": 3207 }, { "epoch": 0.6867357041556287, "grad_norm": 0.15060654352400643, "learning_rate": 9.092788368398785e-06, "loss": 0.678, "step": 3208 }, { "epoch": 0.6869497738888443, "grad_norm": 0.1610353272806887, "learning_rate": 9.081401233417315e-06, "loss": 0.7286, "step": 3209 }, { "epoch": 0.6871638436220598, "grad_norm": 0.15840630458392266, "learning_rate": 9.070019138441054e-06, "loss": 0.7406, "step": 3210 }, { "epoch": 0.6873779133552754, "grad_norm": 0.24777232575067695, "learning_rate": 9.058642088723943e-06, "loss": 0.6667, "step": 3211 }, { "epoch": 0.6875919830884911, "grad_norm": 0.15377440152663294, "learning_rate": 9.047270089517615e-06, "loss": 0.7053, "step": 3212 }, { "epoch": 0.6878060528217067, "grad_norm": 0.16044122375214126, "learning_rate": 9.035903146071371e-06, "loss": 0.6988, "step": 3213 }, { "epoch": 0.6880201225549223, "grad_norm": 0.15458704704568665, "learning_rate": 9.024541263632156e-06, "loss": 0.7298, "step": 3214 }, { "epoch": 0.6882341922881379, "grad_norm": 0.19605947630624748, "learning_rate": 9.013184447444612e-06, "loss": 0.7159, "step": 3215 }, { "epoch": 0.6884482620213535, "grad_norm": 0.15326392484139542, "learning_rate": 9.001832702751005e-06, "loss": 0.6825, "step": 3216 }, { "epoch": 0.688662331754569, "grad_norm": 0.27379772412603165, "learning_rate": 8.990486034791292e-06, "loss": 0.7022, "step": 3217 }, { "epoch": 0.6888764014877846, "grad_norm": 0.15215053768188203, "learning_rate": 8.979144448803079e-06, "loss": 0.7045, "step": 3218 }, { "epoch": 0.6890904712210002, "grad_norm": 0.15324266580610485, "learning_rate": 8.967807950021603e-06, "loss": 0.6812, "step": 3219 }, { "epoch": 0.6893045409542158, "grad_norm": 0.15535988865067632, "learning_rate": 8.956476543679787e-06, "loss": 0.6849, "step": 3220 }, { "epoch": 0.6895186106874315, "grad_norm": 0.15531044354422383, "learning_rate": 8.945150235008187e-06, "loss": 0.6995, "step": 3221 }, { "epoch": 0.6897326804206471, "grad_norm": 0.1606915624181014, "learning_rate": 8.933829029234993e-06, "loss": 0.684, "step": 3222 }, { "epoch": 0.6899467501538626, "grad_norm": 0.16621197805211987, "learning_rate": 8.922512931586066e-06, "loss": 0.7035, "step": 3223 }, { "epoch": 0.6901608198870782, "grad_norm": 0.15844074323764407, "learning_rate": 8.911201947284893e-06, "loss": 0.6878, "step": 3224 }, { "epoch": 0.6903748896202938, "grad_norm": 0.15585698595264963, "learning_rate": 8.8998960815526e-06, "loss": 0.7059, "step": 3225 }, { "epoch": 0.6905889593535094, "grad_norm": 0.15776095578882104, "learning_rate": 8.888595339607961e-06, "loss": 0.6982, "step": 3226 }, { "epoch": 0.690803029086725, "grad_norm": 0.15176073051273042, "learning_rate": 8.877299726667368e-06, "loss": 0.6645, "step": 3227 }, { "epoch": 0.6910170988199406, "grad_norm": 0.1641609940908201, "learning_rate": 8.866009247944857e-06, "loss": 0.6647, "step": 3228 }, { "epoch": 0.6912311685531561, "grad_norm": 0.1504639867144409, "learning_rate": 8.854723908652105e-06, "loss": 0.6849, "step": 3229 }, { "epoch": 0.6914452382863718, "grad_norm": 0.15357323871562542, "learning_rate": 8.843443713998388e-06, "loss": 0.7071, "step": 3230 }, { "epoch": 0.6916593080195874, "grad_norm": 0.15690131119394382, "learning_rate": 8.832168669190629e-06, "loss": 0.7268, "step": 3231 }, { "epoch": 0.691873377752803, "grad_norm": 0.14867224619401806, "learning_rate": 8.820898779433374e-06, "loss": 0.6911, "step": 3232 }, { "epoch": 0.6920874474860186, "grad_norm": 0.1548704703614642, "learning_rate": 8.809634049928773e-06, "loss": 0.7196, "step": 3233 }, { "epoch": 0.6923015172192342, "grad_norm": 0.15226528973226203, "learning_rate": 8.798374485876609e-06, "loss": 0.6886, "step": 3234 }, { "epoch": 0.6925155869524497, "grad_norm": 0.15634992465558403, "learning_rate": 8.787120092474286e-06, "loss": 0.6935, "step": 3235 }, { "epoch": 0.6927296566856653, "grad_norm": 0.15286390914464468, "learning_rate": 8.775870874916792e-06, "loss": 0.7091, "step": 3236 }, { "epoch": 0.6929437264188809, "grad_norm": 0.15767169052468524, "learning_rate": 8.764626838396757e-06, "loss": 0.6807, "step": 3237 }, { "epoch": 0.6931577961520965, "grad_norm": 0.15721142554207457, "learning_rate": 8.753387988104415e-06, "loss": 0.7197, "step": 3238 }, { "epoch": 0.6933718658853122, "grad_norm": 0.1631163203891092, "learning_rate": 8.742154329227581e-06, "loss": 0.6637, "step": 3239 }, { "epoch": 0.6935859356185278, "grad_norm": 0.15770942046593717, "learning_rate": 8.73092586695171e-06, "loss": 0.6653, "step": 3240 }, { "epoch": 0.6938000053517434, "grad_norm": 0.16094273294550562, "learning_rate": 8.71970260645982e-06, "loss": 0.7003, "step": 3241 }, { "epoch": 0.6940140750849589, "grad_norm": 0.168150831931923, "learning_rate": 8.708484552932557e-06, "loss": 0.7094, "step": 3242 }, { "epoch": 0.6942281448181745, "grad_norm": 0.16187957424719024, "learning_rate": 8.697271711548163e-06, "loss": 0.6946, "step": 3243 }, { "epoch": 0.6944422145513901, "grad_norm": 0.1685080945617025, "learning_rate": 8.686064087482448e-06, "loss": 0.6792, "step": 3244 }, { "epoch": 0.6946562842846057, "grad_norm": 0.17103143099079773, "learning_rate": 8.674861685908848e-06, "loss": 0.7131, "step": 3245 }, { "epoch": 0.6948703540178213, "grad_norm": 0.15904423375335486, "learning_rate": 8.663664511998355e-06, "loss": 0.7085, "step": 3246 }, { "epoch": 0.6950844237510369, "grad_norm": 0.19989578516301848, "learning_rate": 8.652472570919579e-06, "loss": 0.7223, "step": 3247 }, { "epoch": 0.6952984934842525, "grad_norm": 0.1551725663755147, "learning_rate": 8.641285867838682e-06, "loss": 0.6606, "step": 3248 }, { "epoch": 0.6955125632174681, "grad_norm": 0.15329960859729763, "learning_rate": 8.630104407919438e-06, "loss": 0.6741, "step": 3249 }, { "epoch": 0.6957266329506837, "grad_norm": 0.15652680347529477, "learning_rate": 8.618928196323192e-06, "loss": 0.6879, "step": 3250 }, { "epoch": 0.6959407026838993, "grad_norm": 0.1551127764792699, "learning_rate": 8.60775723820885e-06, "loss": 0.7047, "step": 3251 }, { "epoch": 0.6961547724171149, "grad_norm": 0.16006532064357237, "learning_rate": 8.59659153873292e-06, "loss": 0.6819, "step": 3252 }, { "epoch": 0.6963688421503305, "grad_norm": 0.15633178720675306, "learning_rate": 8.585431103049453e-06, "loss": 0.7087, "step": 3253 }, { "epoch": 0.696582911883546, "grad_norm": 0.15692161645710714, "learning_rate": 8.574275936310095e-06, "loss": 0.6891, "step": 3254 }, { "epoch": 0.6967969816167616, "grad_norm": 0.1566846526405795, "learning_rate": 8.563126043664054e-06, "loss": 0.685, "step": 3255 }, { "epoch": 0.6970110513499772, "grad_norm": 0.16482919007789454, "learning_rate": 8.55198143025809e-06, "loss": 0.6927, "step": 3256 }, { "epoch": 0.6972251210831929, "grad_norm": 0.1598377174862076, "learning_rate": 8.540842101236549e-06, "loss": 0.6744, "step": 3257 }, { "epoch": 0.6974391908164085, "grad_norm": 0.16261354296708044, "learning_rate": 8.529708061741306e-06, "loss": 0.689, "step": 3258 }, { "epoch": 0.6976532605496241, "grad_norm": 0.17474731991633885, "learning_rate": 8.518579316911828e-06, "loss": 0.7015, "step": 3259 }, { "epoch": 0.6978673302828396, "grad_norm": 0.1581379365573815, "learning_rate": 8.507455871885126e-06, "loss": 0.7059, "step": 3260 }, { "epoch": 0.6980814000160552, "grad_norm": 0.16172568947803476, "learning_rate": 8.49633773179575e-06, "loss": 0.6835, "step": 3261 }, { "epoch": 0.6982954697492708, "grad_norm": 0.158648978385029, "learning_rate": 8.485224901775823e-06, "loss": 0.7139, "step": 3262 }, { "epoch": 0.6985095394824864, "grad_norm": 0.16222491003334794, "learning_rate": 8.474117386954998e-06, "loss": 0.6862, "step": 3263 }, { "epoch": 0.698723609215702, "grad_norm": 0.16584229699293707, "learning_rate": 8.463015192460488e-06, "loss": 0.7049, "step": 3264 }, { "epoch": 0.6989376789489176, "grad_norm": 0.15216134303928552, "learning_rate": 8.451918323417053e-06, "loss": 0.6973, "step": 3265 }, { "epoch": 0.6991517486821333, "grad_norm": 0.15987361850664245, "learning_rate": 8.440826784946972e-06, "loss": 0.6871, "step": 3266 }, { "epoch": 0.6993658184153488, "grad_norm": 0.1508799694115644, "learning_rate": 8.429740582170094e-06, "loss": 0.6829, "step": 3267 }, { "epoch": 0.6995798881485644, "grad_norm": 0.1556035125570702, "learning_rate": 8.418659720203777e-06, "loss": 0.6947, "step": 3268 }, { "epoch": 0.69979395788178, "grad_norm": 0.15935844557397552, "learning_rate": 8.407584204162933e-06, "loss": 0.6948, "step": 3269 }, { "epoch": 0.7000080276149956, "grad_norm": 0.15164055886152145, "learning_rate": 8.396514039160007e-06, "loss": 0.7085, "step": 3270 }, { "epoch": 0.7002220973482112, "grad_norm": 0.15745134574952785, "learning_rate": 8.38544923030495e-06, "loss": 0.6901, "step": 3271 }, { "epoch": 0.7004361670814268, "grad_norm": 0.16747816355778503, "learning_rate": 8.374389782705276e-06, "loss": 0.7361, "step": 3272 }, { "epoch": 0.7006502368146423, "grad_norm": 0.14676182178891728, "learning_rate": 8.363335701465989e-06, "loss": 0.696, "step": 3273 }, { "epoch": 0.7008643065478579, "grad_norm": 0.16553099968837984, "learning_rate": 8.352286991689642e-06, "loss": 0.6989, "step": 3274 }, { "epoch": 0.7010783762810736, "grad_norm": 0.15052109228717409, "learning_rate": 8.341243658476303e-06, "loss": 0.6999, "step": 3275 }, { "epoch": 0.7012924460142892, "grad_norm": 0.1480200012041235, "learning_rate": 8.330205706923543e-06, "loss": 0.6853, "step": 3276 }, { "epoch": 0.7015065157475048, "grad_norm": 0.15865158359551787, "learning_rate": 8.319173142126473e-06, "loss": 0.682, "step": 3277 }, { "epoch": 0.7017205854807204, "grad_norm": 0.1590859019280151, "learning_rate": 8.30814596917769e-06, "loss": 0.7098, "step": 3278 }, { "epoch": 0.7019346552139359, "grad_norm": 0.152983472158898, "learning_rate": 8.297124193167325e-06, "loss": 0.7197, "step": 3279 }, { "epoch": 0.7021487249471515, "grad_norm": 0.1586510475568293, "learning_rate": 8.286107819183018e-06, "loss": 0.7014, "step": 3280 }, { "epoch": 0.7023627946803671, "grad_norm": 0.15122253761799054, "learning_rate": 8.27509685230989e-06, "loss": 0.7142, "step": 3281 }, { "epoch": 0.7025768644135827, "grad_norm": 0.1548462353061408, "learning_rate": 8.264091297630601e-06, "loss": 0.6958, "step": 3282 }, { "epoch": 0.7027909341467983, "grad_norm": 0.15058402413909766, "learning_rate": 8.253091160225275e-06, "loss": 0.6909, "step": 3283 }, { "epoch": 0.703005003880014, "grad_norm": 0.15312143235504205, "learning_rate": 8.242096445171568e-06, "loss": 0.664, "step": 3284 }, { "epoch": 0.7032190736132296, "grad_norm": 0.16032369384110995, "learning_rate": 8.231107157544627e-06, "loss": 0.7078, "step": 3285 }, { "epoch": 0.7034331433464451, "grad_norm": 0.16338575824528243, "learning_rate": 8.220123302417068e-06, "loss": 0.685, "step": 3286 }, { "epoch": 0.7036472130796607, "grad_norm": 0.15133135918381588, "learning_rate": 8.209144884859038e-06, "loss": 0.6944, "step": 3287 }, { "epoch": 0.7038612828128763, "grad_norm": 0.16186220984160274, "learning_rate": 8.198171909938135e-06, "loss": 0.6995, "step": 3288 }, { "epoch": 0.7040753525460919, "grad_norm": 0.16182674921422807, "learning_rate": 8.187204382719485e-06, "loss": 0.701, "step": 3289 }, { "epoch": 0.7042894222793075, "grad_norm": 0.15291505984125645, "learning_rate": 8.176242308265659e-06, "loss": 0.6945, "step": 3290 }, { "epoch": 0.704503492012523, "grad_norm": 0.15712566845642592, "learning_rate": 8.16528569163674e-06, "loss": 0.7011, "step": 3291 }, { "epoch": 0.7047175617457386, "grad_norm": 0.1533780516375367, "learning_rate": 8.154334537890288e-06, "loss": 0.7048, "step": 3292 }, { "epoch": 0.7049316314789543, "grad_norm": 0.15025676200541188, "learning_rate": 8.143388852081319e-06, "loss": 0.6932, "step": 3293 }, { "epoch": 0.7051457012121699, "grad_norm": 0.15997870935165437, "learning_rate": 8.132448639262362e-06, "loss": 0.682, "step": 3294 }, { "epoch": 0.7053597709453855, "grad_norm": 0.15125068583237963, "learning_rate": 8.121513904483383e-06, "loss": 0.6946, "step": 3295 }, { "epoch": 0.7055738406786011, "grad_norm": 0.15393005915538988, "learning_rate": 8.110584652791837e-06, "loss": 0.6878, "step": 3296 }, { "epoch": 0.7057879104118167, "grad_norm": 0.16056953788845163, "learning_rate": 8.099660889232661e-06, "loss": 0.7207, "step": 3297 }, { "epoch": 0.7060019801450322, "grad_norm": 0.15322709592882022, "learning_rate": 8.088742618848227e-06, "loss": 0.6877, "step": 3298 }, { "epoch": 0.7062160498782478, "grad_norm": 0.20836632658402116, "learning_rate": 8.077829846678401e-06, "loss": 0.7085, "step": 3299 }, { "epoch": 0.7064301196114634, "grad_norm": 0.23459825862196712, "learning_rate": 8.066922577760488e-06, "loss": 0.7036, "step": 3300 }, { "epoch": 0.706644189344679, "grad_norm": 0.16557335826929756, "learning_rate": 8.056020817129269e-06, "loss": 0.7171, "step": 3301 }, { "epoch": 0.7068582590778947, "grad_norm": 0.15962824680752044, "learning_rate": 8.045124569816983e-06, "loss": 0.6942, "step": 3302 }, { "epoch": 0.7070723288111103, "grad_norm": 0.1562427851842794, "learning_rate": 8.034233840853304e-06, "loss": 0.6977, "step": 3303 }, { "epoch": 0.7072863985443258, "grad_norm": 0.15336135090779646, "learning_rate": 8.023348635265377e-06, "loss": 0.6992, "step": 3304 }, { "epoch": 0.7075004682775414, "grad_norm": 0.1559817613170699, "learning_rate": 8.012468958077805e-06, "loss": 0.6823, "step": 3305 }, { "epoch": 0.707714538010757, "grad_norm": 0.151487855424781, "learning_rate": 8.001594814312612e-06, "loss": 0.6633, "step": 3306 }, { "epoch": 0.7079286077439726, "grad_norm": 0.15145904827595, "learning_rate": 7.990726208989289e-06, "loss": 0.7021, "step": 3307 }, { "epoch": 0.7081426774771882, "grad_norm": 0.15019125814155232, "learning_rate": 7.979863147124771e-06, "loss": 0.6683, "step": 3308 }, { "epoch": 0.7083567472104038, "grad_norm": 0.16162331545892966, "learning_rate": 7.969005633733412e-06, "loss": 0.7502, "step": 3309 }, { "epoch": 0.7085708169436193, "grad_norm": 0.15798457096325713, "learning_rate": 7.95815367382703e-06, "loss": 0.7138, "step": 3310 }, { "epoch": 0.7087848866768349, "grad_norm": 0.15290488841322952, "learning_rate": 7.947307272414874e-06, "loss": 0.679, "step": 3311 }, { "epoch": 0.7089989564100506, "grad_norm": 0.15531253190558253, "learning_rate": 7.936466434503614e-06, "loss": 0.681, "step": 3312 }, { "epoch": 0.7092130261432662, "grad_norm": 0.15074385446016486, "learning_rate": 7.925631165097362e-06, "loss": 0.6814, "step": 3313 }, { "epoch": 0.7094270958764818, "grad_norm": 0.16414183475105307, "learning_rate": 7.914801469197669e-06, "loss": 0.6879, "step": 3314 }, { "epoch": 0.7096411656096974, "grad_norm": 0.15444895914184442, "learning_rate": 7.903977351803488e-06, "loss": 0.6813, "step": 3315 }, { "epoch": 0.709855235342913, "grad_norm": 0.15600202300130273, "learning_rate": 7.893158817911225e-06, "loss": 0.6943, "step": 3316 }, { "epoch": 0.7100693050761285, "grad_norm": 0.29906092755354347, "learning_rate": 7.882345872514682e-06, "loss": 0.7171, "step": 3317 }, { "epoch": 0.7102833748093441, "grad_norm": 0.15706329338725833, "learning_rate": 7.871538520605104e-06, "loss": 0.7027, "step": 3318 }, { "epoch": 0.7104974445425597, "grad_norm": 0.15247037005381606, "learning_rate": 7.860736767171148e-06, "loss": 0.6959, "step": 3319 }, { "epoch": 0.7107115142757753, "grad_norm": 0.16360515419120714, "learning_rate": 7.849940617198872e-06, "loss": 0.7192, "step": 3320 }, { "epoch": 0.710925584008991, "grad_norm": 0.1494733107927237, "learning_rate": 7.839150075671766e-06, "loss": 0.7096, "step": 3321 }, { "epoch": 0.7111396537422066, "grad_norm": 0.15651951456030722, "learning_rate": 7.828365147570731e-06, "loss": 0.691, "step": 3322 }, { "epoch": 0.7113537234754221, "grad_norm": 0.16220546679217188, "learning_rate": 7.817585837874055e-06, "loss": 0.6959, "step": 3323 }, { "epoch": 0.7115677932086377, "grad_norm": 0.14801509523348158, "learning_rate": 7.806812151557463e-06, "loss": 0.6822, "step": 3324 }, { "epoch": 0.7117818629418533, "grad_norm": 0.16681944991433031, "learning_rate": 7.796044093594056e-06, "loss": 0.7127, "step": 3325 }, { "epoch": 0.7119959326750689, "grad_norm": 0.15712317398624448, "learning_rate": 7.785281668954353e-06, "loss": 0.691, "step": 3326 }, { "epoch": 0.7122100024082845, "grad_norm": 0.15789069431373431, "learning_rate": 7.774524882606278e-06, "loss": 0.7135, "step": 3327 }, { "epoch": 0.7124240721415, "grad_norm": 0.16442824904434017, "learning_rate": 7.76377373951513e-06, "loss": 0.6983, "step": 3328 }, { "epoch": 0.7126381418747156, "grad_norm": 0.16359429737990552, "learning_rate": 7.753028244643634e-06, "loss": 0.6985, "step": 3329 }, { "epoch": 0.7128522116079313, "grad_norm": 0.16502710970219736, "learning_rate": 7.742288402951875e-06, "loss": 0.6842, "step": 3330 }, { "epoch": 0.7130662813411469, "grad_norm": 0.16350182802163685, "learning_rate": 7.731554219397354e-06, "loss": 0.7213, "step": 3331 }, { "epoch": 0.7132803510743625, "grad_norm": 0.15803065686106776, "learning_rate": 7.720825698934941e-06, "loss": 0.6936, "step": 3332 }, { "epoch": 0.7134944208075781, "grad_norm": 0.16427002342058883, "learning_rate": 7.710102846516909e-06, "loss": 0.7221, "step": 3333 }, { "epoch": 0.7137084905407937, "grad_norm": 0.1507168336640026, "learning_rate": 7.699385667092914e-06, "loss": 0.681, "step": 3334 }, { "epoch": 0.7139225602740092, "grad_norm": 0.15888700028778466, "learning_rate": 7.688674165609968e-06, "loss": 0.6694, "step": 3335 }, { "epoch": 0.7141366300072248, "grad_norm": 0.16434463130603383, "learning_rate": 7.6779683470125e-06, "loss": 0.6848, "step": 3336 }, { "epoch": 0.7143506997404404, "grad_norm": 0.14962235689266584, "learning_rate": 7.667268216242276e-06, "loss": 0.6797, "step": 3337 }, { "epoch": 0.714564769473656, "grad_norm": 0.2539172984903302, "learning_rate": 7.65657377823847e-06, "loss": 0.6945, "step": 3338 }, { "epoch": 0.7147788392068717, "grad_norm": 0.15894632150544735, "learning_rate": 7.645885037937618e-06, "loss": 0.7146, "step": 3339 }, { "epoch": 0.7149929089400873, "grad_norm": 0.15303574716754773, "learning_rate": 7.635202000273612e-06, "loss": 0.6851, "step": 3340 }, { "epoch": 0.7152069786733029, "grad_norm": 0.17956860287237303, "learning_rate": 7.624524670177733e-06, "loss": 0.6893, "step": 3341 }, { "epoch": 0.7154210484065184, "grad_norm": 0.16486662999374005, "learning_rate": 7.613853052578606e-06, "loss": 0.6997, "step": 3342 }, { "epoch": 0.715635118139734, "grad_norm": 0.15758835418243866, "learning_rate": 7.603187152402236e-06, "loss": 0.6888, "step": 3343 }, { "epoch": 0.7158491878729496, "grad_norm": 0.15339903183700115, "learning_rate": 7.592526974571992e-06, "loss": 0.6829, "step": 3344 }, { "epoch": 0.7160632576061652, "grad_norm": 0.7017295495675703, "learning_rate": 7.581872524008574e-06, "loss": 0.7461, "step": 3345 }, { "epoch": 0.7162773273393808, "grad_norm": 0.14876238787415644, "learning_rate": 7.571223805630074e-06, "loss": 0.6823, "step": 3346 }, { "epoch": 0.7164913970725963, "grad_norm": 0.1526503072162832, "learning_rate": 7.560580824351908e-06, "loss": 0.672, "step": 3347 }, { "epoch": 0.716705466805812, "grad_norm": 0.15917182905910648, "learning_rate": 7.549943585086863e-06, "loss": 0.691, "step": 3348 }, { "epoch": 0.7169195365390276, "grad_norm": 0.16069115875788337, "learning_rate": 7.539312092745072e-06, "loss": 0.6967, "step": 3349 }, { "epoch": 0.7171336062722432, "grad_norm": 0.16353040198864685, "learning_rate": 7.528686352234005e-06, "loss": 0.6717, "step": 3350 }, { "epoch": 0.7173476760054588, "grad_norm": 0.16357354033857646, "learning_rate": 7.518066368458494e-06, "loss": 0.6989, "step": 3351 }, { "epoch": 0.7175617457386744, "grad_norm": 0.1535502885079072, "learning_rate": 7.5074521463206904e-06, "loss": 0.6872, "step": 3352 }, { "epoch": 0.71777581547189, "grad_norm": 0.15944656493142786, "learning_rate": 7.49684369072011e-06, "loss": 0.6963, "step": 3353 }, { "epoch": 0.7179898852051055, "grad_norm": 0.15582152848457353, "learning_rate": 7.486241006553598e-06, "loss": 0.7141, "step": 3354 }, { "epoch": 0.7182039549383211, "grad_norm": 0.15180057158042523, "learning_rate": 7.475644098715324e-06, "loss": 0.7161, "step": 3355 }, { "epoch": 0.7184180246715367, "grad_norm": 0.15303103099972895, "learning_rate": 7.465052972096816e-06, "loss": 0.6799, "step": 3356 }, { "epoch": 0.7186320944047524, "grad_norm": 0.14673479453292337, "learning_rate": 7.454467631586901e-06, "loss": 0.7051, "step": 3357 }, { "epoch": 0.718846164137968, "grad_norm": 0.1599869420272919, "learning_rate": 7.443888082071764e-06, "loss": 0.7064, "step": 3358 }, { "epoch": 0.7190602338711836, "grad_norm": 0.15461538319716817, "learning_rate": 7.433314328434908e-06, "loss": 0.7072, "step": 3359 }, { "epoch": 0.7192743036043991, "grad_norm": 0.15386624927594827, "learning_rate": 7.422746375557148e-06, "loss": 0.6646, "step": 3360 }, { "epoch": 0.7194883733376147, "grad_norm": 0.1545566190984167, "learning_rate": 7.412184228316644e-06, "loss": 0.7063, "step": 3361 }, { "epoch": 0.7197024430708303, "grad_norm": 0.15626497527671113, "learning_rate": 7.40162789158885e-06, "loss": 0.7081, "step": 3362 }, { "epoch": 0.7199165128040459, "grad_norm": 0.15450757007304494, "learning_rate": 7.3910773702465596e-06, "loss": 0.7157, "step": 3363 }, { "epoch": 0.7201305825372615, "grad_norm": 0.16973833457855797, "learning_rate": 7.380532669159881e-06, "loss": 0.6915, "step": 3364 }, { "epoch": 0.7203446522704771, "grad_norm": 0.15458051922297733, "learning_rate": 7.369993793196213e-06, "loss": 0.731, "step": 3365 }, { "epoch": 0.7205587220036928, "grad_norm": 0.14870759179833373, "learning_rate": 7.359460747220298e-06, "loss": 0.6992, "step": 3366 }, { "epoch": 0.7207727917369083, "grad_norm": 0.1505420260072728, "learning_rate": 7.348933536094156e-06, "loss": 0.6831, "step": 3367 }, { "epoch": 0.7209868614701239, "grad_norm": 0.15286444108432226, "learning_rate": 7.338412164677133e-06, "loss": 0.7078, "step": 3368 }, { "epoch": 0.7212009312033395, "grad_norm": 0.14654828021530483, "learning_rate": 7.327896637825886e-06, "loss": 0.715, "step": 3369 }, { "epoch": 0.7214150009365551, "grad_norm": 0.1519082171866383, "learning_rate": 7.317386960394346e-06, "loss": 0.691, "step": 3370 }, { "epoch": 0.7216290706697707, "grad_norm": 0.1533500100122846, "learning_rate": 7.306883137233776e-06, "loss": 0.703, "step": 3371 }, { "epoch": 0.7218431404029862, "grad_norm": 0.15430001585354824, "learning_rate": 7.296385173192708e-06, "loss": 0.6862, "step": 3372 }, { "epoch": 0.7220572101362018, "grad_norm": 0.14863032108344576, "learning_rate": 7.2858930731169945e-06, "loss": 0.6909, "step": 3373 }, { "epoch": 0.7222712798694174, "grad_norm": 0.1637095261720954, "learning_rate": 7.275406841849757e-06, "loss": 0.6923, "step": 3374 }, { "epoch": 0.7224853496026331, "grad_norm": 0.1503527045643267, "learning_rate": 7.264926484231429e-06, "loss": 0.6571, "step": 3375 }, { "epoch": 0.7226994193358487, "grad_norm": 0.1500767315268531, "learning_rate": 7.2544520050997305e-06, "loss": 0.6934, "step": 3376 }, { "epoch": 0.7229134890690643, "grad_norm": 0.15289633280915021, "learning_rate": 7.243983409289648e-06, "loss": 0.6921, "step": 3377 }, { "epoch": 0.7231275588022799, "grad_norm": 0.1563676658019505, "learning_rate": 7.233520701633479e-06, "loss": 0.7074, "step": 3378 }, { "epoch": 0.7233416285354954, "grad_norm": 0.14779967180706038, "learning_rate": 7.223063886960779e-06, "loss": 0.7217, "step": 3379 }, { "epoch": 0.723555698268711, "grad_norm": 0.1584082916968249, "learning_rate": 7.2126129700983986e-06, "loss": 0.728, "step": 3380 }, { "epoch": 0.7237697680019266, "grad_norm": 0.15063823251962052, "learning_rate": 7.20216795587047e-06, "loss": 0.7113, "step": 3381 }, { "epoch": 0.7239838377351422, "grad_norm": 0.1550126643318111, "learning_rate": 7.191728849098379e-06, "loss": 0.6939, "step": 3382 }, { "epoch": 0.7241979074683578, "grad_norm": 0.15214817709964662, "learning_rate": 7.1812956546008105e-06, "loss": 0.7081, "step": 3383 }, { "epoch": 0.7244119772015735, "grad_norm": 0.14938195835100643, "learning_rate": 7.170868377193696e-06, "loss": 0.6981, "step": 3384 }, { "epoch": 0.724626046934789, "grad_norm": 0.15038555055654673, "learning_rate": 7.160447021690253e-06, "loss": 0.7076, "step": 3385 }, { "epoch": 0.7248401166680046, "grad_norm": 0.15309372635862914, "learning_rate": 7.150031592900968e-06, "loss": 0.6889, "step": 3386 }, { "epoch": 0.7250541864012202, "grad_norm": 0.1524824433175752, "learning_rate": 7.139622095633572e-06, "loss": 0.7322, "step": 3387 }, { "epoch": 0.7252682561344358, "grad_norm": 0.1599372986538526, "learning_rate": 7.1292185346930745e-06, "loss": 0.7222, "step": 3388 }, { "epoch": 0.7254823258676514, "grad_norm": 0.15133341854378823, "learning_rate": 7.118820914881746e-06, "loss": 0.6981, "step": 3389 }, { "epoch": 0.725696395600867, "grad_norm": 0.1446864509483478, "learning_rate": 7.108429240999097e-06, "loss": 0.683, "step": 3390 }, { "epoch": 0.7259104653340825, "grad_norm": 0.14883310883327594, "learning_rate": 7.098043517841911e-06, "loss": 0.6818, "step": 3391 }, { "epoch": 0.7261245350672981, "grad_norm": 0.1555072200378894, "learning_rate": 7.0876637502042255e-06, "loss": 0.7017, "step": 3392 }, { "epoch": 0.7263386048005138, "grad_norm": 0.1536401070649317, "learning_rate": 7.07728994287731e-06, "loss": 0.7172, "step": 3393 }, { "epoch": 0.7265526745337294, "grad_norm": 0.14689587361987888, "learning_rate": 7.066922100649702e-06, "loss": 0.6965, "step": 3394 }, { "epoch": 0.726766744266945, "grad_norm": 0.15310481914422255, "learning_rate": 7.056560228307183e-06, "loss": 0.7084, "step": 3395 }, { "epoch": 0.7269808140001606, "grad_norm": 0.15197732025207406, "learning_rate": 7.046204330632762e-06, "loss": 0.6819, "step": 3396 }, { "epoch": 0.7271948837333762, "grad_norm": 0.14805906907025784, "learning_rate": 7.035854412406709e-06, "loss": 0.6983, "step": 3397 }, { "epoch": 0.7274089534665917, "grad_norm": 0.1483724804144164, "learning_rate": 7.025510478406534e-06, "loss": 0.695, "step": 3398 }, { "epoch": 0.7276230231998073, "grad_norm": 0.15873187132890057, "learning_rate": 7.015172533406964e-06, "loss": 0.6991, "step": 3399 }, { "epoch": 0.7278370929330229, "grad_norm": 0.14800913953356853, "learning_rate": 7.0048405821799855e-06, "loss": 0.724, "step": 3400 }, { "epoch": 0.7280511626662385, "grad_norm": 0.15134973949863306, "learning_rate": 6.9945146294948105e-06, "loss": 0.6858, "step": 3401 }, { "epoch": 0.7282652323994542, "grad_norm": 0.15248909677906117, "learning_rate": 6.984194680117868e-06, "loss": 0.7221, "step": 3402 }, { "epoch": 0.7284793021326698, "grad_norm": 0.15342124727909373, "learning_rate": 6.973880738812844e-06, "loss": 0.7029, "step": 3403 }, { "epoch": 0.7286933718658853, "grad_norm": 0.14576049701522734, "learning_rate": 6.963572810340616e-06, "loss": 0.7224, "step": 3404 }, { "epoch": 0.7289074415991009, "grad_norm": 0.1498282622939985, "learning_rate": 6.953270899459317e-06, "loss": 0.6969, "step": 3405 }, { "epoch": 0.7291215113323165, "grad_norm": 0.1512993692592409, "learning_rate": 6.942975010924291e-06, "loss": 0.7149, "step": 3406 }, { "epoch": 0.7293355810655321, "grad_norm": 0.16659082919254012, "learning_rate": 6.932685149488094e-06, "loss": 0.6801, "step": 3407 }, { "epoch": 0.7295496507987477, "grad_norm": 0.147521830530763, "learning_rate": 6.922401319900518e-06, "loss": 0.7229, "step": 3408 }, { "epoch": 0.7297637205319633, "grad_norm": 0.24749044889111424, "learning_rate": 6.912123526908547e-06, "loss": 0.7052, "step": 3409 }, { "epoch": 0.7299777902651788, "grad_norm": 0.15457958137989353, "learning_rate": 6.901851775256396e-06, "loss": 0.7045, "step": 3410 }, { "epoch": 0.7301918599983945, "grad_norm": 0.1460763916119327, "learning_rate": 6.8915860696854965e-06, "loss": 0.7014, "step": 3411 }, { "epoch": 0.7304059297316101, "grad_norm": 0.148052040371279, "learning_rate": 6.881326414934464e-06, "loss": 0.6878, "step": 3412 }, { "epoch": 0.7306199994648257, "grad_norm": 0.14774034247572365, "learning_rate": 6.87107281573915e-06, "loss": 0.6603, "step": 3413 }, { "epoch": 0.7308340691980413, "grad_norm": 0.14849805484254816, "learning_rate": 6.860825276832585e-06, "loss": 0.6801, "step": 3414 }, { "epoch": 0.7310481389312569, "grad_norm": 0.17338069488266583, "learning_rate": 6.8505838029450275e-06, "loss": 0.688, "step": 3415 }, { "epoch": 0.7312622086644724, "grad_norm": 0.1538822474317705, "learning_rate": 6.840348398803906e-06, "loss": 0.7164, "step": 3416 }, { "epoch": 0.731476278397688, "grad_norm": 0.15209063029325054, "learning_rate": 6.830119069133878e-06, "loss": 0.7129, "step": 3417 }, { "epoch": 0.7316903481309036, "grad_norm": 0.15285611898125917, "learning_rate": 6.819895818656783e-06, "loss": 0.7178, "step": 3418 }, { "epoch": 0.7319044178641192, "grad_norm": 0.15493790996850904, "learning_rate": 6.809678652091645e-06, "loss": 0.6951, "step": 3419 }, { "epoch": 0.7321184875973348, "grad_norm": 0.1433556319415999, "learning_rate": 6.7994675741547014e-06, "loss": 0.677, "step": 3420 }, { "epoch": 0.7323325573305505, "grad_norm": 0.1477034357517413, "learning_rate": 6.789262589559355e-06, "loss": 0.6864, "step": 3421 }, { "epoch": 0.732546627063766, "grad_norm": 0.15183324778132398, "learning_rate": 6.779063703016216e-06, "loss": 0.683, "step": 3422 }, { "epoch": 0.7327606967969816, "grad_norm": 0.15255717181585005, "learning_rate": 6.768870919233073e-06, "loss": 0.6892, "step": 3423 }, { "epoch": 0.7329747665301972, "grad_norm": 0.14965734972611663, "learning_rate": 6.758684242914888e-06, "loss": 0.6942, "step": 3424 }, { "epoch": 0.7331888362634128, "grad_norm": 0.15405644325158754, "learning_rate": 6.7485036787638245e-06, "loss": 0.7072, "step": 3425 }, { "epoch": 0.7334029059966284, "grad_norm": 0.15227504324107274, "learning_rate": 6.738329231479197e-06, "loss": 0.7054, "step": 3426 }, { "epoch": 0.733616975729844, "grad_norm": 0.14675505260563773, "learning_rate": 6.728160905757521e-06, "loss": 0.6963, "step": 3427 }, { "epoch": 0.7338310454630596, "grad_norm": 0.14652893843082374, "learning_rate": 6.717998706292481e-06, "loss": 0.7229, "step": 3428 }, { "epoch": 0.7340451151962751, "grad_norm": 0.1509029876280325, "learning_rate": 6.70784263777492e-06, "loss": 0.703, "step": 3429 }, { "epoch": 0.7342591849294908, "grad_norm": 0.15094441942247902, "learning_rate": 6.697692704892871e-06, "loss": 0.7041, "step": 3430 }, { "epoch": 0.7344732546627064, "grad_norm": 0.1506906511359724, "learning_rate": 6.687548912331512e-06, "loss": 0.7032, "step": 3431 }, { "epoch": 0.734687324395922, "grad_norm": 0.1505700428178517, "learning_rate": 6.677411264773204e-06, "loss": 0.7044, "step": 3432 }, { "epoch": 0.7349013941291376, "grad_norm": 0.1562400944894484, "learning_rate": 6.6672797668974765e-06, "loss": 0.6775, "step": 3433 }, { "epoch": 0.7351154638623532, "grad_norm": 0.15451972419948504, "learning_rate": 6.657154423380996e-06, "loss": 0.6834, "step": 3434 }, { "epoch": 0.7353295335955687, "grad_norm": 0.14973106909517378, "learning_rate": 6.6470352388976146e-06, "loss": 0.6923, "step": 3435 }, { "epoch": 0.7355436033287843, "grad_norm": 0.1505642357852099, "learning_rate": 6.636922218118316e-06, "loss": 0.691, "step": 3436 }, { "epoch": 0.7357576730619999, "grad_norm": 0.1513512979605357, "learning_rate": 6.626815365711259e-06, "loss": 0.6969, "step": 3437 }, { "epoch": 0.7359717427952155, "grad_norm": 0.15932898538625465, "learning_rate": 6.6167146863417564e-06, "loss": 0.6706, "step": 3438 }, { "epoch": 0.7361858125284312, "grad_norm": 0.1461255327804242, "learning_rate": 6.60662018467225e-06, "loss": 0.6555, "step": 3439 }, { "epoch": 0.7363998822616468, "grad_norm": 0.15223104873445534, "learning_rate": 6.596531865362354e-06, "loss": 0.7068, "step": 3440 }, { "epoch": 0.7366139519948623, "grad_norm": 0.15299475056670553, "learning_rate": 6.5864497330688045e-06, "loss": 0.6863, "step": 3441 }, { "epoch": 0.7368280217280779, "grad_norm": 0.14539464281086412, "learning_rate": 6.576373792445507e-06, "loss": 0.7074, "step": 3442 }, { "epoch": 0.7370420914612935, "grad_norm": 0.15091681358692513, "learning_rate": 6.566304048143499e-06, "loss": 0.6906, "step": 3443 }, { "epoch": 0.7372561611945091, "grad_norm": 0.15680469206084852, "learning_rate": 6.556240504810945e-06, "loss": 0.7087, "step": 3444 }, { "epoch": 0.7374702309277247, "grad_norm": 0.15354007935878433, "learning_rate": 6.54618316709317e-06, "loss": 0.6972, "step": 3445 }, { "epoch": 0.7376843006609403, "grad_norm": 0.15215215918153305, "learning_rate": 6.53613203963261e-06, "loss": 0.7038, "step": 3446 }, { "epoch": 0.7378983703941558, "grad_norm": 0.14887076958114878, "learning_rate": 6.526087127068857e-06, "loss": 0.7332, "step": 3447 }, { "epoch": 0.7381124401273715, "grad_norm": 0.15696229039598944, "learning_rate": 6.516048434038624e-06, "loss": 0.6826, "step": 3448 }, { "epoch": 0.7383265098605871, "grad_norm": 0.154293062938764, "learning_rate": 6.506015965175745e-06, "loss": 0.6952, "step": 3449 }, { "epoch": 0.7385405795938027, "grad_norm": 0.14669425585749374, "learning_rate": 6.495989725111203e-06, "loss": 0.6866, "step": 3450 }, { "epoch": 0.7387546493270183, "grad_norm": 0.1542738165770645, "learning_rate": 6.485969718473075e-06, "loss": 0.7225, "step": 3451 }, { "epoch": 0.7389687190602339, "grad_norm": 0.15307150042183137, "learning_rate": 6.475955949886587e-06, "loss": 0.6793, "step": 3452 }, { "epoch": 0.7391827887934495, "grad_norm": 0.1483389967735417, "learning_rate": 6.465948423974085e-06, "loss": 0.7074, "step": 3453 }, { "epoch": 0.739396858526665, "grad_norm": 0.14907186217462975, "learning_rate": 6.455947145355006e-06, "loss": 0.7193, "step": 3454 }, { "epoch": 0.7396109282598806, "grad_norm": 0.18214754122585616, "learning_rate": 6.445952118645937e-06, "loss": 0.6676, "step": 3455 }, { "epoch": 0.7398249979930962, "grad_norm": 0.1482875992962516, "learning_rate": 6.435963348460554e-06, "loss": 0.6898, "step": 3456 }, { "epoch": 0.7400390677263119, "grad_norm": 0.1427804193861355, "learning_rate": 6.4259808394096645e-06, "loss": 0.6947, "step": 3457 }, { "epoch": 0.7402531374595275, "grad_norm": 0.1518872142755112, "learning_rate": 6.4160045961011664e-06, "loss": 0.6959, "step": 3458 }, { "epoch": 0.7404672071927431, "grad_norm": 0.15155101550663358, "learning_rate": 6.406034623140078e-06, "loss": 0.7016, "step": 3459 }, { "epoch": 0.7406812769259586, "grad_norm": 0.1517575123736666, "learning_rate": 6.396070925128532e-06, "loss": 0.6925, "step": 3460 }, { "epoch": 0.7408953466591742, "grad_norm": 0.14763084958866032, "learning_rate": 6.386113506665737e-06, "loss": 0.6997, "step": 3461 }, { "epoch": 0.7411094163923898, "grad_norm": 0.15559156433787158, "learning_rate": 6.376162372348032e-06, "loss": 0.6639, "step": 3462 }, { "epoch": 0.7413234861256054, "grad_norm": 0.15121447903508906, "learning_rate": 6.36621752676883e-06, "loss": 0.701, "step": 3463 }, { "epoch": 0.741537555858821, "grad_norm": 0.15143520224027426, "learning_rate": 6.356278974518659e-06, "loss": 0.6859, "step": 3464 }, { "epoch": 0.7417516255920366, "grad_norm": 0.15200496557556217, "learning_rate": 6.346346720185146e-06, "loss": 0.6891, "step": 3465 }, { "epoch": 0.7419656953252523, "grad_norm": 0.15178547620092442, "learning_rate": 6.336420768352984e-06, "loss": 0.7108, "step": 3466 }, { "epoch": 0.7421797650584678, "grad_norm": 0.1455060733596948, "learning_rate": 6.326501123603986e-06, "loss": 0.6763, "step": 3467 }, { "epoch": 0.7423938347916834, "grad_norm": 0.15051287681302894, "learning_rate": 6.316587790517044e-06, "loss": 0.7349, "step": 3468 }, { "epoch": 0.742607904524899, "grad_norm": 0.14343932085981198, "learning_rate": 6.3066807736681215e-06, "loss": 0.6908, "step": 3469 }, { "epoch": 0.7428219742581146, "grad_norm": 0.1503341811816976, "learning_rate": 6.296780077630289e-06, "loss": 0.6822, "step": 3470 }, { "epoch": 0.7430360439913302, "grad_norm": 0.14770233398877097, "learning_rate": 6.2868857069736935e-06, "loss": 0.6986, "step": 3471 }, { "epoch": 0.7432501137245457, "grad_norm": 0.14953962426904005, "learning_rate": 6.276997666265547e-06, "loss": 0.6895, "step": 3472 }, { "epoch": 0.7434641834577613, "grad_norm": 0.15064435272164417, "learning_rate": 6.267115960070165e-06, "loss": 0.7043, "step": 3473 }, { "epoch": 0.7436782531909769, "grad_norm": 0.14900797917823344, "learning_rate": 6.257240592948908e-06, "loss": 0.7116, "step": 3474 }, { "epoch": 0.7438923229241926, "grad_norm": 0.14859530069751684, "learning_rate": 6.247371569460236e-06, "loss": 0.6833, "step": 3475 }, { "epoch": 0.7441063926574082, "grad_norm": 0.1536351887710474, "learning_rate": 6.23750889415968e-06, "loss": 0.6794, "step": 3476 }, { "epoch": 0.7443204623906238, "grad_norm": 0.14901655132083652, "learning_rate": 6.2276525715998184e-06, "loss": 0.6881, "step": 3477 }, { "epoch": 0.7445345321238394, "grad_norm": 0.14949396606953977, "learning_rate": 6.217802606330319e-06, "loss": 0.698, "step": 3478 }, { "epoch": 0.7447486018570549, "grad_norm": 0.1479567503230999, "learning_rate": 6.207959002897912e-06, "loss": 0.6676, "step": 3479 }, { "epoch": 0.7449626715902705, "grad_norm": 0.1461896823252663, "learning_rate": 6.1981217658463766e-06, "loss": 0.69, "step": 3480 }, { "epoch": 0.7451767413234861, "grad_norm": 0.14459359084047935, "learning_rate": 6.188290899716569e-06, "loss": 0.6888, "step": 3481 }, { "epoch": 0.7453908110567017, "grad_norm": 0.1486442535448886, "learning_rate": 6.1784664090464045e-06, "loss": 0.6891, "step": 3482 }, { "epoch": 0.7456048807899173, "grad_norm": 0.15391385653026315, "learning_rate": 6.168648298370839e-06, "loss": 0.7018, "step": 3483 }, { "epoch": 0.745818950523133, "grad_norm": 0.13931330424670904, "learning_rate": 6.1588365722218975e-06, "loss": 0.6633, "step": 3484 }, { "epoch": 0.7460330202563485, "grad_norm": 0.14574270828205177, "learning_rate": 6.149031235128667e-06, "loss": 0.7149, "step": 3485 }, { "epoch": 0.7462470899895641, "grad_norm": 0.14688827931739226, "learning_rate": 6.139232291617254e-06, "loss": 0.6902, "step": 3486 }, { "epoch": 0.7464611597227797, "grad_norm": 0.1503196751740802, "learning_rate": 6.129439746210848e-06, "loss": 0.7141, "step": 3487 }, { "epoch": 0.7466752294559953, "grad_norm": 0.14990597542397502, "learning_rate": 6.119653603429659e-06, "loss": 0.7168, "step": 3488 }, { "epoch": 0.7468892991892109, "grad_norm": 0.14781381138706368, "learning_rate": 6.109873867790957e-06, "loss": 0.6865, "step": 3489 }, { "epoch": 0.7471033689224265, "grad_norm": 0.1512508172162291, "learning_rate": 6.100100543809057e-06, "loss": 0.6991, "step": 3490 }, { "epoch": 0.747317438655642, "grad_norm": 0.14971574620785155, "learning_rate": 6.090333635995296e-06, "loss": 0.7168, "step": 3491 }, { "epoch": 0.7475315083888576, "grad_norm": 0.14662669096817155, "learning_rate": 6.080573148858071e-06, "loss": 0.6971, "step": 3492 }, { "epoch": 0.7477455781220733, "grad_norm": 0.1477528433065089, "learning_rate": 6.070819086902795e-06, "loss": 0.6814, "step": 3493 }, { "epoch": 0.7479596478552889, "grad_norm": 0.14861693160736386, "learning_rate": 6.06107145463193e-06, "loss": 0.6977, "step": 3494 }, { "epoch": 0.7481737175885045, "grad_norm": 0.15123908792964869, "learning_rate": 6.051330256544971e-06, "loss": 0.6637, "step": 3495 }, { "epoch": 0.7483877873217201, "grad_norm": 0.14176940130106536, "learning_rate": 6.041595497138424e-06, "loss": 0.704, "step": 3496 }, { "epoch": 0.7486018570549356, "grad_norm": 0.15762788664350239, "learning_rate": 6.031867180905852e-06, "loss": 0.7146, "step": 3497 }, { "epoch": 0.7488159267881512, "grad_norm": 0.14694967764663688, "learning_rate": 6.022145312337812e-06, "loss": 0.6589, "step": 3498 }, { "epoch": 0.7490299965213668, "grad_norm": 0.14380758565341722, "learning_rate": 6.0124298959219165e-06, "loss": 0.6629, "step": 3499 }, { "epoch": 0.7492440662545824, "grad_norm": 0.15062939581345539, "learning_rate": 6.002720936142767e-06, "loss": 0.6876, "step": 3500 }, { "epoch": 0.749458135987798, "grad_norm": 0.15310027955477912, "learning_rate": 5.9930184374820125e-06, "loss": 0.7018, "step": 3501 }, { "epoch": 0.7496722057210137, "grad_norm": 0.14450896111185574, "learning_rate": 5.98332240441831e-06, "loss": 0.6619, "step": 3502 }, { "epoch": 0.7498862754542293, "grad_norm": 0.15373891111271507, "learning_rate": 5.973632841427324e-06, "loss": 0.7045, "step": 3503 }, { "epoch": 0.7501003451874448, "grad_norm": 0.15075910232264875, "learning_rate": 5.963949752981746e-06, "loss": 0.6976, "step": 3504 }, { "epoch": 0.7503144149206604, "grad_norm": 0.14762413292381302, "learning_rate": 5.954273143551264e-06, "loss": 0.676, "step": 3505 }, { "epoch": 0.750528484653876, "grad_norm": 0.1509472898776307, "learning_rate": 5.944603017602586e-06, "loss": 0.705, "step": 3506 }, { "epoch": 0.7507425543870916, "grad_norm": 0.1598886546934672, "learning_rate": 5.934939379599431e-06, "loss": 0.7103, "step": 3507 }, { "epoch": 0.7509566241203072, "grad_norm": 0.14834173668501452, "learning_rate": 5.925282234002505e-06, "loss": 0.6667, "step": 3508 }, { "epoch": 0.7511706938535228, "grad_norm": 0.14871353345344307, "learning_rate": 5.915631585269543e-06, "loss": 0.677, "step": 3509 }, { "epoch": 0.7513847635867383, "grad_norm": 0.1455205068481264, "learning_rate": 5.905987437855252e-06, "loss": 0.694, "step": 3510 }, { "epoch": 0.751598833319954, "grad_norm": 0.1433319837719959, "learning_rate": 5.896349796211358e-06, "loss": 0.6931, "step": 3511 }, { "epoch": 0.7518129030531696, "grad_norm": 0.14993155880692113, "learning_rate": 5.8867186647865885e-06, "loss": 0.6669, "step": 3512 }, { "epoch": 0.7520269727863852, "grad_norm": 0.15340387858874688, "learning_rate": 5.877094048026641e-06, "loss": 0.6857, "step": 3513 }, { "epoch": 0.7522410425196008, "grad_norm": 0.14781702867549093, "learning_rate": 5.867475950374233e-06, "loss": 0.6903, "step": 3514 }, { "epoch": 0.7524551122528164, "grad_norm": 0.15672579984067628, "learning_rate": 5.857864376269051e-06, "loss": 0.6975, "step": 3515 }, { "epoch": 0.7526691819860319, "grad_norm": 0.15518680035157378, "learning_rate": 5.848259330147785e-06, "loss": 0.7203, "step": 3516 }, { "epoch": 0.7528832517192475, "grad_norm": 0.14822640603276857, "learning_rate": 5.83866081644411e-06, "loss": 0.6938, "step": 3517 }, { "epoch": 0.7530973214524631, "grad_norm": 0.15473743140564133, "learning_rate": 5.829068839588676e-06, "loss": 0.7144, "step": 3518 }, { "epoch": 0.7533113911856787, "grad_norm": 0.15380435570663145, "learning_rate": 5.81948340400913e-06, "loss": 0.6952, "step": 3519 }, { "epoch": 0.7535254609188944, "grad_norm": 0.149614534669644, "learning_rate": 5.809904514130078e-06, "loss": 0.6814, "step": 3520 }, { "epoch": 0.75373953065211, "grad_norm": 0.1516527425931899, "learning_rate": 5.800332174373129e-06, "loss": 0.6785, "step": 3521 }, { "epoch": 0.7539536003853256, "grad_norm": 0.14958597241174026, "learning_rate": 5.790766389156859e-06, "loss": 0.6863, "step": 3522 }, { "epoch": 0.7541676701185411, "grad_norm": 0.1455880171340645, "learning_rate": 5.781207162896807e-06, "loss": 0.6779, "step": 3523 }, { "epoch": 0.7543817398517567, "grad_norm": 0.1481194485704188, "learning_rate": 5.7716545000055056e-06, "loss": 0.6966, "step": 3524 }, { "epoch": 0.7545958095849723, "grad_norm": 0.1453422681248648, "learning_rate": 5.762108404892437e-06, "loss": 0.6788, "step": 3525 }, { "epoch": 0.7548098793181879, "grad_norm": 0.141303964684086, "learning_rate": 5.752568881964065e-06, "loss": 0.6647, "step": 3526 }, { "epoch": 0.7550239490514035, "grad_norm": 0.1465381438690067, "learning_rate": 5.74303593562382e-06, "loss": 0.7006, "step": 3527 }, { "epoch": 0.755238018784619, "grad_norm": 0.14717689421418012, "learning_rate": 5.733509570272085e-06, "loss": 0.706, "step": 3528 }, { "epoch": 0.7554520885178346, "grad_norm": 0.145311171036877, "learning_rate": 5.7239897903062195e-06, "loss": 0.685, "step": 3529 }, { "epoch": 0.7556661582510503, "grad_norm": 0.13975117168116696, "learning_rate": 5.714476600120531e-06, "loss": 0.6734, "step": 3530 }, { "epoch": 0.7558802279842659, "grad_norm": 0.15204346152954393, "learning_rate": 5.7049700041062896e-06, "loss": 0.7228, "step": 3531 }, { "epoch": 0.7560942977174815, "grad_norm": 0.14964212802659002, "learning_rate": 5.695470006651736e-06, "loss": 0.7265, "step": 3532 }, { "epoch": 0.7563083674506971, "grad_norm": 0.14141466999538752, "learning_rate": 5.685976612142033e-06, "loss": 0.693, "step": 3533 }, { "epoch": 0.7565224371839127, "grad_norm": 0.1428511091199509, "learning_rate": 5.67648982495933e-06, "loss": 0.6766, "step": 3534 }, { "epoch": 0.7567365069171282, "grad_norm": 0.15061562224777444, "learning_rate": 5.667009649482698e-06, "loss": 0.6989, "step": 3535 }, { "epoch": 0.7569505766503438, "grad_norm": 0.14920844910110417, "learning_rate": 5.65753609008818e-06, "loss": 0.7145, "step": 3536 }, { "epoch": 0.7571646463835594, "grad_norm": 0.1862597364757916, "learning_rate": 5.6480691511487404e-06, "loss": 0.6871, "step": 3537 }, { "epoch": 0.757378716116775, "grad_norm": 0.15082101946973378, "learning_rate": 5.638608837034309e-06, "loss": 0.7031, "step": 3538 }, { "epoch": 0.7575927858499907, "grad_norm": 0.14885222190568342, "learning_rate": 5.629155152111756e-06, "loss": 0.6708, "step": 3539 }, { "epoch": 0.7578068555832063, "grad_norm": 0.14946031097612494, "learning_rate": 5.619708100744871e-06, "loss": 0.6998, "step": 3540 }, { "epoch": 0.7580209253164218, "grad_norm": 0.15150139368910043, "learning_rate": 5.6102676872944105e-06, "loss": 0.6862, "step": 3541 }, { "epoch": 0.7582349950496374, "grad_norm": 0.14528120780814796, "learning_rate": 5.600833916118036e-06, "loss": 0.6926, "step": 3542 }, { "epoch": 0.758449064782853, "grad_norm": 0.1478792522908929, "learning_rate": 5.591406791570368e-06, "loss": 0.6757, "step": 3543 }, { "epoch": 0.7586631345160686, "grad_norm": 0.15479992966447959, "learning_rate": 5.581986318002954e-06, "loss": 0.7115, "step": 3544 }, { "epoch": 0.7588772042492842, "grad_norm": 0.14778186516095093, "learning_rate": 5.572572499764258e-06, "loss": 0.6631, "step": 3545 }, { "epoch": 0.7590912739824998, "grad_norm": 0.15514968870863632, "learning_rate": 5.56316534119969e-06, "loss": 0.723, "step": 3546 }, { "epoch": 0.7593053437157153, "grad_norm": 0.15399317871950186, "learning_rate": 5.553764846651568e-06, "loss": 0.6834, "step": 3547 }, { "epoch": 0.759519413448931, "grad_norm": 0.14610405195629494, "learning_rate": 5.544371020459147e-06, "loss": 0.6949, "step": 3548 }, { "epoch": 0.7597334831821466, "grad_norm": 0.1550757297618627, "learning_rate": 5.534983866958608e-06, "loss": 0.7034, "step": 3549 }, { "epoch": 0.7599475529153622, "grad_norm": 0.15249826727832982, "learning_rate": 5.52560339048303e-06, "loss": 0.6802, "step": 3550 }, { "epoch": 0.7601616226485778, "grad_norm": 0.15990361646423798, "learning_rate": 5.51622959536243e-06, "loss": 0.6665, "step": 3551 }, { "epoch": 0.7603756923817934, "grad_norm": 0.15675674451816746, "learning_rate": 5.506862485923743e-06, "loss": 0.7085, "step": 3552 }, { "epoch": 0.760589762115009, "grad_norm": 0.15425741849393068, "learning_rate": 5.497502066490794e-06, "loss": 0.7043, "step": 3553 }, { "epoch": 0.7608038318482245, "grad_norm": 0.14732362439572336, "learning_rate": 5.488148341384343e-06, "loss": 0.6942, "step": 3554 }, { "epoch": 0.7610179015814401, "grad_norm": 0.15462024258481727, "learning_rate": 5.47880131492206e-06, "loss": 0.6877, "step": 3555 }, { "epoch": 0.7612319713146557, "grad_norm": 0.1488457724029461, "learning_rate": 5.469460991418501e-06, "loss": 0.6778, "step": 3556 }, { "epoch": 0.7614460410478714, "grad_norm": 0.1463214204438405, "learning_rate": 5.460127375185149e-06, "loss": 0.7052, "step": 3557 }, { "epoch": 0.761660110781087, "grad_norm": 0.15787772253742774, "learning_rate": 5.450800470530391e-06, "loss": 0.7364, "step": 3558 }, { "epoch": 0.7618741805143026, "grad_norm": 0.15108225763316432, "learning_rate": 5.441480281759497e-06, "loss": 0.692, "step": 3559 }, { "epoch": 0.7620882502475181, "grad_norm": 0.14680756999056407, "learning_rate": 5.43216681317466e-06, "loss": 0.6819, "step": 3560 }, { "epoch": 0.7623023199807337, "grad_norm": 0.15136912613020453, "learning_rate": 5.422860069074949e-06, "loss": 0.7046, "step": 3561 }, { "epoch": 0.7625163897139493, "grad_norm": 0.14723621183647193, "learning_rate": 5.413560053756344e-06, "loss": 0.6712, "step": 3562 }, { "epoch": 0.7627304594471649, "grad_norm": 0.14575649289043383, "learning_rate": 5.404266771511724e-06, "loss": 0.6831, "step": 3563 }, { "epoch": 0.7629445291803805, "grad_norm": 0.14985340867888913, "learning_rate": 5.394980226630837e-06, "loss": 0.6907, "step": 3564 }, { "epoch": 0.763158598913596, "grad_norm": 0.21399373667467642, "learning_rate": 5.385700423400342e-06, "loss": 0.6851, "step": 3565 }, { "epoch": 0.7633726686468117, "grad_norm": 0.14587068917445511, "learning_rate": 5.376427366103785e-06, "loss": 0.6746, "step": 3566 }, { "epoch": 0.7635867383800273, "grad_norm": 0.14772214126312302, "learning_rate": 5.367161059021579e-06, "loss": 0.6807, "step": 3567 }, { "epoch": 0.7638008081132429, "grad_norm": 0.15104290541918627, "learning_rate": 5.357901506431045e-06, "loss": 0.6925, "step": 3568 }, { "epoch": 0.7640148778464585, "grad_norm": 0.1460490001623541, "learning_rate": 5.348648712606377e-06, "loss": 0.6606, "step": 3569 }, { "epoch": 0.7642289475796741, "grad_norm": 0.14996309101222452, "learning_rate": 5.339402681818635e-06, "loss": 0.6921, "step": 3570 }, { "epoch": 0.7644430173128897, "grad_norm": 0.15161998327531107, "learning_rate": 5.330163418335785e-06, "loss": 0.6887, "step": 3571 }, { "epoch": 0.7646570870461052, "grad_norm": 0.15116850976620883, "learning_rate": 5.3209309264226405e-06, "loss": 0.6967, "step": 3572 }, { "epoch": 0.7648711567793208, "grad_norm": 0.1525044548119896, "learning_rate": 5.311705210340909e-06, "loss": 0.6929, "step": 3573 }, { "epoch": 0.7650852265125364, "grad_norm": 0.14448107558451886, "learning_rate": 5.302486274349172e-06, "loss": 0.6904, "step": 3574 }, { "epoch": 0.7652992962457521, "grad_norm": 0.15287788455081394, "learning_rate": 5.293274122702858e-06, "loss": 0.6758, "step": 3575 }, { "epoch": 0.7655133659789677, "grad_norm": 0.144367658280424, "learning_rate": 5.284068759654295e-06, "loss": 0.7035, "step": 3576 }, { "epoch": 0.7657274357121833, "grad_norm": 0.14739219572103643, "learning_rate": 5.274870189452648e-06, "loss": 0.7131, "step": 3577 }, { "epoch": 0.7659415054453989, "grad_norm": 0.15153468382661264, "learning_rate": 5.2656784163439715e-06, "loss": 0.6855, "step": 3578 }, { "epoch": 0.7661555751786144, "grad_norm": 0.14472458441935257, "learning_rate": 5.25649344457116e-06, "loss": 0.6678, "step": 3579 }, { "epoch": 0.76636964491183, "grad_norm": 0.1423686618276698, "learning_rate": 5.247315278373983e-06, "loss": 0.6645, "step": 3580 }, { "epoch": 0.7665837146450456, "grad_norm": 0.1561284936350141, "learning_rate": 5.238143921989076e-06, "loss": 0.7006, "step": 3581 }, { "epoch": 0.7667977843782612, "grad_norm": 0.14694574730059415, "learning_rate": 5.228979379649906e-06, "loss": 0.6965, "step": 3582 }, { "epoch": 0.7670118541114768, "grad_norm": 0.14965368879489102, "learning_rate": 5.219821655586821e-06, "loss": 0.6786, "step": 3583 }, { "epoch": 0.7672259238446925, "grad_norm": 0.15153940337772823, "learning_rate": 5.210670754026996e-06, "loss": 0.69, "step": 3584 }, { "epoch": 0.767439993577908, "grad_norm": 0.1476311981634055, "learning_rate": 5.20152667919448e-06, "loss": 0.7052, "step": 3585 }, { "epoch": 0.7676540633111236, "grad_norm": 0.14386225508385378, "learning_rate": 5.192389435310165e-06, "loss": 0.6789, "step": 3586 }, { "epoch": 0.7678681330443392, "grad_norm": 0.16161601365398967, "learning_rate": 5.183259026591774e-06, "loss": 0.7124, "step": 3587 }, { "epoch": 0.7680822027775548, "grad_norm": 0.1429500331562117, "learning_rate": 5.174135457253899e-06, "loss": 0.6885, "step": 3588 }, { "epoch": 0.7682962725107704, "grad_norm": 0.14901454804148337, "learning_rate": 5.1650187315079495e-06, "loss": 0.6823, "step": 3589 }, { "epoch": 0.768510342243986, "grad_norm": 0.15060970553205538, "learning_rate": 5.155908853562199e-06, "loss": 0.6605, "step": 3590 }, { "epoch": 0.7687244119772015, "grad_norm": 0.15125526613464874, "learning_rate": 5.146805827621755e-06, "loss": 0.6704, "step": 3591 }, { "epoch": 0.7689384817104171, "grad_norm": 0.1452066765752053, "learning_rate": 5.137709657888543e-06, "loss": 0.6759, "step": 3592 }, { "epoch": 0.7691525514436328, "grad_norm": 0.1515118732805631, "learning_rate": 5.1286203485613525e-06, "loss": 0.6783, "step": 3593 }, { "epoch": 0.7693666211768484, "grad_norm": 0.1538291655720675, "learning_rate": 5.1195379038357825e-06, "loss": 0.6862, "step": 3594 }, { "epoch": 0.769580690910064, "grad_norm": 0.14378164556618328, "learning_rate": 5.110462327904275e-06, "loss": 0.6944, "step": 3595 }, { "epoch": 0.7697947606432796, "grad_norm": 0.15453963403424745, "learning_rate": 5.101393624956106e-06, "loss": 0.7054, "step": 3596 }, { "epoch": 0.7700088303764951, "grad_norm": 0.15560221937755805, "learning_rate": 5.092331799177361e-06, "loss": 0.7042, "step": 3597 }, { "epoch": 0.7702229001097107, "grad_norm": 0.14812052992953495, "learning_rate": 5.083276854750974e-06, "loss": 0.6854, "step": 3598 }, { "epoch": 0.7704369698429263, "grad_norm": 0.14777507490410358, "learning_rate": 5.074228795856679e-06, "loss": 0.6728, "step": 3599 }, { "epoch": 0.7706510395761419, "grad_norm": 0.15288474595976217, "learning_rate": 5.065187626671048e-06, "loss": 0.7063, "step": 3600 }, { "epoch": 0.7708651093093575, "grad_norm": 0.1502097967002784, "learning_rate": 5.056153351367477e-06, "loss": 0.7021, "step": 3601 }, { "epoch": 0.7710791790425732, "grad_norm": 0.17267344323599498, "learning_rate": 5.047125974116156e-06, "loss": 0.6868, "step": 3602 }, { "epoch": 0.7712932487757888, "grad_norm": 0.1481410798670096, "learning_rate": 5.038105499084119e-06, "loss": 0.6715, "step": 3603 }, { "epoch": 0.7715073185090043, "grad_norm": 0.14471274871786427, "learning_rate": 5.02909193043519e-06, "loss": 0.6961, "step": 3604 }, { "epoch": 0.7717213882422199, "grad_norm": 0.14253817472206023, "learning_rate": 5.02008527233002e-06, "loss": 0.6856, "step": 3605 }, { "epoch": 0.7719354579754355, "grad_norm": 0.1454712497078495, "learning_rate": 5.0110855289260715e-06, "loss": 0.6811, "step": 3606 }, { "epoch": 0.7721495277086511, "grad_norm": 0.14372760835458082, "learning_rate": 5.002092704377599e-06, "loss": 0.6977, "step": 3607 }, { "epoch": 0.7723635974418667, "grad_norm": 0.14137880497796756, "learning_rate": 4.993106802835686e-06, "loss": 0.6872, "step": 3608 }, { "epoch": 0.7725776671750822, "grad_norm": 0.1443860306366555, "learning_rate": 4.984127828448196e-06, "loss": 0.6845, "step": 3609 }, { "epoch": 0.7727917369082978, "grad_norm": 0.1495619821833103, "learning_rate": 4.9751557853598105e-06, "loss": 0.7199, "step": 3610 }, { "epoch": 0.7730058066415135, "grad_norm": 0.14246740084591147, "learning_rate": 4.966190677712019e-06, "loss": 0.6526, "step": 3611 }, { "epoch": 0.7732198763747291, "grad_norm": 0.14280730977428982, "learning_rate": 4.957232509643082e-06, "loss": 0.6958, "step": 3612 }, { "epoch": 0.7734339461079447, "grad_norm": 0.1444098725201292, "learning_rate": 4.94828128528809e-06, "loss": 0.6879, "step": 3613 }, { "epoch": 0.7736480158411603, "grad_norm": 0.15274940038812987, "learning_rate": 4.939337008778895e-06, "loss": 0.6712, "step": 3614 }, { "epoch": 0.7738620855743759, "grad_norm": 0.14534139764508106, "learning_rate": 4.9303996842441695e-06, "loss": 0.6927, "step": 3615 }, { "epoch": 0.7740761553075914, "grad_norm": 0.16815580329923768, "learning_rate": 4.921469315809369e-06, "loss": 0.7049, "step": 3616 }, { "epoch": 0.774290225040807, "grad_norm": 0.14971565575094634, "learning_rate": 4.912545907596722e-06, "loss": 0.71, "step": 3617 }, { "epoch": 0.7745042947740226, "grad_norm": 0.14539530061684675, "learning_rate": 4.903629463725274e-06, "loss": 0.6774, "step": 3618 }, { "epoch": 0.7747183645072382, "grad_norm": 0.14651811235383488, "learning_rate": 4.894719988310823e-06, "loss": 0.7002, "step": 3619 }, { "epoch": 0.7749324342404539, "grad_norm": 0.1449687047863747, "learning_rate": 4.8858174854659804e-06, "loss": 0.6979, "step": 3620 }, { "epoch": 0.7751465039736695, "grad_norm": 0.14878444311342331, "learning_rate": 4.8769219593001135e-06, "loss": 0.6834, "step": 3621 }, { "epoch": 0.775360573706885, "grad_norm": 0.14637069318606644, "learning_rate": 4.868033413919386e-06, "loss": 0.7114, "step": 3622 }, { "epoch": 0.7755746434401006, "grad_norm": 0.14602603522843896, "learning_rate": 4.85915185342674e-06, "loss": 0.7031, "step": 3623 }, { "epoch": 0.7757887131733162, "grad_norm": 0.14681099392977884, "learning_rate": 4.850277281921876e-06, "loss": 0.712, "step": 3624 }, { "epoch": 0.7760027829065318, "grad_norm": 0.1454575177981704, "learning_rate": 4.841409703501292e-06, "loss": 0.6961, "step": 3625 }, { "epoch": 0.7762168526397474, "grad_norm": 0.14564331273396205, "learning_rate": 4.832549122258234e-06, "loss": 0.6725, "step": 3626 }, { "epoch": 0.776430922372963, "grad_norm": 0.15854855672454377, "learning_rate": 4.823695542282738e-06, "loss": 0.7169, "step": 3627 }, { "epoch": 0.7766449921061785, "grad_norm": 0.14785777387749188, "learning_rate": 4.8148489676616025e-06, "loss": 0.679, "step": 3628 }, { "epoch": 0.7768590618393941, "grad_norm": 0.1412872034267171, "learning_rate": 4.80600940247838e-06, "loss": 0.6825, "step": 3629 }, { "epoch": 0.7770731315726098, "grad_norm": 0.16532232539661437, "learning_rate": 4.79717685081341e-06, "loss": 0.7056, "step": 3630 }, { "epoch": 0.7772872013058254, "grad_norm": 0.14848016296659505, "learning_rate": 4.788351316743769e-06, "loss": 0.6657, "step": 3631 }, { "epoch": 0.777501271039041, "grad_norm": 0.14375141099044916, "learning_rate": 4.7795328043433166e-06, "loss": 0.6826, "step": 3632 }, { "epoch": 0.7777153407722566, "grad_norm": 0.15246816576983932, "learning_rate": 4.770721317682663e-06, "loss": 0.6778, "step": 3633 }, { "epoch": 0.7779294105054722, "grad_norm": 0.14619019412293158, "learning_rate": 4.7619168608291655e-06, "loss": 0.7208, "step": 3634 }, { "epoch": 0.7781434802386877, "grad_norm": 0.14514720848243715, "learning_rate": 4.753119437846951e-06, "loss": 0.683, "step": 3635 }, { "epoch": 0.7783575499719033, "grad_norm": 0.13991836419741208, "learning_rate": 4.744329052796899e-06, "loss": 0.706, "step": 3636 }, { "epoch": 0.7785716197051189, "grad_norm": 0.14645596734362765, "learning_rate": 4.735545709736624e-06, "loss": 0.6869, "step": 3637 }, { "epoch": 0.7787856894383345, "grad_norm": 0.14443721378800692, "learning_rate": 4.726769412720506e-06, "loss": 0.6845, "step": 3638 }, { "epoch": 0.7789997591715502, "grad_norm": 0.14559633643361072, "learning_rate": 4.7180001657996745e-06, "loss": 0.6921, "step": 3639 }, { "epoch": 0.7792138289047658, "grad_norm": 0.14773278671770956, "learning_rate": 4.7092379730219874e-06, "loss": 0.6891, "step": 3640 }, { "epoch": 0.7794278986379813, "grad_norm": 0.13550999742590666, "learning_rate": 4.700482838432059e-06, "loss": 0.68, "step": 3641 }, { "epoch": 0.7796419683711969, "grad_norm": 0.14702684097174573, "learning_rate": 4.691734766071252e-06, "loss": 0.6797, "step": 3642 }, { "epoch": 0.7798560381044125, "grad_norm": 0.14445549825886153, "learning_rate": 4.682993759977648e-06, "loss": 0.6889, "step": 3643 }, { "epoch": 0.7800701078376281, "grad_norm": 0.14631527443199774, "learning_rate": 4.6742598241860875e-06, "loss": 0.7227, "step": 3644 }, { "epoch": 0.7802841775708437, "grad_norm": 0.14902162597253918, "learning_rate": 4.665532962728141e-06, "loss": 0.6964, "step": 3645 }, { "epoch": 0.7804982473040593, "grad_norm": 0.14566734344008586, "learning_rate": 4.656813179632102e-06, "loss": 0.6993, "step": 3646 }, { "epoch": 0.7807123170372748, "grad_norm": 0.1440681304391279, "learning_rate": 4.648100478923014e-06, "loss": 0.7002, "step": 3647 }, { "epoch": 0.7809263867704905, "grad_norm": 0.14337501249023385, "learning_rate": 4.639394864622646e-06, "loss": 0.6801, "step": 3648 }, { "epoch": 0.7811404565037061, "grad_norm": 0.14444266543344936, "learning_rate": 4.6306963407494855e-06, "loss": 0.6754, "step": 3649 }, { "epoch": 0.7813545262369217, "grad_norm": 0.14726334608406041, "learning_rate": 4.6220049113187644e-06, "loss": 0.6977, "step": 3650 }, { "epoch": 0.7815685959701373, "grad_norm": 0.14200863975157607, "learning_rate": 4.613320580342422e-06, "loss": 0.6766, "step": 3651 }, { "epoch": 0.7817826657033529, "grad_norm": 0.14223219680662605, "learning_rate": 4.60464335182913e-06, "loss": 0.6895, "step": 3652 }, { "epoch": 0.7819967354365684, "grad_norm": 0.143881909479297, "learning_rate": 4.595973229784291e-06, "loss": 0.6703, "step": 3653 }, { "epoch": 0.782210805169784, "grad_norm": 0.14389924843123095, "learning_rate": 4.587310218210008e-06, "loss": 0.6677, "step": 3654 }, { "epoch": 0.7824248749029996, "grad_norm": 0.1521399356603649, "learning_rate": 4.578654321105118e-06, "loss": 0.6975, "step": 3655 }, { "epoch": 0.7826389446362152, "grad_norm": 0.15069367842892362, "learning_rate": 4.5700055424651594e-06, "loss": 0.7117, "step": 3656 }, { "epoch": 0.7828530143694309, "grad_norm": 0.14967374826431096, "learning_rate": 4.561363886282393e-06, "loss": 0.6847, "step": 3657 }, { "epoch": 0.7830670841026465, "grad_norm": 0.1441688220108929, "learning_rate": 4.552729356545804e-06, "loss": 0.6967, "step": 3658 }, { "epoch": 0.783281153835862, "grad_norm": 0.14632044198323957, "learning_rate": 4.54410195724106e-06, "loss": 0.6826, "step": 3659 }, { "epoch": 0.7834952235690776, "grad_norm": 0.14638171754928977, "learning_rate": 4.535481692350565e-06, "loss": 0.6952, "step": 3660 }, { "epoch": 0.7837092933022932, "grad_norm": 0.14501938688256968, "learning_rate": 4.526868565853406e-06, "loss": 0.7029, "step": 3661 }, { "epoch": 0.7839233630355088, "grad_norm": 0.1469192786686249, "learning_rate": 4.518262581725399e-06, "loss": 0.7042, "step": 3662 }, { "epoch": 0.7841374327687244, "grad_norm": 0.22081669649556304, "learning_rate": 4.5096637439390365e-06, "loss": 0.6984, "step": 3663 }, { "epoch": 0.78435150250194, "grad_norm": 0.14377410961623974, "learning_rate": 4.501072056463536e-06, "loss": 0.6945, "step": 3664 }, { "epoch": 0.7845655722351556, "grad_norm": 0.13757389187353808, "learning_rate": 4.492487523264806e-06, "loss": 0.6571, "step": 3665 }, { "epoch": 0.7847796419683712, "grad_norm": 0.1395342436130252, "learning_rate": 4.483910148305441e-06, "loss": 0.6856, "step": 3666 }, { "epoch": 0.7849937117015868, "grad_norm": 0.14693427866391, "learning_rate": 4.4753399355447556e-06, "loss": 0.6679, "step": 3667 }, { "epoch": 0.7852077814348024, "grad_norm": 0.1455743255241307, "learning_rate": 4.466776888938731e-06, "loss": 0.694, "step": 3668 }, { "epoch": 0.785421851168018, "grad_norm": 0.19106409949001513, "learning_rate": 4.45822101244006e-06, "loss": 0.6908, "step": 3669 }, { "epoch": 0.7856359209012336, "grad_norm": 0.14056268608495565, "learning_rate": 4.449672309998125e-06, "loss": 0.6956, "step": 3670 }, { "epoch": 0.7858499906344492, "grad_norm": 0.1398491797699803, "learning_rate": 4.441130785558981e-06, "loss": 0.7018, "step": 3671 }, { "epoch": 0.7860640603676647, "grad_norm": 0.18205320695841806, "learning_rate": 4.432596443065389e-06, "loss": 0.6693, "step": 3672 }, { "epoch": 0.7862781301008803, "grad_norm": 0.14921981320338992, "learning_rate": 4.4240692864567755e-06, "loss": 0.7079, "step": 3673 }, { "epoch": 0.7864921998340959, "grad_norm": 0.14413740783108842, "learning_rate": 4.415549319669268e-06, "loss": 0.7093, "step": 3674 }, { "epoch": 0.7867062695673116, "grad_norm": 0.1446170870345179, "learning_rate": 4.40703654663567e-06, "loss": 0.697, "step": 3675 }, { "epoch": 0.7869203393005272, "grad_norm": 0.14357160578361775, "learning_rate": 4.398530971285453e-06, "loss": 0.6662, "step": 3676 }, { "epoch": 0.7871344090337428, "grad_norm": 0.14400881820519595, "learning_rate": 4.390032597544787e-06, "loss": 0.7033, "step": 3677 }, { "epoch": 0.7873484787669583, "grad_norm": 0.1451046800820446, "learning_rate": 4.381541429336491e-06, "loss": 0.6656, "step": 3678 }, { "epoch": 0.7875625485001739, "grad_norm": 0.137583112108047, "learning_rate": 4.373057470580082e-06, "loss": 0.6596, "step": 3679 }, { "epoch": 0.7877766182333895, "grad_norm": 0.14469552651510104, "learning_rate": 4.364580725191743e-06, "loss": 0.6877, "step": 3680 }, { "epoch": 0.7879906879666051, "grad_norm": 0.14247038821039348, "learning_rate": 4.356111197084317e-06, "loss": 0.6792, "step": 3681 }, { "epoch": 0.7882047576998207, "grad_norm": 0.1468007516027795, "learning_rate": 4.347648890167326e-06, "loss": 0.6646, "step": 3682 }, { "epoch": 0.7884188274330363, "grad_norm": 0.1402996787144692, "learning_rate": 4.339193808346951e-06, "loss": 0.6779, "step": 3683 }, { "epoch": 0.788632897166252, "grad_norm": 0.14254646459308967, "learning_rate": 4.330745955526045e-06, "loss": 0.6596, "step": 3684 }, { "epoch": 0.7888469668994675, "grad_norm": 0.14631651078258634, "learning_rate": 4.3223053356041315e-06, "loss": 0.6739, "step": 3685 }, { "epoch": 0.7890610366326831, "grad_norm": 0.16083493154870732, "learning_rate": 4.313871952477367e-06, "loss": 0.6578, "step": 3686 }, { "epoch": 0.7892751063658987, "grad_norm": 0.19634635903316577, "learning_rate": 4.3054458100385996e-06, "loss": 0.7058, "step": 3687 }, { "epoch": 0.7894891760991143, "grad_norm": 0.17472902085358755, "learning_rate": 4.2970269121773135e-06, "loss": 0.6827, "step": 3688 }, { "epoch": 0.7897032458323299, "grad_norm": 0.1443233124034339, "learning_rate": 4.288615262779656e-06, "loss": 0.688, "step": 3689 }, { "epoch": 0.7899173155655455, "grad_norm": 0.1458702048708569, "learning_rate": 4.28021086572844e-06, "loss": 0.6996, "step": 3690 }, { "epoch": 0.790131385298761, "grad_norm": 0.14680820313500526, "learning_rate": 4.271813724903106e-06, "loss": 0.6925, "step": 3691 }, { "epoch": 0.7903454550319766, "grad_norm": 0.14795281151426107, "learning_rate": 4.26342384417977e-06, "loss": 0.6841, "step": 3692 }, { "epoch": 0.7905595247651923, "grad_norm": 0.14621841645765707, "learning_rate": 4.255041227431178e-06, "loss": 0.7052, "step": 3693 }, { "epoch": 0.7907735944984079, "grad_norm": 0.14424606937255216, "learning_rate": 4.2466658785267304e-06, "loss": 0.6895, "step": 3694 }, { "epoch": 0.7909876642316235, "grad_norm": 0.1442743584183758, "learning_rate": 4.238297801332483e-06, "loss": 0.6983, "step": 3695 }, { "epoch": 0.7912017339648391, "grad_norm": 0.1516338560548041, "learning_rate": 4.22993699971111e-06, "loss": 0.6732, "step": 3696 }, { "epoch": 0.7914158036980546, "grad_norm": 0.1525905454680027, "learning_rate": 4.221583477521956e-06, "loss": 0.6873, "step": 3697 }, { "epoch": 0.7916298734312702, "grad_norm": 0.14743315246458855, "learning_rate": 4.21323723862098e-06, "loss": 0.7065, "step": 3698 }, { "epoch": 0.7918439431644858, "grad_norm": 0.17158018204275188, "learning_rate": 4.204898286860795e-06, "loss": 0.7114, "step": 3699 }, { "epoch": 0.7920580128977014, "grad_norm": 0.14403492705947102, "learning_rate": 4.1965666260906525e-06, "loss": 0.6848, "step": 3700 }, { "epoch": 0.792272082630917, "grad_norm": 0.14858960809544725, "learning_rate": 4.188242260156421e-06, "loss": 0.7141, "step": 3701 }, { "epoch": 0.7924861523641327, "grad_norm": 0.1475919588083457, "learning_rate": 4.1799251929006225e-06, "loss": 0.7067, "step": 3702 }, { "epoch": 0.7927002220973483, "grad_norm": 0.14173596377722034, "learning_rate": 4.17161542816239e-06, "loss": 0.6942, "step": 3703 }, { "epoch": 0.7929142918305638, "grad_norm": 0.14673351814957863, "learning_rate": 4.163312969777506e-06, "loss": 0.7086, "step": 3704 }, { "epoch": 0.7931283615637794, "grad_norm": 0.14623364280415502, "learning_rate": 4.155017821578362e-06, "loss": 0.7105, "step": 3705 }, { "epoch": 0.793342431296995, "grad_norm": 0.1471445843756772, "learning_rate": 4.146729987393982e-06, "loss": 0.6972, "step": 3706 }, { "epoch": 0.7935565010302106, "grad_norm": 0.14515934720938398, "learning_rate": 4.138449471050028e-06, "loss": 0.693, "step": 3707 }, { "epoch": 0.7937705707634262, "grad_norm": 0.14547294711745223, "learning_rate": 4.1301762763687556e-06, "loss": 0.7054, "step": 3708 }, { "epoch": 0.7939846404966417, "grad_norm": 0.14358553265992055, "learning_rate": 4.12191040716907e-06, "loss": 0.69, "step": 3709 }, { "epoch": 0.7941987102298573, "grad_norm": 0.14066686916955223, "learning_rate": 4.113651867266468e-06, "loss": 0.7061, "step": 3710 }, { "epoch": 0.794412779963073, "grad_norm": 0.14646119589548262, "learning_rate": 4.105400660473082e-06, "loss": 0.7019, "step": 3711 }, { "epoch": 0.7946268496962886, "grad_norm": 0.1506096934484361, "learning_rate": 4.09715679059766e-06, "loss": 0.6811, "step": 3712 }, { "epoch": 0.7948409194295042, "grad_norm": 0.13838487767472074, "learning_rate": 4.088920261445548e-06, "loss": 0.6626, "step": 3713 }, { "epoch": 0.7950549891627198, "grad_norm": 0.15062904687313375, "learning_rate": 4.080691076818719e-06, "loss": 0.7285, "step": 3714 }, { "epoch": 0.7952690588959354, "grad_norm": 0.14022059738133055, "learning_rate": 4.0724692405157505e-06, "loss": 0.6551, "step": 3715 }, { "epoch": 0.7954831286291509, "grad_norm": 0.1390790251765366, "learning_rate": 4.064254756331818e-06, "loss": 0.6612, "step": 3716 }, { "epoch": 0.7956971983623665, "grad_norm": 0.17527178930194584, "learning_rate": 4.056047628058726e-06, "loss": 0.6712, "step": 3717 }, { "epoch": 0.7959112680955821, "grad_norm": 0.14177816726424428, "learning_rate": 4.047847859484855e-06, "loss": 0.665, "step": 3718 }, { "epoch": 0.7961253378287977, "grad_norm": 0.1462702274505592, "learning_rate": 4.03965545439521e-06, "loss": 0.6947, "step": 3719 }, { "epoch": 0.7963394075620134, "grad_norm": 0.14625636632028322, "learning_rate": 4.031470416571397e-06, "loss": 0.6842, "step": 3720 }, { "epoch": 0.796553477295229, "grad_norm": 0.1479395399533281, "learning_rate": 4.023292749791603e-06, "loss": 0.7117, "step": 3721 }, { "epoch": 0.7967675470284445, "grad_norm": 0.14294806232725346, "learning_rate": 4.015122457830631e-06, "loss": 0.6782, "step": 3722 }, { "epoch": 0.7969816167616601, "grad_norm": 0.14391605199157265, "learning_rate": 4.006959544459874e-06, "loss": 0.6805, "step": 3723 }, { "epoch": 0.7971956864948757, "grad_norm": 0.1453150979786913, "learning_rate": 3.99880401344731e-06, "loss": 0.6634, "step": 3724 }, { "epoch": 0.7974097562280913, "grad_norm": 0.1453991275400545, "learning_rate": 3.990655868557522e-06, "loss": 0.6869, "step": 3725 }, { "epoch": 0.7976238259613069, "grad_norm": 0.1437959290913186, "learning_rate": 3.982515113551684e-06, "loss": 0.7075, "step": 3726 }, { "epoch": 0.7978378956945225, "grad_norm": 0.1460419031747847, "learning_rate": 3.9743817521875436e-06, "loss": 0.6918, "step": 3727 }, { "epoch": 0.798051965427738, "grad_norm": 0.14625730461489223, "learning_rate": 3.966255788219451e-06, "loss": 0.6822, "step": 3728 }, { "epoch": 0.7982660351609537, "grad_norm": 0.14315116744583717, "learning_rate": 3.958137225398339e-06, "loss": 0.6788, "step": 3729 }, { "epoch": 0.7984801048941693, "grad_norm": 0.14548552457502054, "learning_rate": 3.950026067471713e-06, "loss": 0.6888, "step": 3730 }, { "epoch": 0.7986941746273849, "grad_norm": 0.14504314778964464, "learning_rate": 3.941922318183675e-06, "loss": 0.6891, "step": 3731 }, { "epoch": 0.7989082443606005, "grad_norm": 0.14054258332500452, "learning_rate": 3.933825981274903e-06, "loss": 0.688, "step": 3732 }, { "epoch": 0.7991223140938161, "grad_norm": 0.14416063829175654, "learning_rate": 3.925737060482644e-06, "loss": 0.6849, "step": 3733 }, { "epoch": 0.7993363838270316, "grad_norm": 0.15232123685251384, "learning_rate": 3.917655559540738e-06, "loss": 0.6712, "step": 3734 }, { "epoch": 0.7995504535602472, "grad_norm": 0.14910757725890156, "learning_rate": 3.9095814821795805e-06, "loss": 0.7175, "step": 3735 }, { "epoch": 0.7997645232934628, "grad_norm": 0.14494711098002688, "learning_rate": 3.901514832126154e-06, "loss": 0.6852, "step": 3736 }, { "epoch": 0.7999785930266784, "grad_norm": 0.1453708082480703, "learning_rate": 3.893455613104021e-06, "loss": 0.6791, "step": 3737 }, { "epoch": 0.800192662759894, "grad_norm": 0.21346208746687895, "learning_rate": 3.885403828833283e-06, "loss": 0.6916, "step": 3738 }, { "epoch": 0.8004067324931097, "grad_norm": 0.1446229387767777, "learning_rate": 3.877359483030647e-06, "loss": 0.7044, "step": 3739 }, { "epoch": 0.8006208022263253, "grad_norm": 0.1474085393401029, "learning_rate": 3.8693225794093535e-06, "loss": 0.6994, "step": 3740 }, { "epoch": 0.8008348719595408, "grad_norm": 0.14436214299259978, "learning_rate": 3.86129312167923e-06, "loss": 0.6881, "step": 3741 }, { "epoch": 0.8010489416927564, "grad_norm": 0.14398485106569503, "learning_rate": 3.853271113546661e-06, "loss": 0.7168, "step": 3742 }, { "epoch": 0.801263011425972, "grad_norm": 0.1408527844385051, "learning_rate": 3.845256558714585e-06, "loss": 0.6899, "step": 3743 }, { "epoch": 0.8014770811591876, "grad_norm": 0.14478090729331433, "learning_rate": 3.837249460882515e-06, "loss": 0.6892, "step": 3744 }, { "epoch": 0.8016911508924032, "grad_norm": 0.14571803924285012, "learning_rate": 3.829249823746502e-06, "loss": 0.7181, "step": 3745 }, { "epoch": 0.8019052206256188, "grad_norm": 0.14419415353094814, "learning_rate": 3.821257650999171e-06, "loss": 0.6955, "step": 3746 }, { "epoch": 0.8021192903588343, "grad_norm": 0.14691963733543656, "learning_rate": 3.8132729463296892e-06, "loss": 0.6879, "step": 3747 }, { "epoch": 0.80233336009205, "grad_norm": 0.14784162164039757, "learning_rate": 3.8052957134237823e-06, "loss": 0.6946, "step": 3748 }, { "epoch": 0.8025474298252656, "grad_norm": 0.14621945815172976, "learning_rate": 3.7973259559637353e-06, "loss": 0.6793, "step": 3749 }, { "epoch": 0.8027614995584812, "grad_norm": 0.14157435080718195, "learning_rate": 3.7893636776283616e-06, "loss": 0.6733, "step": 3750 }, { "epoch": 0.8029755692916968, "grad_norm": 0.15148044838644467, "learning_rate": 3.781408882093045e-06, "loss": 0.6919, "step": 3751 }, { "epoch": 0.8031896390249124, "grad_norm": 0.14507699440240024, "learning_rate": 3.773461573029693e-06, "loss": 0.7086, "step": 3752 }, { "epoch": 0.8034037087581279, "grad_norm": 0.14049868694163933, "learning_rate": 3.765521754106776e-06, "loss": 0.6766, "step": 3753 }, { "epoch": 0.8036177784913435, "grad_norm": 0.13681091509930346, "learning_rate": 3.757589428989303e-06, "loss": 0.6648, "step": 3754 }, { "epoch": 0.8038318482245591, "grad_norm": 0.14202927095717371, "learning_rate": 3.7496646013388116e-06, "loss": 0.6815, "step": 3755 }, { "epoch": 0.8040459179577747, "grad_norm": 0.1423962517603676, "learning_rate": 3.741747274813399e-06, "loss": 0.7088, "step": 3756 }, { "epoch": 0.8042599876909904, "grad_norm": 0.1431857263635574, "learning_rate": 3.733837453067677e-06, "loss": 0.6978, "step": 3757 }, { "epoch": 0.804474057424206, "grad_norm": 0.13907985974998918, "learning_rate": 3.7259351397528097e-06, "loss": 0.67, "step": 3758 }, { "epoch": 0.8046881271574216, "grad_norm": 0.14506125633703043, "learning_rate": 3.7180403385164955e-06, "loss": 0.6747, "step": 3759 }, { "epoch": 0.8049021968906371, "grad_norm": 0.14047285450165206, "learning_rate": 3.710153053002952e-06, "loss": 0.6958, "step": 3760 }, { "epoch": 0.8051162666238527, "grad_norm": 0.14635988275187017, "learning_rate": 3.7022732868529444e-06, "loss": 0.708, "step": 3761 }, { "epoch": 0.8053303363570683, "grad_norm": 0.1389022472999722, "learning_rate": 3.6944010437037482e-06, "loss": 0.6785, "step": 3762 }, { "epoch": 0.8055444060902839, "grad_norm": 0.1415424297330864, "learning_rate": 3.686536327189181e-06, "loss": 0.6762, "step": 3763 }, { "epoch": 0.8057584758234995, "grad_norm": 0.1453015273944333, "learning_rate": 3.678679140939587e-06, "loss": 0.7102, "step": 3764 }, { "epoch": 0.805972545556715, "grad_norm": 0.14185952598928692, "learning_rate": 3.6708294885818196e-06, "loss": 0.6924, "step": 3765 }, { "epoch": 0.8061866152899307, "grad_norm": 0.14440778141542995, "learning_rate": 3.6629873737392727e-06, "loss": 0.6965, "step": 3766 }, { "epoch": 0.8064006850231463, "grad_norm": 0.14062813659339363, "learning_rate": 3.6551528000318447e-06, "loss": 0.6773, "step": 3767 }, { "epoch": 0.8066147547563619, "grad_norm": 0.13929165691530832, "learning_rate": 3.6473257710759647e-06, "loss": 0.6825, "step": 3768 }, { "epoch": 0.8068288244895775, "grad_norm": 0.14496243727647162, "learning_rate": 3.639506290484576e-06, "loss": 0.699, "step": 3769 }, { "epoch": 0.8070428942227931, "grad_norm": 0.1374901631490186, "learning_rate": 3.6316943618671306e-06, "loss": 0.6524, "step": 3770 }, { "epoch": 0.8072569639560087, "grad_norm": 0.14000586225946124, "learning_rate": 3.6238899888296097e-06, "loss": 0.6628, "step": 3771 }, { "epoch": 0.8074710336892242, "grad_norm": 0.13749802901242772, "learning_rate": 3.616093174974489e-06, "loss": 0.6741, "step": 3772 }, { "epoch": 0.8076851034224398, "grad_norm": 0.13592998452882274, "learning_rate": 3.6083039239007642e-06, "loss": 0.6766, "step": 3773 }, { "epoch": 0.8078991731556554, "grad_norm": 0.1422615683326355, "learning_rate": 3.6005222392039473e-06, "loss": 0.6986, "step": 3774 }, { "epoch": 0.8081132428888711, "grad_norm": 0.1440892780171938, "learning_rate": 3.5927481244760397e-06, "loss": 0.6771, "step": 3775 }, { "epoch": 0.8083273126220867, "grad_norm": 0.14308044955576857, "learning_rate": 3.584981583305569e-06, "loss": 0.7121, "step": 3776 }, { "epoch": 0.8085413823553023, "grad_norm": 0.14190129219743178, "learning_rate": 3.577222619277545e-06, "loss": 0.6787, "step": 3777 }, { "epoch": 0.8087554520885178, "grad_norm": 0.14460558707942517, "learning_rate": 3.5694712359734986e-06, "loss": 0.6994, "step": 3778 }, { "epoch": 0.8089695218217334, "grad_norm": 0.1440595861392093, "learning_rate": 3.5617274369714538e-06, "loss": 0.6963, "step": 3779 }, { "epoch": 0.809183591554949, "grad_norm": 0.14904462393643703, "learning_rate": 3.5539912258459297e-06, "loss": 0.7145, "step": 3780 }, { "epoch": 0.8093976612881646, "grad_norm": 0.14551197868259458, "learning_rate": 3.546262606167956e-06, "loss": 0.6971, "step": 3781 }, { "epoch": 0.8096117310213802, "grad_norm": 0.1421372831117601, "learning_rate": 3.538541581505037e-06, "loss": 0.6991, "step": 3782 }, { "epoch": 0.8098258007545958, "grad_norm": 0.1407928226543025, "learning_rate": 3.530828155421191e-06, "loss": 0.6928, "step": 3783 }, { "epoch": 0.8100398704878115, "grad_norm": 0.32031682034983805, "learning_rate": 3.523122331476925e-06, "loss": 0.7007, "step": 3784 }, { "epoch": 0.810253940221027, "grad_norm": 0.14805351100878913, "learning_rate": 3.5154241132292223e-06, "loss": 0.6943, "step": 3785 }, { "epoch": 0.8104680099542426, "grad_norm": 0.1417773605708263, "learning_rate": 3.507733504231581e-06, "loss": 0.6973, "step": 3786 }, { "epoch": 0.8106820796874582, "grad_norm": 0.1416352908057546, "learning_rate": 3.5000505080339565e-06, "loss": 0.6796, "step": 3787 }, { "epoch": 0.8108961494206738, "grad_norm": 0.14815806986079122, "learning_rate": 3.4923751281828187e-06, "loss": 0.6931, "step": 3788 }, { "epoch": 0.8111102191538894, "grad_norm": 0.1526928380763037, "learning_rate": 3.4847073682210984e-06, "loss": 0.7021, "step": 3789 }, { "epoch": 0.811324288887105, "grad_norm": 0.14607899098517793, "learning_rate": 3.4770472316882243e-06, "loss": 0.7053, "step": 3790 }, { "epoch": 0.8115383586203205, "grad_norm": 0.1445774289626635, "learning_rate": 3.4693947221201054e-06, "loss": 0.6879, "step": 3791 }, { "epoch": 0.8117524283535361, "grad_norm": 0.15328098506784177, "learning_rate": 3.461749843049118e-06, "loss": 0.695, "step": 3792 }, { "epoch": 0.8119664980867518, "grad_norm": 0.14365666565391966, "learning_rate": 3.4541125980041355e-06, "loss": 0.6768, "step": 3793 }, { "epoch": 0.8121805678199674, "grad_norm": 0.13844710847516453, "learning_rate": 3.4464829905104825e-06, "loss": 0.6777, "step": 3794 }, { "epoch": 0.812394637553183, "grad_norm": 0.14285951761810192, "learning_rate": 3.438861024089979e-06, "loss": 0.6714, "step": 3795 }, { "epoch": 0.8126087072863986, "grad_norm": 0.14182521629810915, "learning_rate": 3.4312467022609154e-06, "loss": 0.6774, "step": 3796 }, { "epoch": 0.8128227770196141, "grad_norm": 0.14163504457562062, "learning_rate": 3.423640028538038e-06, "loss": 0.6751, "step": 3797 }, { "epoch": 0.8130368467528297, "grad_norm": 0.14090485057485672, "learning_rate": 3.41604100643258e-06, "loss": 0.6745, "step": 3798 }, { "epoch": 0.8132509164860453, "grad_norm": 0.14091518590969906, "learning_rate": 3.4084496394522402e-06, "loss": 0.6799, "step": 3799 }, { "epoch": 0.8134649862192609, "grad_norm": 0.1458713949321835, "learning_rate": 3.4008659311011714e-06, "loss": 0.6755, "step": 3800 }, { "epoch": 0.8136790559524765, "grad_norm": 0.14484410753284657, "learning_rate": 3.39328988488e-06, "loss": 0.7068, "step": 3801 }, { "epoch": 0.8138931256856922, "grad_norm": 0.13994096185237495, "learning_rate": 3.385721504285826e-06, "loss": 0.66, "step": 3802 }, { "epoch": 0.8141071954189077, "grad_norm": 0.14371049552130213, "learning_rate": 3.378160792812184e-06, "loss": 0.7139, "step": 3803 }, { "epoch": 0.8143212651521233, "grad_norm": 0.2219343042840987, "learning_rate": 3.3706077539490933e-06, "loss": 0.6669, "step": 3804 }, { "epoch": 0.8145353348853389, "grad_norm": 0.1470722347444197, "learning_rate": 3.3630623911830274e-06, "loss": 0.7227, "step": 3805 }, { "epoch": 0.8147494046185545, "grad_norm": 0.14029202030288834, "learning_rate": 3.355524707996902e-06, "loss": 0.6925, "step": 3806 }, { "epoch": 0.8149634743517701, "grad_norm": 0.1452785248967079, "learning_rate": 3.347994707870108e-06, "loss": 0.7249, "step": 3807 }, { "epoch": 0.8151775440849857, "grad_norm": 0.14116977299861644, "learning_rate": 3.340472394278469e-06, "loss": 0.6759, "step": 3808 }, { "epoch": 0.8153916138182012, "grad_norm": 0.14288217961441463, "learning_rate": 3.332957770694276e-06, "loss": 0.7011, "step": 3809 }, { "epoch": 0.8156056835514168, "grad_norm": 0.14838926978584027, "learning_rate": 3.3254508405862706e-06, "loss": 0.6992, "step": 3810 }, { "epoch": 0.8158197532846325, "grad_norm": 0.14343148160581343, "learning_rate": 3.317951607419627e-06, "loss": 0.7141, "step": 3811 }, { "epoch": 0.8160338230178481, "grad_norm": 0.1446930044037509, "learning_rate": 3.3104600746559856e-06, "loss": 0.6775, "step": 3812 }, { "epoch": 0.8162478927510637, "grad_norm": 0.1441271187485597, "learning_rate": 3.3029762457534266e-06, "loss": 0.6914, "step": 3813 }, { "epoch": 0.8164619624842793, "grad_norm": 0.142953090623808, "learning_rate": 3.295500124166462e-06, "loss": 0.6901, "step": 3814 }, { "epoch": 0.8166760322174949, "grad_norm": 0.14546662819737374, "learning_rate": 3.2880317133460628e-06, "loss": 0.6952, "step": 3815 }, { "epoch": 0.8168901019507104, "grad_norm": 0.14942845844301347, "learning_rate": 3.2805710167396354e-06, "loss": 0.7023, "step": 3816 }, { "epoch": 0.817104171683926, "grad_norm": 0.14434334509441404, "learning_rate": 3.2731180377910167e-06, "loss": 0.6676, "step": 3817 }, { "epoch": 0.8173182414171416, "grad_norm": 0.1415830452275537, "learning_rate": 3.2656727799404962e-06, "loss": 0.6763, "step": 3818 }, { "epoch": 0.8175323111503572, "grad_norm": 0.1414645007118077, "learning_rate": 3.2582352466247835e-06, "loss": 0.7006, "step": 3819 }, { "epoch": 0.8177463808835729, "grad_norm": 0.1470960502396605, "learning_rate": 3.250805441277032e-06, "loss": 0.7412, "step": 3820 }, { "epoch": 0.8179604506167885, "grad_norm": 0.1458956073202586, "learning_rate": 3.2433833673268358e-06, "loss": 0.7096, "step": 3821 }, { "epoch": 0.818174520350004, "grad_norm": 0.14041562626277815, "learning_rate": 3.2359690282001944e-06, "loss": 0.6663, "step": 3822 }, { "epoch": 0.8183885900832196, "grad_norm": 0.1418928073863154, "learning_rate": 3.2285624273195704e-06, "loss": 0.6799, "step": 3823 }, { "epoch": 0.8186026598164352, "grad_norm": 0.14221149135132968, "learning_rate": 3.2211635681038223e-06, "loss": 0.6633, "step": 3824 }, { "epoch": 0.8188167295496508, "grad_norm": 0.1524517490005059, "learning_rate": 3.2137724539682603e-06, "loss": 0.7003, "step": 3825 }, { "epoch": 0.8190307992828664, "grad_norm": 0.14454017156920307, "learning_rate": 3.2063890883245997e-06, "loss": 0.6845, "step": 3826 }, { "epoch": 0.819244869016082, "grad_norm": 0.14481563853591464, "learning_rate": 3.1990134745809966e-06, "loss": 0.7, "step": 3827 }, { "epoch": 0.8194589387492975, "grad_norm": 0.14738696202549195, "learning_rate": 3.1916456161420207e-06, "loss": 0.7076, "step": 3828 }, { "epoch": 0.8196730084825132, "grad_norm": 0.1485838201358138, "learning_rate": 3.1842855164086563e-06, "loss": 0.7175, "step": 3829 }, { "epoch": 0.8198870782157288, "grad_norm": 0.13992076520294638, "learning_rate": 3.1769331787783186e-06, "loss": 0.696, "step": 3830 }, { "epoch": 0.8201011479489444, "grad_norm": 0.1451141799886476, "learning_rate": 3.1695886066448268e-06, "loss": 0.7044, "step": 3831 }, { "epoch": 0.82031521768216, "grad_norm": 0.1383239960460572, "learning_rate": 3.162251803398422e-06, "loss": 0.6727, "step": 3832 }, { "epoch": 0.8205292874153756, "grad_norm": 0.14559151524010955, "learning_rate": 3.15492277242577e-06, "loss": 0.7084, "step": 3833 }, { "epoch": 0.8207433571485911, "grad_norm": 0.14538113334588182, "learning_rate": 3.1476015171099237e-06, "loss": 0.6928, "step": 3834 }, { "epoch": 0.8209574268818067, "grad_norm": 0.13710778957341044, "learning_rate": 3.1402880408303727e-06, "loss": 0.6889, "step": 3835 }, { "epoch": 0.8211714966150223, "grad_norm": 0.13968201697210011, "learning_rate": 3.132982346962994e-06, "loss": 0.6919, "step": 3836 }, { "epoch": 0.8213855663482379, "grad_norm": 0.14069485094599501, "learning_rate": 3.1256844388800876e-06, "loss": 0.6817, "step": 3837 }, { "epoch": 0.8215996360814536, "grad_norm": 0.14162967945251165, "learning_rate": 3.11839431995036e-06, "loss": 0.6979, "step": 3838 }, { "epoch": 0.8218137058146692, "grad_norm": 0.24692001012059667, "learning_rate": 3.1111119935389043e-06, "loss": 0.7072, "step": 3839 }, { "epoch": 0.8220277755478848, "grad_norm": 0.13860231067556303, "learning_rate": 3.103837463007244e-06, "loss": 0.6822, "step": 3840 }, { "epoch": 0.8222418452811003, "grad_norm": 0.13930677073500938, "learning_rate": 3.0965707317132733e-06, "loss": 0.7099, "step": 3841 }, { "epoch": 0.8224559150143159, "grad_norm": 0.14020374175989106, "learning_rate": 3.0893118030113125e-06, "loss": 0.6762, "step": 3842 }, { "epoch": 0.8226699847475315, "grad_norm": 0.14603073170804046, "learning_rate": 3.0820606802520704e-06, "loss": 0.7012, "step": 3843 }, { "epoch": 0.8228840544807471, "grad_norm": 0.14466821681364184, "learning_rate": 3.074817366782645e-06, "loss": 0.6595, "step": 3844 }, { "epoch": 0.8230981242139627, "grad_norm": 0.14387890059063177, "learning_rate": 3.067581865946545e-06, "loss": 0.7005, "step": 3845 }, { "epoch": 0.8233121939471783, "grad_norm": 0.14111563332915836, "learning_rate": 3.0603541810836535e-06, "loss": 0.6766, "step": 3846 }, { "epoch": 0.8235262636803938, "grad_norm": 0.1423762382892295, "learning_rate": 3.053134315530264e-06, "loss": 0.7019, "step": 3847 }, { "epoch": 0.8237403334136095, "grad_norm": 0.14122892803872422, "learning_rate": 3.0459222726190572e-06, "loss": 0.6715, "step": 3848 }, { "epoch": 0.8239544031468251, "grad_norm": 0.14282683189266973, "learning_rate": 3.0387180556790885e-06, "loss": 0.7026, "step": 3849 }, { "epoch": 0.8241684728800407, "grad_norm": 0.1420137311560712, "learning_rate": 3.0315216680358197e-06, "loss": 0.7198, "step": 3850 }, { "epoch": 0.8243825426132563, "grad_norm": 0.13842001787256472, "learning_rate": 3.0243331130110844e-06, "loss": 0.6911, "step": 3851 }, { "epoch": 0.8245966123464719, "grad_norm": 0.14723954474387052, "learning_rate": 3.0171523939231085e-06, "loss": 0.7183, "step": 3852 }, { "epoch": 0.8248106820796874, "grad_norm": 0.14031946831048728, "learning_rate": 3.009979514086503e-06, "loss": 0.6949, "step": 3853 }, { "epoch": 0.825024751812903, "grad_norm": 0.13896601011476556, "learning_rate": 3.002814476812248e-06, "loss": 0.7005, "step": 3854 }, { "epoch": 0.8252388215461186, "grad_norm": 0.140336415584225, "learning_rate": 2.9956572854077205e-06, "loss": 0.7058, "step": 3855 }, { "epoch": 0.8254528912793342, "grad_norm": 0.14237152962164493, "learning_rate": 2.988507943176657e-06, "loss": 0.6981, "step": 3856 }, { "epoch": 0.8256669610125499, "grad_norm": 0.14474808939258405, "learning_rate": 2.981366453419188e-06, "loss": 0.6757, "step": 3857 }, { "epoch": 0.8258810307457655, "grad_norm": 0.14428721558822039, "learning_rate": 2.974232819431815e-06, "loss": 0.6803, "step": 3858 }, { "epoch": 0.826095100478981, "grad_norm": 0.14696435302289199, "learning_rate": 2.967107044507398e-06, "loss": 0.7367, "step": 3859 }, { "epoch": 0.8263091702121966, "grad_norm": 0.28037210468520757, "learning_rate": 2.959989131935197e-06, "loss": 0.693, "step": 3860 }, { "epoch": 0.8265232399454122, "grad_norm": 0.1448533722417162, "learning_rate": 2.9528790850008127e-06, "loss": 0.7079, "step": 3861 }, { "epoch": 0.8267373096786278, "grad_norm": 0.14752679242510425, "learning_rate": 2.9457769069862395e-06, "loss": 0.7179, "step": 3862 }, { "epoch": 0.8269513794118434, "grad_norm": 0.13785014223553663, "learning_rate": 2.9386826011698286e-06, "loss": 0.6684, "step": 3863 }, { "epoch": 0.827165449145059, "grad_norm": 0.1457392300989148, "learning_rate": 2.931596170826294e-06, "loss": 0.7045, "step": 3864 }, { "epoch": 0.8273795188782745, "grad_norm": 0.14128710615788895, "learning_rate": 2.9245176192267276e-06, "loss": 0.7002, "step": 3865 }, { "epoch": 0.8275935886114902, "grad_norm": 0.14166476392919206, "learning_rate": 2.9174469496385648e-06, "loss": 0.6694, "step": 3866 }, { "epoch": 0.8278076583447058, "grad_norm": 0.14053579966386573, "learning_rate": 2.9103841653256238e-06, "loss": 0.6735, "step": 3867 }, { "epoch": 0.8280217280779214, "grad_norm": 0.14390292930132934, "learning_rate": 2.903329269548063e-06, "loss": 0.6931, "step": 3868 }, { "epoch": 0.828235797811137, "grad_norm": 0.14094241244359185, "learning_rate": 2.8962822655624155e-06, "loss": 0.7051, "step": 3869 }, { "epoch": 0.8284498675443526, "grad_norm": 0.14289625621208796, "learning_rate": 2.8892431566215685e-06, "loss": 0.701, "step": 3870 }, { "epoch": 0.8286639372775682, "grad_norm": 0.14272735146508855, "learning_rate": 2.8822119459747534e-06, "loss": 0.6844, "step": 3871 }, { "epoch": 0.8288780070107837, "grad_norm": 0.1388613092953752, "learning_rate": 2.8751886368675742e-06, "loss": 0.7012, "step": 3872 }, { "epoch": 0.8290920767439993, "grad_norm": 0.13846642983613058, "learning_rate": 2.8681732325419666e-06, "loss": 0.6712, "step": 3873 }, { "epoch": 0.8293061464772149, "grad_norm": 0.14623432135979536, "learning_rate": 2.8611657362362354e-06, "loss": 0.7462, "step": 3874 }, { "epoch": 0.8295202162104306, "grad_norm": 0.14847826703160524, "learning_rate": 2.8541661511850295e-06, "loss": 0.6931, "step": 3875 }, { "epoch": 0.8297342859436462, "grad_norm": 0.14505536712986322, "learning_rate": 2.8471744806193367e-06, "loss": 0.7103, "step": 3876 }, { "epoch": 0.8299483556768618, "grad_norm": 0.13743644349343906, "learning_rate": 2.8401907277665096e-06, "loss": 0.6591, "step": 3877 }, { "epoch": 0.8301624254100773, "grad_norm": 0.18343752688834064, "learning_rate": 2.8332148958502247e-06, "loss": 0.6752, "step": 3878 }, { "epoch": 0.8303764951432929, "grad_norm": 0.17207799635199497, "learning_rate": 2.82624698809052e-06, "loss": 0.6539, "step": 3879 }, { "epoch": 0.8305905648765085, "grad_norm": 0.14449348982871577, "learning_rate": 2.819287007703773e-06, "loss": 0.6847, "step": 3880 }, { "epoch": 0.8308046346097241, "grad_norm": 0.13911071475543194, "learning_rate": 2.812334957902685e-06, "loss": 0.7, "step": 3881 }, { "epoch": 0.8310187043429397, "grad_norm": 0.1455899341813523, "learning_rate": 2.8053908418963205e-06, "loss": 0.7362, "step": 3882 }, { "epoch": 0.8312327740761553, "grad_norm": 0.13809931159925534, "learning_rate": 2.798454662890069e-06, "loss": 0.6827, "step": 3883 }, { "epoch": 0.831446843809371, "grad_norm": 0.1427053366283101, "learning_rate": 2.7915264240856554e-06, "loss": 0.6816, "step": 3884 }, { "epoch": 0.8316609135425865, "grad_norm": 0.143272006365014, "learning_rate": 2.78460612868114e-06, "loss": 0.7046, "step": 3885 }, { "epoch": 0.8318749832758021, "grad_norm": 0.14430443102194687, "learning_rate": 2.777693779870927e-06, "loss": 0.6726, "step": 3886 }, { "epoch": 0.8320890530090177, "grad_norm": 0.13592084650091263, "learning_rate": 2.7707893808457355e-06, "loss": 0.6654, "step": 3887 }, { "epoch": 0.8323031227422333, "grad_norm": 0.14487192191418882, "learning_rate": 2.7638929347926245e-06, "loss": 0.7002, "step": 3888 }, { "epoch": 0.8325171924754489, "grad_norm": 0.14223244399105567, "learning_rate": 2.7570044448949886e-06, "loss": 0.6767, "step": 3889 }, { "epoch": 0.8327312622086644, "grad_norm": 0.14475070977830992, "learning_rate": 2.750123914332532e-06, "loss": 0.6861, "step": 3890 }, { "epoch": 0.83294533194188, "grad_norm": 0.1380380931616058, "learning_rate": 2.743251346281297e-06, "loss": 0.6816, "step": 3891 }, { "epoch": 0.8331594016750956, "grad_norm": 0.1439929693576951, "learning_rate": 2.7363867439136572e-06, "loss": 0.7053, "step": 3892 }, { "epoch": 0.8333734714083113, "grad_norm": 0.14272699628805557, "learning_rate": 2.7295301103982906e-06, "loss": 0.6921, "step": 3893 }, { "epoch": 0.8335875411415269, "grad_norm": 0.14647739149487252, "learning_rate": 2.722681448900213e-06, "loss": 0.7005, "step": 3894 }, { "epoch": 0.8338016108747425, "grad_norm": 0.14308375221398376, "learning_rate": 2.715840762580748e-06, "loss": 0.6926, "step": 3895 }, { "epoch": 0.834015680607958, "grad_norm": 0.13914110296548118, "learning_rate": 2.709008054597546e-06, "loss": 0.6864, "step": 3896 }, { "epoch": 0.8342297503411736, "grad_norm": 0.14306722460179636, "learning_rate": 2.7021833281045796e-06, "loss": 0.6922, "step": 3897 }, { "epoch": 0.8344438200743892, "grad_norm": 0.14054438178321524, "learning_rate": 2.6953665862521174e-06, "loss": 0.7107, "step": 3898 }, { "epoch": 0.8346578898076048, "grad_norm": 0.14759096236273558, "learning_rate": 2.688557832186762e-06, "loss": 0.6842, "step": 3899 }, { "epoch": 0.8348719595408204, "grad_norm": 0.1413651806438682, "learning_rate": 2.681757069051427e-06, "loss": 0.6935, "step": 3900 }, { "epoch": 0.835086029274036, "grad_norm": 0.14037485380233253, "learning_rate": 2.674964299985321e-06, "loss": 0.6776, "step": 3901 }, { "epoch": 0.8353000990072517, "grad_norm": 0.14262034762857348, "learning_rate": 2.6681795281239866e-06, "loss": 0.6992, "step": 3902 }, { "epoch": 0.8355141687404672, "grad_norm": 0.14717500778750456, "learning_rate": 2.6614027565992473e-06, "loss": 0.6866, "step": 3903 }, { "epoch": 0.8357282384736828, "grad_norm": 0.13603128330850944, "learning_rate": 2.6546339885392568e-06, "loss": 0.6782, "step": 3904 }, { "epoch": 0.8359423082068984, "grad_norm": 0.14291650916023088, "learning_rate": 2.647873227068469e-06, "loss": 0.6913, "step": 3905 }, { "epoch": 0.836156377940114, "grad_norm": 0.13719684474729255, "learning_rate": 2.6411204753076325e-06, "loss": 0.6948, "step": 3906 }, { "epoch": 0.8363704476733296, "grad_norm": 0.14062577396152376, "learning_rate": 2.634375736373811e-06, "loss": 0.6841, "step": 3907 }, { "epoch": 0.8365845174065452, "grad_norm": 0.14660486128751915, "learning_rate": 2.6276390133803585e-06, "loss": 0.7241, "step": 3908 }, { "epoch": 0.8367985871397607, "grad_norm": 0.14093723627470145, "learning_rate": 2.620910309436937e-06, "loss": 0.6936, "step": 3909 }, { "epoch": 0.8370126568729763, "grad_norm": 0.14304147247852791, "learning_rate": 2.6141896276495015e-06, "loss": 0.7202, "step": 3910 }, { "epoch": 0.837226726606192, "grad_norm": 0.13957784476811416, "learning_rate": 2.6074769711203062e-06, "loss": 0.7017, "step": 3911 }, { "epoch": 0.8374407963394076, "grad_norm": 0.1443129027083466, "learning_rate": 2.600772342947908e-06, "loss": 0.7173, "step": 3912 }, { "epoch": 0.8376548660726232, "grad_norm": 0.14499795810455413, "learning_rate": 2.5940757462271405e-06, "loss": 0.6996, "step": 3913 }, { "epoch": 0.8378689358058388, "grad_norm": 0.13608416467777257, "learning_rate": 2.5873871840491504e-06, "loss": 0.6648, "step": 3914 }, { "epoch": 0.8380830055390543, "grad_norm": 0.14263848842725366, "learning_rate": 2.5807066595013574e-06, "loss": 0.7054, "step": 3915 }, { "epoch": 0.8382970752722699, "grad_norm": 0.1390153659503591, "learning_rate": 2.5740341756674813e-06, "loss": 0.6989, "step": 3916 }, { "epoch": 0.8385111450054855, "grad_norm": 0.14440777083763068, "learning_rate": 2.5673697356275364e-06, "loss": 0.7063, "step": 3917 }, { "epoch": 0.8387252147387011, "grad_norm": 0.1370223863651042, "learning_rate": 2.560713342457806e-06, "loss": 0.6645, "step": 3918 }, { "epoch": 0.8389392844719167, "grad_norm": 0.14239714949739304, "learning_rate": 2.554064999230876e-06, "loss": 0.6911, "step": 3919 }, { "epoch": 0.8391533542051324, "grad_norm": 0.13658205241746624, "learning_rate": 2.5474247090156025e-06, "loss": 0.6618, "step": 3920 }, { "epoch": 0.839367423938348, "grad_norm": 0.1414954085156795, "learning_rate": 2.540792474877134e-06, "loss": 0.6789, "step": 3921 }, { "epoch": 0.8395814936715635, "grad_norm": 0.13893306653794818, "learning_rate": 2.5341682998769045e-06, "loss": 0.6944, "step": 3922 }, { "epoch": 0.8397955634047791, "grad_norm": 0.14402246315833805, "learning_rate": 2.5275521870726107e-06, "loss": 0.7252, "step": 3923 }, { "epoch": 0.8400096331379947, "grad_norm": 0.13976737539025286, "learning_rate": 2.5209441395182444e-06, "loss": 0.6739, "step": 3924 }, { "epoch": 0.8402237028712103, "grad_norm": 0.13960447481763935, "learning_rate": 2.5143441602640662e-06, "loss": 0.6841, "step": 3925 }, { "epoch": 0.8404377726044259, "grad_norm": 0.13697710272548283, "learning_rate": 2.5077522523566123e-06, "loss": 0.6965, "step": 3926 }, { "epoch": 0.8406518423376415, "grad_norm": 0.1474640548603916, "learning_rate": 2.5011684188387044e-06, "loss": 0.6872, "step": 3927 }, { "epoch": 0.840865912070857, "grad_norm": 0.1406357364898473, "learning_rate": 2.4945926627494154e-06, "loss": 0.7, "step": 3928 }, { "epoch": 0.8410799818040727, "grad_norm": 0.1406116770497962, "learning_rate": 2.4880249871241135e-06, "loss": 0.6694, "step": 3929 }, { "epoch": 0.8412940515372883, "grad_norm": 0.16032841037283024, "learning_rate": 2.4814653949944157e-06, "loss": 0.7324, "step": 3930 }, { "epoch": 0.8415081212705039, "grad_norm": 0.13927806938891313, "learning_rate": 2.474913889388222e-06, "loss": 0.7026, "step": 3931 }, { "epoch": 0.8417221910037195, "grad_norm": 0.14112975682958137, "learning_rate": 2.468370473329702e-06, "loss": 0.6777, "step": 3932 }, { "epoch": 0.8419362607369351, "grad_norm": 0.13985389682414093, "learning_rate": 2.4618351498392735e-06, "loss": 0.6811, "step": 3933 }, { "epoch": 0.8421503304701506, "grad_norm": 0.1354473401959006, "learning_rate": 2.4553079219336385e-06, "loss": 0.6678, "step": 3934 }, { "epoch": 0.8423644002033662, "grad_norm": 0.13276337595163643, "learning_rate": 2.448788792625747e-06, "loss": 0.6616, "step": 3935 }, { "epoch": 0.8425784699365818, "grad_norm": 0.1401646446893855, "learning_rate": 2.4422777649248186e-06, "loss": 0.685, "step": 3936 }, { "epoch": 0.8427925396697974, "grad_norm": 0.1422983340353118, "learning_rate": 2.435774841836338e-06, "loss": 0.6645, "step": 3937 }, { "epoch": 0.8430066094030131, "grad_norm": 0.14091992521337854, "learning_rate": 2.4292800263620354e-06, "loss": 0.6835, "step": 3938 }, { "epoch": 0.8432206791362287, "grad_norm": 0.155299653523915, "learning_rate": 2.42279332149991e-06, "loss": 0.7074, "step": 3939 }, { "epoch": 0.8434347488694443, "grad_norm": 0.14304814131572857, "learning_rate": 2.416314730244207e-06, "loss": 0.6992, "step": 3940 }, { "epoch": 0.8436488186026598, "grad_norm": 0.138437758159917, "learning_rate": 2.4098442555854386e-06, "loss": 0.6718, "step": 3941 }, { "epoch": 0.8438628883358754, "grad_norm": 0.1437415045430234, "learning_rate": 2.403381900510364e-06, "loss": 0.6888, "step": 3942 }, { "epoch": 0.844076958069091, "grad_norm": 0.14302757384348427, "learning_rate": 2.396927668001987e-06, "loss": 0.6965, "step": 3943 }, { "epoch": 0.8442910278023066, "grad_norm": 0.14530139265461148, "learning_rate": 2.3904815610395816e-06, "loss": 0.6862, "step": 3944 }, { "epoch": 0.8445050975355222, "grad_norm": 0.16416906748599933, "learning_rate": 2.384043582598645e-06, "loss": 0.6675, "step": 3945 }, { "epoch": 0.8447191672687377, "grad_norm": 0.14360171170288485, "learning_rate": 2.3776137356509455e-06, "loss": 0.6786, "step": 3946 }, { "epoch": 0.8449332370019534, "grad_norm": 0.21768664667533322, "learning_rate": 2.3711920231644902e-06, "loss": 0.6778, "step": 3947 }, { "epoch": 0.845147306735169, "grad_norm": 0.13587049456150121, "learning_rate": 2.364778448103524e-06, "loss": 0.6682, "step": 3948 }, { "epoch": 0.8453613764683846, "grad_norm": 0.1422460530792501, "learning_rate": 2.3583730134285453e-06, "loss": 0.6773, "step": 3949 }, { "epoch": 0.8455754462016002, "grad_norm": 0.1397936301287952, "learning_rate": 2.3519757220962847e-06, "loss": 0.6886, "step": 3950 }, { "epoch": 0.8457895159348158, "grad_norm": 0.1398299869684245, "learning_rate": 2.345586577059731e-06, "loss": 0.6643, "step": 3951 }, { "epoch": 0.8460035856680314, "grad_norm": 0.139695615788581, "learning_rate": 2.339205581268089e-06, "loss": 0.6986, "step": 3952 }, { "epoch": 0.8462176554012469, "grad_norm": 0.14577144515660195, "learning_rate": 2.3328327376668237e-06, "loss": 0.6841, "step": 3953 }, { "epoch": 0.8464317251344625, "grad_norm": 0.1384862785022817, "learning_rate": 2.32646804919763e-06, "loss": 0.6623, "step": 3954 }, { "epoch": 0.8466457948676781, "grad_norm": 0.14203223741428794, "learning_rate": 2.320111518798427e-06, "loss": 0.6709, "step": 3955 }, { "epoch": 0.8468598646008937, "grad_norm": 0.1378325964817421, "learning_rate": 2.3137631494033853e-06, "loss": 0.7027, "step": 3956 }, { "epoch": 0.8470739343341094, "grad_norm": 0.13838009661720402, "learning_rate": 2.3074229439428964e-06, "loss": 0.6772, "step": 3957 }, { "epoch": 0.847288004067325, "grad_norm": 0.14114303651345592, "learning_rate": 2.301090905343586e-06, "loss": 0.7014, "step": 3958 }, { "epoch": 0.8475020738005405, "grad_norm": 0.1398378489868303, "learning_rate": 2.29476703652832e-06, "loss": 0.6861, "step": 3959 }, { "epoch": 0.8477161435337561, "grad_norm": 0.16828877180724813, "learning_rate": 2.288451340416178e-06, "loss": 0.6852, "step": 3960 }, { "epoch": 0.8479302132669717, "grad_norm": 0.1415004020669396, "learning_rate": 2.2821438199224756e-06, "loss": 0.6754, "step": 3961 }, { "epoch": 0.8481442830001873, "grad_norm": 0.13877695917097002, "learning_rate": 2.2758444779587487e-06, "loss": 0.6752, "step": 3962 }, { "epoch": 0.8483583527334029, "grad_norm": 0.14433666441866894, "learning_rate": 2.2695533174327667e-06, "loss": 0.7113, "step": 3963 }, { "epoch": 0.8485724224666185, "grad_norm": 0.14307448115144958, "learning_rate": 2.263270341248518e-06, "loss": 0.6886, "step": 3964 }, { "epoch": 0.848786492199834, "grad_norm": 0.13571935435252858, "learning_rate": 2.2569955523062093e-06, "loss": 0.6711, "step": 3965 }, { "epoch": 0.8490005619330497, "grad_norm": 0.13748358307770517, "learning_rate": 2.2507289535022747e-06, "loss": 0.6417, "step": 3966 }, { "epoch": 0.8492146316662653, "grad_norm": 0.13798070078416347, "learning_rate": 2.244470547729365e-06, "loss": 0.6861, "step": 3967 }, { "epoch": 0.8494287013994809, "grad_norm": 0.14618650486624518, "learning_rate": 2.2382203378763466e-06, "loss": 0.6687, "step": 3968 }, { "epoch": 0.8496427711326965, "grad_norm": 0.13862003490689262, "learning_rate": 2.2319783268283037e-06, "loss": 0.6556, "step": 3969 }, { "epoch": 0.8498568408659121, "grad_norm": 0.18601606585048341, "learning_rate": 2.225744517466546e-06, "loss": 0.7012, "step": 3970 }, { "epoch": 0.8500709105991276, "grad_norm": 0.13825824549501115, "learning_rate": 2.2195189126685746e-06, "loss": 0.6855, "step": 3971 }, { "epoch": 0.8502849803323432, "grad_norm": 0.2125788653793348, "learning_rate": 2.2133015153081283e-06, "loss": 0.6751, "step": 3972 }, { "epoch": 0.8504990500655588, "grad_norm": 0.14006030798093297, "learning_rate": 2.2070923282551447e-06, "loss": 0.686, "step": 3973 }, { "epoch": 0.8507131197987744, "grad_norm": 0.14281909317579342, "learning_rate": 2.2008913543757673e-06, "loss": 0.6904, "step": 3974 }, { "epoch": 0.8509271895319901, "grad_norm": 0.14249985452681854, "learning_rate": 2.1946985965323584e-06, "loss": 0.6949, "step": 3975 }, { "epoch": 0.8511412592652057, "grad_norm": 0.14261949527007434, "learning_rate": 2.1885140575834862e-06, "loss": 0.701, "step": 3976 }, { "epoch": 0.8513553289984213, "grad_norm": 0.1353749900762074, "learning_rate": 2.1823377403839176e-06, "loss": 0.6786, "step": 3977 }, { "epoch": 0.8515693987316368, "grad_norm": 0.14062008652826552, "learning_rate": 2.1761696477846296e-06, "loss": 0.6875, "step": 3978 }, { "epoch": 0.8517834684648524, "grad_norm": 0.14372691293050957, "learning_rate": 2.1700097826328116e-06, "loss": 0.7095, "step": 3979 }, { "epoch": 0.851997538198068, "grad_norm": 0.14027054705726305, "learning_rate": 2.1638581477718313e-06, "loss": 0.6967, "step": 3980 }, { "epoch": 0.8522116079312836, "grad_norm": 0.1438960621108349, "learning_rate": 2.157714746041286e-06, "loss": 0.6933, "step": 3981 }, { "epoch": 0.8524256776644992, "grad_norm": 0.1368611912657314, "learning_rate": 2.151579580276948e-06, "loss": 0.6867, "step": 3982 }, { "epoch": 0.8526397473977148, "grad_norm": 0.143154276663767, "learning_rate": 2.1454526533108024e-06, "loss": 0.6693, "step": 3983 }, { "epoch": 0.8528538171309304, "grad_norm": 0.14045446781032014, "learning_rate": 2.139333967971031e-06, "loss": 0.7029, "step": 3984 }, { "epoch": 0.853067886864146, "grad_norm": 0.140086411666049, "learning_rate": 2.133223527082002e-06, "loss": 0.6762, "step": 3985 }, { "epoch": 0.8532819565973616, "grad_norm": 0.1418318463075373, "learning_rate": 2.1271213334642902e-06, "loss": 0.7049, "step": 3986 }, { "epoch": 0.8534960263305772, "grad_norm": 0.13948421996434165, "learning_rate": 2.121027389934649e-06, "loss": 0.6896, "step": 3987 }, { "epoch": 0.8537100960637928, "grad_norm": 0.13862761525730624, "learning_rate": 2.114941699306037e-06, "loss": 0.7058, "step": 3988 }, { "epoch": 0.8539241657970084, "grad_norm": 0.13840602652849981, "learning_rate": 2.108864264387598e-06, "loss": 0.6903, "step": 3989 }, { "epoch": 0.8541382355302239, "grad_norm": 0.14001467256463854, "learning_rate": 2.1027950879846615e-06, "loss": 0.7052, "step": 3990 }, { "epoch": 0.8543523052634395, "grad_norm": 0.13582098398945472, "learning_rate": 2.0967341728987554e-06, "loss": 0.6872, "step": 3991 }, { "epoch": 0.8545663749966551, "grad_norm": 0.13998492598632672, "learning_rate": 2.0906815219275756e-06, "loss": 0.6864, "step": 3992 }, { "epoch": 0.8547804447298708, "grad_norm": 0.1409050434646671, "learning_rate": 2.0846371378650267e-06, "loss": 0.6602, "step": 3993 }, { "epoch": 0.8549945144630864, "grad_norm": 0.14229180526897744, "learning_rate": 2.0786010235011745e-06, "loss": 0.6781, "step": 3994 }, { "epoch": 0.855208584196302, "grad_norm": 0.14197897212796198, "learning_rate": 2.0725731816222836e-06, "loss": 0.717, "step": 3995 }, { "epoch": 0.8554226539295176, "grad_norm": 0.13879021687546958, "learning_rate": 2.0665536150108e-06, "loss": 0.6677, "step": 3996 }, { "epoch": 0.8556367236627331, "grad_norm": 0.15826009454899004, "learning_rate": 2.060542326445334e-06, "loss": 0.6991, "step": 3997 }, { "epoch": 0.8558507933959487, "grad_norm": 0.13594920436812258, "learning_rate": 2.0545393187006945e-06, "loss": 0.676, "step": 3998 }, { "epoch": 0.8560648631291643, "grad_norm": 0.1410472132816311, "learning_rate": 2.04854459454785e-06, "loss": 0.6826, "step": 3999 }, { "epoch": 0.8562789328623799, "grad_norm": 0.13966756865622357, "learning_rate": 2.0425581567539597e-06, "loss": 0.6944, "step": 4000 }, { "epoch": 0.8564930025955955, "grad_norm": 0.1405657923960637, "learning_rate": 2.0365800080823583e-06, "loss": 0.6926, "step": 4001 }, { "epoch": 0.8567070723288112, "grad_norm": 0.13870469508337874, "learning_rate": 2.0306101512925357e-06, "loss": 0.6523, "step": 4002 }, { "epoch": 0.8569211420620267, "grad_norm": 0.1373519683059727, "learning_rate": 2.0246485891401768e-06, "loss": 0.7187, "step": 4003 }, { "epoch": 0.8571352117952423, "grad_norm": 0.14028813574062063, "learning_rate": 2.01869532437712e-06, "loss": 0.6978, "step": 4004 }, { "epoch": 0.8573492815284579, "grad_norm": 0.13781274742678487, "learning_rate": 2.0127503597513877e-06, "loss": 0.7051, "step": 4005 }, { "epoch": 0.8575633512616735, "grad_norm": 0.13973132085958173, "learning_rate": 2.006813698007164e-06, "loss": 0.6982, "step": 4006 }, { "epoch": 0.8577774209948891, "grad_norm": 0.14016676711135564, "learning_rate": 2.0008853418847952e-06, "loss": 0.6933, "step": 4007 }, { "epoch": 0.8579914907281047, "grad_norm": 0.1461285120595108, "learning_rate": 1.99496529412081e-06, "loss": 0.7013, "step": 4008 }, { "epoch": 0.8582055604613202, "grad_norm": 0.13781601790971343, "learning_rate": 1.98905355744788e-06, "loss": 0.6718, "step": 4009 }, { "epoch": 0.8584196301945358, "grad_norm": 0.1375702362955899, "learning_rate": 1.9831501345948578e-06, "loss": 0.6823, "step": 4010 }, { "epoch": 0.8586336999277515, "grad_norm": 0.14192092140244358, "learning_rate": 1.9772550282867554e-06, "loss": 0.6916, "step": 4011 }, { "epoch": 0.8588477696609671, "grad_norm": 0.1423477032392777, "learning_rate": 1.9713682412447377e-06, "loss": 0.6693, "step": 4012 }, { "epoch": 0.8590618393941827, "grad_norm": 0.14418711694126216, "learning_rate": 1.9654897761861404e-06, "loss": 0.7048, "step": 4013 }, { "epoch": 0.8592759091273983, "grad_norm": 0.13848213193437015, "learning_rate": 1.9596196358244434e-06, "loss": 0.6694, "step": 4014 }, { "epoch": 0.8594899788606138, "grad_norm": 0.13818260095241675, "learning_rate": 1.9537578228693e-06, "loss": 0.6819, "step": 4015 }, { "epoch": 0.8597040485938294, "grad_norm": 0.14230002086414403, "learning_rate": 1.947904340026514e-06, "loss": 0.6929, "step": 4016 }, { "epoch": 0.859918118327045, "grad_norm": 0.1362836947439643, "learning_rate": 1.9420591899980357e-06, "loss": 0.6675, "step": 4017 }, { "epoch": 0.8601321880602606, "grad_norm": 0.1342696395775042, "learning_rate": 1.936222375481982e-06, "loss": 0.6619, "step": 4018 }, { "epoch": 0.8603462577934762, "grad_norm": 0.1392639990911854, "learning_rate": 1.930393899172611e-06, "loss": 0.6682, "step": 4019 }, { "epoch": 0.8605603275266919, "grad_norm": 0.14557138905120687, "learning_rate": 1.9245737637603357e-06, "loss": 0.6903, "step": 4020 }, { "epoch": 0.8607743972599075, "grad_norm": 0.13880560912456477, "learning_rate": 1.9187619719317286e-06, "loss": 0.6616, "step": 4021 }, { "epoch": 0.860988466993123, "grad_norm": 0.13649539154107124, "learning_rate": 1.9129585263694904e-06, "loss": 0.6835, "step": 4022 }, { "epoch": 0.8612025367263386, "grad_norm": 0.1411681513042414, "learning_rate": 1.9071634297524921e-06, "loss": 0.7097, "step": 4023 }, { "epoch": 0.8614166064595542, "grad_norm": 0.2129660008473166, "learning_rate": 1.9013766847557292e-06, "loss": 0.6706, "step": 4024 }, { "epoch": 0.8616306761927698, "grad_norm": 0.13977040417070688, "learning_rate": 1.895598294050358e-06, "loss": 0.6828, "step": 4025 }, { "epoch": 0.8618447459259854, "grad_norm": 0.1418421415881163, "learning_rate": 1.8898282603036788e-06, "loss": 0.7129, "step": 4026 }, { "epoch": 0.862058815659201, "grad_norm": 0.1380293165353464, "learning_rate": 1.8840665861791164e-06, "loss": 0.6716, "step": 4027 }, { "epoch": 0.8622728853924165, "grad_norm": 1.1070614596771704, "learning_rate": 1.8783132743362608e-06, "loss": 0.7131, "step": 4028 }, { "epoch": 0.8624869551256322, "grad_norm": 0.13706533146576322, "learning_rate": 1.8725683274308192e-06, "loss": 0.6791, "step": 4029 }, { "epoch": 0.8627010248588478, "grad_norm": 0.13937686157761398, "learning_rate": 1.8668317481146546e-06, "loss": 0.6935, "step": 4030 }, { "epoch": 0.8629150945920634, "grad_norm": 0.1407002418282179, "learning_rate": 1.8611035390357667e-06, "loss": 0.6827, "step": 4031 }, { "epoch": 0.863129164325279, "grad_norm": 0.1395748879602978, "learning_rate": 1.8553837028382738e-06, "loss": 0.6962, "step": 4032 }, { "epoch": 0.8633432340584946, "grad_norm": 0.14068603560572177, "learning_rate": 1.8496722421624547e-06, "loss": 0.6925, "step": 4033 }, { "epoch": 0.8635573037917101, "grad_norm": 0.13945254285115313, "learning_rate": 1.8439691596446985e-06, "loss": 0.679, "step": 4034 }, { "epoch": 0.8637713735249257, "grad_norm": 0.13515587483722066, "learning_rate": 1.838274457917546e-06, "loss": 0.6769, "step": 4035 }, { "epoch": 0.8639854432581413, "grad_norm": 0.14005354445109036, "learning_rate": 1.8325881396096546e-06, "loss": 0.7014, "step": 4036 }, { "epoch": 0.8641995129913569, "grad_norm": 0.3089511311556019, "learning_rate": 1.82691020734582e-06, "loss": 0.6876, "step": 4037 }, { "epoch": 0.8644135827245726, "grad_norm": 0.1394864161854626, "learning_rate": 1.8212406637469704e-06, "loss": 0.689, "step": 4038 }, { "epoch": 0.8646276524577882, "grad_norm": 0.1359203093816971, "learning_rate": 1.81557951143015e-06, "loss": 0.6623, "step": 4039 }, { "epoch": 0.8648417221910037, "grad_norm": 0.13586191488478291, "learning_rate": 1.8099267530085419e-06, "loss": 0.6786, "step": 4040 }, { "epoch": 0.8650557919242193, "grad_norm": 0.1379347374636731, "learning_rate": 1.8042823910914431e-06, "loss": 0.6899, "step": 4041 }, { "epoch": 0.8652698616574349, "grad_norm": 0.1427146555224502, "learning_rate": 1.798646428284283e-06, "loss": 0.7209, "step": 4042 }, { "epoch": 0.8654839313906505, "grad_norm": 0.14033994833419458, "learning_rate": 1.7930188671886183e-06, "loss": 0.7096, "step": 4043 }, { "epoch": 0.8656980011238661, "grad_norm": 0.5308944134458937, "learning_rate": 1.7873997104021111e-06, "loss": 0.6957, "step": 4044 }, { "epoch": 0.8659120708570817, "grad_norm": 0.14445543716033282, "learning_rate": 1.7817889605185557e-06, "loss": 0.7236, "step": 4045 }, { "epoch": 0.8661261405902972, "grad_norm": 0.14008048343161253, "learning_rate": 1.7761866201278732e-06, "loss": 0.7184, "step": 4046 }, { "epoch": 0.8663402103235129, "grad_norm": 0.1428713195925786, "learning_rate": 1.770592691816082e-06, "loss": 0.7001, "step": 4047 }, { "epoch": 0.8665542800567285, "grad_norm": 0.13973365555288117, "learning_rate": 1.7650071781653343e-06, "loss": 0.7003, "step": 4048 }, { "epoch": 0.8667683497899441, "grad_norm": 0.14335127799622926, "learning_rate": 1.7594300817538945e-06, "loss": 0.6965, "step": 4049 }, { "epoch": 0.8669824195231597, "grad_norm": 0.13719063963547518, "learning_rate": 1.7538614051561365e-06, "loss": 0.6943, "step": 4050 }, { "epoch": 0.8671964892563753, "grad_norm": 0.14118989738150972, "learning_rate": 1.7483011509425573e-06, "loss": 0.7035, "step": 4051 }, { "epoch": 0.8674105589895909, "grad_norm": 0.14086567309607934, "learning_rate": 1.7427493216797509e-06, "loss": 0.6658, "step": 4052 }, { "epoch": 0.8676246287228064, "grad_norm": 0.14059478051894736, "learning_rate": 1.7372059199304359e-06, "loss": 0.6818, "step": 4053 }, { "epoch": 0.867838698456022, "grad_norm": 0.1489502535911441, "learning_rate": 1.731670948253441e-06, "loss": 0.7071, "step": 4054 }, { "epoch": 0.8680527681892376, "grad_norm": 0.14474702569714257, "learning_rate": 1.7261444092036917e-06, "loss": 0.6896, "step": 4055 }, { "epoch": 0.8682668379224533, "grad_norm": 0.13988495404158932, "learning_rate": 1.7206263053322314e-06, "loss": 0.691, "step": 4056 }, { "epoch": 0.8684809076556689, "grad_norm": 0.14392092840912138, "learning_rate": 1.7151166391862096e-06, "loss": 0.6893, "step": 4057 }, { "epoch": 0.8686949773888845, "grad_norm": 0.13850915238121347, "learning_rate": 1.7096154133088738e-06, "loss": 0.6737, "step": 4058 }, { "epoch": 0.8689090471221, "grad_norm": 0.13610006332881708, "learning_rate": 1.7041226302395797e-06, "loss": 0.684, "step": 4059 }, { "epoch": 0.8691231168553156, "grad_norm": 0.13887628728275586, "learning_rate": 1.69863829251379e-06, "loss": 0.6932, "step": 4060 }, { "epoch": 0.8693371865885312, "grad_norm": 0.136527775146042, "learning_rate": 1.6931624026630622e-06, "loss": 0.6585, "step": 4061 }, { "epoch": 0.8695512563217468, "grad_norm": 0.13775423345621057, "learning_rate": 1.687694963215054e-06, "loss": 0.7006, "step": 4062 }, { "epoch": 0.8697653260549624, "grad_norm": 0.1434875909523652, "learning_rate": 1.6822359766935337e-06, "loss": 0.6996, "step": 4063 }, { "epoch": 0.869979395788178, "grad_norm": 0.13874008611678676, "learning_rate": 1.6767854456183519e-06, "loss": 0.6661, "step": 4064 }, { "epoch": 0.8701934655213935, "grad_norm": 0.1392466463595333, "learning_rate": 1.6713433725054694e-06, "loss": 0.6846, "step": 4065 }, { "epoch": 0.8704075352546092, "grad_norm": 0.1463817588381614, "learning_rate": 1.6659097598669305e-06, "loss": 0.6963, "step": 4066 }, { "epoch": 0.8706216049878248, "grad_norm": 0.14190399348610305, "learning_rate": 1.660484610210884e-06, "loss": 0.7038, "step": 4067 }, { "epoch": 0.8708356747210404, "grad_norm": 0.1417111676401168, "learning_rate": 1.6550679260415736e-06, "loss": 0.7028, "step": 4068 }, { "epoch": 0.871049744454256, "grad_norm": 0.13496103825030722, "learning_rate": 1.6496597098593237e-06, "loss": 0.6607, "step": 4069 }, { "epoch": 0.8712638141874716, "grad_norm": 0.1385552010527188, "learning_rate": 1.6442599641605639e-06, "loss": 0.7213, "step": 4070 }, { "epoch": 0.8714778839206871, "grad_norm": 0.14098537014948517, "learning_rate": 1.6388686914377982e-06, "loss": 0.663, "step": 4071 }, { "epoch": 0.8716919536539027, "grad_norm": 0.13869759909861296, "learning_rate": 1.6334858941796339e-06, "loss": 0.6673, "step": 4072 }, { "epoch": 0.8719060233871183, "grad_norm": 0.14474368399396312, "learning_rate": 1.6281115748707632e-06, "loss": 0.6968, "step": 4073 }, { "epoch": 0.8721200931203339, "grad_norm": 0.13961803789818047, "learning_rate": 1.6227457359919551e-06, "loss": 0.6931, "step": 4074 }, { "epoch": 0.8723341628535496, "grad_norm": 0.14183495274306632, "learning_rate": 1.6173883800200774e-06, "loss": 0.7127, "step": 4075 }, { "epoch": 0.8725482325867652, "grad_norm": 0.14046329761262472, "learning_rate": 1.6120395094280693e-06, "loss": 0.6904, "step": 4076 }, { "epoch": 0.8727623023199808, "grad_norm": 0.1434999428386889, "learning_rate": 1.6066991266849674e-06, "loss": 0.677, "step": 4077 }, { "epoch": 0.8729763720531963, "grad_norm": 0.1370578797007404, "learning_rate": 1.601367234255875e-06, "loss": 0.6887, "step": 4078 }, { "epoch": 0.8731904417864119, "grad_norm": 0.14041343652782823, "learning_rate": 1.5960438346019857e-06, "loss": 0.7075, "step": 4079 }, { "epoch": 0.8734045115196275, "grad_norm": 0.13712033347839142, "learning_rate": 1.5907289301805783e-06, "loss": 0.7176, "step": 4080 }, { "epoch": 0.8736185812528431, "grad_norm": 0.13778323332346226, "learning_rate": 1.5854225234449927e-06, "loss": 0.6911, "step": 4081 }, { "epoch": 0.8738326509860587, "grad_norm": 0.18035510722500825, "learning_rate": 1.5801246168446626e-06, "loss": 0.6808, "step": 4082 }, { "epoch": 0.8740467207192743, "grad_norm": 0.1388176427967325, "learning_rate": 1.57483521282509e-06, "loss": 0.6797, "step": 4083 }, { "epoch": 0.8742607904524899, "grad_norm": 0.14253161598562444, "learning_rate": 1.5695543138278525e-06, "loss": 0.7061, "step": 4084 }, { "epoch": 0.8744748601857055, "grad_norm": 0.1404884643300318, "learning_rate": 1.5642819222906092e-06, "loss": 0.6908, "step": 4085 }, { "epoch": 0.8746889299189211, "grad_norm": 0.13829510031856715, "learning_rate": 1.55901804064708e-06, "loss": 0.6763, "step": 4086 }, { "epoch": 0.8749029996521367, "grad_norm": 0.13772618907491851, "learning_rate": 1.553762671327068e-06, "loss": 0.6728, "step": 4087 }, { "epoch": 0.8751170693853523, "grad_norm": 0.13122916552432304, "learning_rate": 1.5485158167564373e-06, "loss": 0.6588, "step": 4088 }, { "epoch": 0.8753311391185679, "grad_norm": 0.13879769879567272, "learning_rate": 1.5432774793571282e-06, "loss": 0.709, "step": 4089 }, { "epoch": 0.8755452088517834, "grad_norm": 0.1372467912384837, "learning_rate": 1.538047661547153e-06, "loss": 0.6692, "step": 4090 }, { "epoch": 0.875759278584999, "grad_norm": 0.14808122985403258, "learning_rate": 1.5328263657405761e-06, "loss": 0.7073, "step": 4091 }, { "epoch": 0.8759733483182146, "grad_norm": 0.4852315448503471, "learning_rate": 1.527613594347548e-06, "loss": 0.6736, "step": 4092 }, { "epoch": 0.8761874180514303, "grad_norm": 0.13678621068358662, "learning_rate": 1.5224093497742654e-06, "loss": 0.6904, "step": 4093 }, { "epoch": 0.8764014877846459, "grad_norm": 0.13625031090508333, "learning_rate": 1.5172136344230027e-06, "loss": 0.6743, "step": 4094 }, { "epoch": 0.8766155575178615, "grad_norm": 0.13679272543310853, "learning_rate": 1.5120264506920968e-06, "loss": 0.6595, "step": 4095 }, { "epoch": 0.876829627251077, "grad_norm": 0.14166687897944127, "learning_rate": 1.5068478009759324e-06, "loss": 0.6986, "step": 4096 }, { "epoch": 0.8770436969842926, "grad_norm": 0.14201800916806356, "learning_rate": 1.5016776876649753e-06, "loss": 0.6796, "step": 4097 }, { "epoch": 0.8772577667175082, "grad_norm": 0.13590090128662613, "learning_rate": 1.4965161131457296e-06, "loss": 0.6799, "step": 4098 }, { "epoch": 0.8774718364507238, "grad_norm": 0.1349365279421223, "learning_rate": 1.491363079800776e-06, "loss": 0.6893, "step": 4099 }, { "epoch": 0.8776859061839394, "grad_norm": 0.1350254244677767, "learning_rate": 1.4862185900087456e-06, "loss": 0.6881, "step": 4100 }, { "epoch": 0.877899975917155, "grad_norm": 0.14145139974077467, "learning_rate": 1.4810826461443184e-06, "loss": 0.7005, "step": 4101 }, { "epoch": 0.8781140456503707, "grad_norm": 0.140456229812318, "learning_rate": 1.475955250578247e-06, "loss": 0.726, "step": 4102 }, { "epoch": 0.8783281153835862, "grad_norm": 0.13748759121736628, "learning_rate": 1.4708364056773182e-06, "loss": 0.68, "step": 4103 }, { "epoch": 0.8785421851168018, "grad_norm": 0.13777697754413418, "learning_rate": 1.4657261138043865e-06, "loss": 0.658, "step": 4104 }, { "epoch": 0.8787562548500174, "grad_norm": 0.13468060059659814, "learning_rate": 1.460624377318356e-06, "loss": 0.667, "step": 4105 }, { "epoch": 0.878970324583233, "grad_norm": 0.14844673351629187, "learning_rate": 1.4555311985741716e-06, "loss": 0.7201, "step": 4106 }, { "epoch": 0.8791843943164486, "grad_norm": 0.13935277258856124, "learning_rate": 1.4504465799228396e-06, "loss": 0.7081, "step": 4107 }, { "epoch": 0.8793984640496642, "grad_norm": 0.1450966754147359, "learning_rate": 1.445370523711409e-06, "loss": 0.7075, "step": 4108 }, { "epoch": 0.8796125337828797, "grad_norm": 0.1417894459156667, "learning_rate": 1.440303032282979e-06, "loss": 0.7013, "step": 4109 }, { "epoch": 0.8798266035160953, "grad_norm": 0.14065170619765627, "learning_rate": 1.4352441079766987e-06, "loss": 0.6946, "step": 4110 }, { "epoch": 0.880040673249311, "grad_norm": 0.13767217083294478, "learning_rate": 1.4301937531277489e-06, "loss": 0.6947, "step": 4111 }, { "epoch": 0.8802547429825266, "grad_norm": 0.13631349764913517, "learning_rate": 1.4251519700673732e-06, "loss": 0.6756, "step": 4112 }, { "epoch": 0.8804688127157422, "grad_norm": 0.1377537748795002, "learning_rate": 1.4201187611228417e-06, "loss": 0.6948, "step": 4113 }, { "epoch": 0.8806828824489578, "grad_norm": 0.13662299320895158, "learning_rate": 1.4150941286174825e-06, "loss": 0.6744, "step": 4114 }, { "epoch": 0.8808969521821733, "grad_norm": 0.13632037699573227, "learning_rate": 1.4100780748706488e-06, "loss": 0.7033, "step": 4115 }, { "epoch": 0.8811110219153889, "grad_norm": 0.1424545895794344, "learning_rate": 1.4050706021977468e-06, "loss": 0.7033, "step": 4116 }, { "epoch": 0.8813250916486045, "grad_norm": 0.13688964624674085, "learning_rate": 1.400071712910216e-06, "loss": 0.6853, "step": 4117 }, { "epoch": 0.8815391613818201, "grad_norm": 0.1415596920705807, "learning_rate": 1.395081409315533e-06, "loss": 0.6975, "step": 4118 }, { "epoch": 0.8817532311150357, "grad_norm": 0.13687888237919926, "learning_rate": 1.390099693717215e-06, "loss": 0.6809, "step": 4119 }, { "epoch": 0.8819673008482514, "grad_norm": 0.13959374865085333, "learning_rate": 1.3851265684148097e-06, "loss": 0.6793, "step": 4120 }, { "epoch": 0.882181370581467, "grad_norm": 0.13775021606462998, "learning_rate": 1.3801620357039047e-06, "loss": 0.6996, "step": 4121 }, { "epoch": 0.8823954403146825, "grad_norm": 0.13536273849605482, "learning_rate": 1.3752060978761228e-06, "loss": 0.6658, "step": 4122 }, { "epoch": 0.8826095100478981, "grad_norm": 0.13694380189645117, "learning_rate": 1.3702587572191073e-06, "loss": 0.6757, "step": 4123 }, { "epoch": 0.8828235797811137, "grad_norm": 0.13367124100384561, "learning_rate": 1.3653200160165513e-06, "loss": 0.6432, "step": 4124 }, { "epoch": 0.8830376495143293, "grad_norm": 0.13486752085843698, "learning_rate": 1.3603898765481604e-06, "loss": 0.6597, "step": 4125 }, { "epoch": 0.8832517192475449, "grad_norm": 0.1400954872084322, "learning_rate": 1.3554683410896807e-06, "loss": 0.6945, "step": 4126 }, { "epoch": 0.8834657889807604, "grad_norm": 0.14690038632643201, "learning_rate": 1.3505554119128861e-06, "loss": 0.6943, "step": 4127 }, { "epoch": 0.883679858713976, "grad_norm": 0.1377368646762418, "learning_rate": 1.3456510912855736e-06, "loss": 0.7124, "step": 4128 }, { "epoch": 0.8838939284471917, "grad_norm": 0.13851820089899997, "learning_rate": 1.340755381471568e-06, "loss": 0.7009, "step": 4129 }, { "epoch": 0.8841079981804073, "grad_norm": 0.14402806675625066, "learning_rate": 1.3358682847307236e-06, "loss": 0.6993, "step": 4130 }, { "epoch": 0.8843220679136229, "grad_norm": 0.1368628935738042, "learning_rate": 1.3309898033189117e-06, "loss": 0.6932, "step": 4131 }, { "epoch": 0.8845361376468385, "grad_norm": 0.13682052086400184, "learning_rate": 1.3261199394880309e-06, "loss": 0.6873, "step": 4132 }, { "epoch": 0.884750207380054, "grad_norm": 0.13636934574803303, "learning_rate": 1.3212586954860052e-06, "loss": 0.6868, "step": 4133 }, { "epoch": 0.8849642771132696, "grad_norm": 0.1417087412086574, "learning_rate": 1.3164060735567684e-06, "loss": 0.6856, "step": 4134 }, { "epoch": 0.8851783468464852, "grad_norm": 0.13701695852595525, "learning_rate": 1.3115620759402892e-06, "loss": 0.6953, "step": 4135 }, { "epoch": 0.8853924165797008, "grad_norm": 0.14133718939419468, "learning_rate": 1.3067267048725452e-06, "loss": 0.6936, "step": 4136 }, { "epoch": 0.8856064863129164, "grad_norm": 0.1383476056781188, "learning_rate": 1.3018999625855334e-06, "loss": 0.6876, "step": 4137 }, { "epoch": 0.8858205560461321, "grad_norm": 0.13815354089297535, "learning_rate": 1.2970818513072737e-06, "loss": 0.6724, "step": 4138 }, { "epoch": 0.8860346257793477, "grad_norm": 0.13943089852733764, "learning_rate": 1.2922723732617914e-06, "loss": 0.6629, "step": 4139 }, { "epoch": 0.8862486955125632, "grad_norm": 0.1356520486119351, "learning_rate": 1.2874715306691355e-06, "loss": 0.6774, "step": 4140 }, { "epoch": 0.8864627652457788, "grad_norm": 0.13682394251305746, "learning_rate": 1.2826793257453707e-06, "loss": 0.6862, "step": 4141 }, { "epoch": 0.8866768349789944, "grad_norm": 0.13425737975549762, "learning_rate": 1.277895760702561e-06, "loss": 0.6702, "step": 4142 }, { "epoch": 0.88689090471221, "grad_norm": 0.13715559124946522, "learning_rate": 1.2731208377487958e-06, "loss": 0.6717, "step": 4143 }, { "epoch": 0.8871049744454256, "grad_norm": 0.13791502849871914, "learning_rate": 1.268354559088174e-06, "loss": 0.6867, "step": 4144 }, { "epoch": 0.8873190441786412, "grad_norm": 0.1376700480430702, "learning_rate": 1.2635969269207959e-06, "loss": 0.6871, "step": 4145 }, { "epoch": 0.8875331139118567, "grad_norm": 0.13558269481742685, "learning_rate": 1.258847943442778e-06, "loss": 0.6786, "step": 4146 }, { "epoch": 0.8877471836450724, "grad_norm": 0.14059255609283336, "learning_rate": 1.254107610846247e-06, "loss": 0.695, "step": 4147 }, { "epoch": 0.887961253378288, "grad_norm": 0.1337203655304122, "learning_rate": 1.249375931319321e-06, "loss": 0.656, "step": 4148 }, { "epoch": 0.8881753231115036, "grad_norm": 0.13911079351124517, "learning_rate": 1.2446529070461443e-06, "loss": 0.686, "step": 4149 }, { "epoch": 0.8883893928447192, "grad_norm": 0.1327340678772263, "learning_rate": 1.239938540206851e-06, "loss": 0.6667, "step": 4150 }, { "epoch": 0.8886034625779348, "grad_norm": 0.32176166095405384, "learning_rate": 1.2352328329775865e-06, "loss": 0.7068, "step": 4151 }, { "epoch": 0.8888175323111503, "grad_norm": 0.1371432945479455, "learning_rate": 1.230535787530498e-06, "loss": 0.688, "step": 4152 }, { "epoch": 0.8890316020443659, "grad_norm": 0.14197367576520264, "learning_rate": 1.2258474060337267e-06, "loss": 0.7041, "step": 4153 }, { "epoch": 0.8892456717775815, "grad_norm": 0.14111039898587507, "learning_rate": 1.2211676906514303e-06, "loss": 0.6933, "step": 4154 }, { "epoch": 0.8894597415107971, "grad_norm": 0.13911051066692642, "learning_rate": 1.2164966435437474e-06, "loss": 0.6829, "step": 4155 }, { "epoch": 0.8896738112440128, "grad_norm": 0.1362048186205355, "learning_rate": 1.2118342668668336e-06, "loss": 0.6876, "step": 4156 }, { "epoch": 0.8898878809772284, "grad_norm": 0.13818062029644287, "learning_rate": 1.207180562772825e-06, "loss": 0.6973, "step": 4157 }, { "epoch": 0.890101950710444, "grad_norm": 0.1317278216703112, "learning_rate": 1.2025355334098676e-06, "loss": 0.6626, "step": 4158 }, { "epoch": 0.8903160204436595, "grad_norm": 0.13939131334518293, "learning_rate": 1.1978991809221019e-06, "loss": 0.6978, "step": 4159 }, { "epoch": 0.8905300901768751, "grad_norm": 0.13289617835808065, "learning_rate": 1.1932715074496514e-06, "loss": 0.6731, "step": 4160 }, { "epoch": 0.8907441599100907, "grad_norm": 0.13587332632333382, "learning_rate": 1.1886525151286477e-06, "loss": 0.687, "step": 4161 }, { "epoch": 0.8909582296433063, "grad_norm": 0.13414299970098992, "learning_rate": 1.184042206091207e-06, "loss": 0.6729, "step": 4162 }, { "epoch": 0.8911722993765219, "grad_norm": 0.13301210870013755, "learning_rate": 1.1794405824654386e-06, "loss": 0.6868, "step": 4163 }, { "epoch": 0.8913863691097375, "grad_norm": 0.1381687560987605, "learning_rate": 1.1748476463754478e-06, "loss": 0.7018, "step": 4164 }, { "epoch": 0.8916004388429531, "grad_norm": 0.14151043043864178, "learning_rate": 1.1702633999413204e-06, "loss": 0.6632, "step": 4165 }, { "epoch": 0.8918145085761687, "grad_norm": 0.1375264682306881, "learning_rate": 1.165687845279142e-06, "loss": 0.6857, "step": 4166 }, { "epoch": 0.8920285783093843, "grad_norm": 0.1349179406097826, "learning_rate": 1.1611209845009718e-06, "loss": 0.6808, "step": 4167 }, { "epoch": 0.8922426480425999, "grad_norm": 0.1349911400223185, "learning_rate": 1.1565628197148704e-06, "loss": 0.6556, "step": 4168 }, { "epoch": 0.8924567177758155, "grad_norm": 0.13829912234677266, "learning_rate": 1.1520133530248812e-06, "loss": 0.6713, "step": 4169 }, { "epoch": 0.8926707875090311, "grad_norm": 0.1362169824803463, "learning_rate": 1.1474725865310199e-06, "loss": 0.6762, "step": 4170 }, { "epoch": 0.8928848572422466, "grad_norm": 0.13349799694666242, "learning_rate": 1.1429405223293056e-06, "loss": 0.6562, "step": 4171 }, { "epoch": 0.8930989269754622, "grad_norm": 0.1352831346317916, "learning_rate": 1.1384171625117246e-06, "loss": 0.7042, "step": 4172 }, { "epoch": 0.8933129967086778, "grad_norm": 0.13249036648854318, "learning_rate": 1.1339025091662537e-06, "loss": 0.6611, "step": 4173 }, { "epoch": 0.8935270664418934, "grad_norm": 0.1340249258222099, "learning_rate": 1.1293965643768523e-06, "loss": 0.6863, "step": 4174 }, { "epoch": 0.8937411361751091, "grad_norm": 0.1349001998761694, "learning_rate": 1.1248993302234502e-06, "loss": 0.6907, "step": 4175 }, { "epoch": 0.8939552059083247, "grad_norm": 0.1318757777674753, "learning_rate": 1.1204108087819666e-06, "loss": 0.6667, "step": 4176 }, { "epoch": 0.8941692756415403, "grad_norm": 0.13632503156690215, "learning_rate": 1.1159310021242909e-06, "loss": 0.7022, "step": 4177 }, { "epoch": 0.8943833453747558, "grad_norm": 0.13556018752789825, "learning_rate": 1.1114599123182956e-06, "loss": 0.6734, "step": 4178 }, { "epoch": 0.8945974151079714, "grad_norm": 0.135742094137929, "learning_rate": 1.1069975414278321e-06, "loss": 0.7064, "step": 4179 }, { "epoch": 0.894811484841187, "grad_norm": 0.1359650650535543, "learning_rate": 1.102543891512715e-06, "loss": 0.69, "step": 4180 }, { "epoch": 0.8950255545744026, "grad_norm": 0.1373653086794688, "learning_rate": 1.0980989646287466e-06, "loss": 0.69, "step": 4181 }, { "epoch": 0.8952396243076182, "grad_norm": 0.13816654457909658, "learning_rate": 1.0936627628276918e-06, "loss": 0.721, "step": 4182 }, { "epoch": 0.8954536940408337, "grad_norm": 0.13615854922586995, "learning_rate": 1.0892352881572976e-06, "loss": 0.6636, "step": 4183 }, { "epoch": 0.8956677637740494, "grad_norm": 0.14082358750022633, "learning_rate": 1.0848165426612778e-06, "loss": 0.6976, "step": 4184 }, { "epoch": 0.895881833507265, "grad_norm": 0.13391560669031236, "learning_rate": 1.080406528379314e-06, "loss": 0.7248, "step": 4185 }, { "epoch": 0.8960959032404806, "grad_norm": 0.2292997829312597, "learning_rate": 1.0760052473470673e-06, "loss": 0.6818, "step": 4186 }, { "epoch": 0.8963099729736962, "grad_norm": 0.13541889686321737, "learning_rate": 1.0716127015961541e-06, "loss": 0.6891, "step": 4187 }, { "epoch": 0.8965240427069118, "grad_norm": 0.13648025986660156, "learning_rate": 1.0672288931541664e-06, "loss": 0.6687, "step": 4188 }, { "epoch": 0.8967381124401274, "grad_norm": 0.13716114841950905, "learning_rate": 1.0628538240446672e-06, "loss": 0.6657, "step": 4189 }, { "epoch": 0.8969521821733429, "grad_norm": 0.13591264002806472, "learning_rate": 1.0584874962871728e-06, "loss": 0.6845, "step": 4190 }, { "epoch": 0.8971662519065585, "grad_norm": 0.1363589779239559, "learning_rate": 1.0541299118971815e-06, "loss": 0.6907, "step": 4191 }, { "epoch": 0.8973803216397741, "grad_norm": 0.13668582559807566, "learning_rate": 1.049781072886138e-06, "loss": 0.7095, "step": 4192 }, { "epoch": 0.8975943913729898, "grad_norm": 0.13323433252010586, "learning_rate": 1.0454409812614586e-06, "loss": 0.6692, "step": 4193 }, { "epoch": 0.8978084611062054, "grad_norm": 0.13716866600975855, "learning_rate": 1.0411096390265297e-06, "loss": 0.6993, "step": 4194 }, { "epoch": 0.898022530839421, "grad_norm": 0.13613843088374558, "learning_rate": 1.036787048180683e-06, "loss": 0.6777, "step": 4195 }, { "epoch": 0.8982366005726365, "grad_norm": 0.13370154993136393, "learning_rate": 1.0324732107192249e-06, "loss": 0.6754, "step": 4196 }, { "epoch": 0.8984506703058521, "grad_norm": 0.13288978948956706, "learning_rate": 1.0281681286334068e-06, "loss": 0.6555, "step": 4197 }, { "epoch": 0.8986647400390677, "grad_norm": 0.13626230149594187, "learning_rate": 1.0238718039104545e-06, "loss": 0.6984, "step": 4198 }, { "epoch": 0.8988788097722833, "grad_norm": 0.133980931294781, "learning_rate": 1.0195842385335375e-06, "loss": 0.6742, "step": 4199 }, { "epoch": 0.8990928795054989, "grad_norm": 0.13860021319839488, "learning_rate": 1.0153054344817926e-06, "loss": 0.6932, "step": 4200 }, { "epoch": 0.8993069492387145, "grad_norm": 0.13518658916061166, "learning_rate": 1.0110353937303064e-06, "loss": 0.6876, "step": 4201 }, { "epoch": 0.8995210189719302, "grad_norm": 0.13478101610443857, "learning_rate": 1.0067741182501201e-06, "loss": 0.6889, "step": 4202 }, { "epoch": 0.8997350887051457, "grad_norm": 0.13807951269005206, "learning_rate": 1.0025216100082359e-06, "loss": 0.707, "step": 4203 }, { "epoch": 0.8999491584383613, "grad_norm": 0.13839079339829718, "learning_rate": 9.982778709675967e-07, "loss": 0.6835, "step": 4204 }, { "epoch": 0.9001632281715769, "grad_norm": 0.13869051306125363, "learning_rate": 9.94042903087109e-07, "loss": 0.6854, "step": 4205 }, { "epoch": 0.9003772979047925, "grad_norm": 0.13586026032485238, "learning_rate": 9.89816708321627e-07, "loss": 0.6706, "step": 4206 }, { "epoch": 0.9005913676380081, "grad_norm": 0.13934395073413944, "learning_rate": 9.855992886219501e-07, "loss": 0.7135, "step": 4207 }, { "epoch": 0.9008054373712236, "grad_norm": 0.1314625614588388, "learning_rate": 9.813906459348388e-07, "loss": 0.6656, "step": 4208 }, { "epoch": 0.9010195071044392, "grad_norm": 0.13528061424125878, "learning_rate": 9.77190782202986e-07, "loss": 0.7033, "step": 4209 }, { "epoch": 0.9012335768376548, "grad_norm": 0.1334704367168475, "learning_rate": 9.72999699365047e-07, "loss": 0.7018, "step": 4210 }, { "epoch": 0.9014476465708705, "grad_norm": 0.13974577952090841, "learning_rate": 9.68817399355617e-07, "loss": 0.6883, "step": 4211 }, { "epoch": 0.9016617163040861, "grad_norm": 0.1420288171507796, "learning_rate": 9.646438841052364e-07, "loss": 0.7104, "step": 4212 }, { "epoch": 0.9018757860373017, "grad_norm": 0.20868338774224038, "learning_rate": 9.604791555403924e-07, "loss": 0.688, "step": 4213 }, { "epoch": 0.9020898557705173, "grad_norm": 0.13340624567742823, "learning_rate": 9.56323215583521e-07, "loss": 0.6617, "step": 4214 }, { "epoch": 0.9023039255037328, "grad_norm": 0.13582068993976912, "learning_rate": 9.521760661529878e-07, "loss": 0.6887, "step": 4215 }, { "epoch": 0.9025179952369484, "grad_norm": 0.1405173650563728, "learning_rate": 9.480377091631166e-07, "loss": 0.7009, "step": 4216 }, { "epoch": 0.902732064970164, "grad_norm": 0.13691065637710362, "learning_rate": 9.43908146524164e-07, "loss": 0.7067, "step": 4217 }, { "epoch": 0.9029461347033796, "grad_norm": 0.13657292298918064, "learning_rate": 9.397873801423252e-07, "loss": 0.656, "step": 4218 }, { "epoch": 0.9031602044365952, "grad_norm": 0.13747198064130112, "learning_rate": 9.356754119197386e-07, "loss": 0.6953, "step": 4219 }, { "epoch": 0.9033742741698109, "grad_norm": 0.13493345557005731, "learning_rate": 9.315722437544883e-07, "loss": 0.6782, "step": 4220 }, { "epoch": 0.9035883439030264, "grad_norm": 0.1360362388580472, "learning_rate": 9.274778775405813e-07, "loss": 0.6813, "step": 4221 }, { "epoch": 0.903802413636242, "grad_norm": 0.13648955678738503, "learning_rate": 9.233923151679724e-07, "loss": 0.6967, "step": 4222 }, { "epoch": 0.9040164833694576, "grad_norm": 0.13909357343446574, "learning_rate": 9.193155585225511e-07, "loss": 0.7042, "step": 4223 }, { "epoch": 0.9042305531026732, "grad_norm": 0.13887076469319895, "learning_rate": 9.152476094861384e-07, "loss": 0.7095, "step": 4224 }, { "epoch": 0.9044446228358888, "grad_norm": 0.13716160358883434, "learning_rate": 9.111884699364926e-07, "loss": 0.7037, "step": 4225 }, { "epoch": 0.9046586925691044, "grad_norm": 0.1401960258811389, "learning_rate": 9.07138141747308e-07, "loss": 0.6599, "step": 4226 }, { "epoch": 0.9048727623023199, "grad_norm": 0.19759839121290612, "learning_rate": 9.030966267882024e-07, "loss": 0.6862, "step": 4227 }, { "epoch": 0.9050868320355355, "grad_norm": 0.1380400568062713, "learning_rate": 8.990639269247392e-07, "loss": 0.7016, "step": 4228 }, { "epoch": 0.9053009017687512, "grad_norm": 0.13798859019694037, "learning_rate": 8.950400440184004e-07, "loss": 0.7022, "step": 4229 }, { "epoch": 0.9055149715019668, "grad_norm": 0.13820047638881433, "learning_rate": 8.910249799266024e-07, "loss": 0.6957, "step": 4230 }, { "epoch": 0.9057290412351824, "grad_norm": 0.13423981271998803, "learning_rate": 8.870187365026961e-07, "loss": 0.6714, "step": 4231 }, { "epoch": 0.905943110968398, "grad_norm": 0.13946111140730902, "learning_rate": 8.830213155959511e-07, "loss": 0.709, "step": 4232 }, { "epoch": 0.9061571807016136, "grad_norm": 0.13548709371465542, "learning_rate": 8.790327190515757e-07, "loss": 0.7107, "step": 4233 }, { "epoch": 0.9063712504348291, "grad_norm": 0.13351014447234621, "learning_rate": 8.750529487106907e-07, "loss": 0.6611, "step": 4234 }, { "epoch": 0.9065853201680447, "grad_norm": 0.19082707639063, "learning_rate": 8.710820064103553e-07, "loss": 0.7009, "step": 4235 }, { "epoch": 0.9067993899012603, "grad_norm": 0.14032475995120708, "learning_rate": 8.671198939835523e-07, "loss": 0.7091, "step": 4236 }, { "epoch": 0.9070134596344759, "grad_norm": 0.13718192279792774, "learning_rate": 8.631666132591787e-07, "loss": 0.6787, "step": 4237 }, { "epoch": 0.9072275293676916, "grad_norm": 0.1369111108111374, "learning_rate": 8.592221660620681e-07, "loss": 0.6836, "step": 4238 }, { "epoch": 0.9074415991009072, "grad_norm": 0.1381243286863606, "learning_rate": 8.55286554212964e-07, "loss": 0.6635, "step": 4239 }, { "epoch": 0.9076556688341227, "grad_norm": 0.13582615247440263, "learning_rate": 8.513597795285422e-07, "loss": 0.7128, "step": 4240 }, { "epoch": 0.9078697385673383, "grad_norm": 0.1345352526826563, "learning_rate": 8.474418438213927e-07, "loss": 0.6654, "step": 4241 }, { "epoch": 0.9080838083005539, "grad_norm": 0.13392776665001568, "learning_rate": 8.435327489000267e-07, "loss": 0.6855, "step": 4242 }, { "epoch": 0.9082978780337695, "grad_norm": 0.13830397496776295, "learning_rate": 8.396324965688785e-07, "loss": 0.6834, "step": 4243 }, { "epoch": 0.9085119477669851, "grad_norm": 0.1345719563921343, "learning_rate": 8.357410886282946e-07, "loss": 0.6882, "step": 4244 }, { "epoch": 0.9087260175002007, "grad_norm": 0.13969201427154823, "learning_rate": 8.318585268745449e-07, "loss": 0.7141, "step": 4245 }, { "epoch": 0.9089400872334162, "grad_norm": 0.13621902698302082, "learning_rate": 8.27984813099807e-07, "loss": 0.6861, "step": 4246 }, { "epoch": 0.9091541569666319, "grad_norm": 0.13470107612327034, "learning_rate": 8.241199490921836e-07, "loss": 0.6779, "step": 4247 }, { "epoch": 0.9093682266998475, "grad_norm": 0.13756096212331864, "learning_rate": 8.202639366356923e-07, "loss": 0.6805, "step": 4248 }, { "epoch": 0.9095822964330631, "grad_norm": 0.13487815443914544, "learning_rate": 8.16416777510256e-07, "loss": 0.7133, "step": 4249 }, { "epoch": 0.9097963661662787, "grad_norm": 0.6780640255438278, "learning_rate": 8.125784734917186e-07, "loss": 0.6821, "step": 4250 }, { "epoch": 0.9100104358994943, "grad_norm": 0.1397252595980418, "learning_rate": 8.087490263518338e-07, "loss": 0.7032, "step": 4251 }, { "epoch": 0.9102245056327098, "grad_norm": 0.13391700082774008, "learning_rate": 8.049284378582656e-07, "loss": 0.6939, "step": 4252 }, { "epoch": 0.9104385753659254, "grad_norm": 0.1392238017282743, "learning_rate": 8.011167097745943e-07, "loss": 0.6917, "step": 4253 }, { "epoch": 0.910652645099141, "grad_norm": 0.13765354847185426, "learning_rate": 7.973138438603034e-07, "loss": 0.6743, "step": 4254 }, { "epoch": 0.9108667148323566, "grad_norm": 0.1371561090435617, "learning_rate": 7.935198418707935e-07, "loss": 0.6724, "step": 4255 }, { "epoch": 0.9110807845655723, "grad_norm": 0.14308962400311664, "learning_rate": 7.897347055573634e-07, "loss": 0.6678, "step": 4256 }, { "epoch": 0.9112948542987879, "grad_norm": 0.13712832529790503, "learning_rate": 7.859584366672268e-07, "loss": 0.7004, "step": 4257 }, { "epoch": 0.9115089240320035, "grad_norm": 0.13657913703164848, "learning_rate": 7.821910369435048e-07, "loss": 0.7099, "step": 4258 }, { "epoch": 0.911722993765219, "grad_norm": 0.13626676720993744, "learning_rate": 7.784325081252197e-07, "loss": 0.6735, "step": 4259 }, { "epoch": 0.9119370634984346, "grad_norm": 0.1406899067908914, "learning_rate": 7.746828519473038e-07, "loss": 0.7023, "step": 4260 }, { "epoch": 0.9121511332316502, "grad_norm": 0.13602732059821163, "learning_rate": 7.709420701405878e-07, "loss": 0.7045, "step": 4261 }, { "epoch": 0.9123652029648658, "grad_norm": 0.14004791769388897, "learning_rate": 7.67210164431813e-07, "loss": 0.6811, "step": 4262 }, { "epoch": 0.9125792726980814, "grad_norm": 0.18193442284236228, "learning_rate": 7.634871365436192e-07, "loss": 0.6869, "step": 4263 }, { "epoch": 0.912793342431297, "grad_norm": 0.1365871204195117, "learning_rate": 7.597729881945492e-07, "loss": 0.6855, "step": 4264 }, { "epoch": 0.9130074121645126, "grad_norm": 0.13805740320721904, "learning_rate": 7.560677210990497e-07, "loss": 0.701, "step": 4265 }, { "epoch": 0.9132214818977282, "grad_norm": 0.13376363466133837, "learning_rate": 7.52371336967459e-07, "loss": 0.6902, "step": 4266 }, { "epoch": 0.9134355516309438, "grad_norm": 0.13443944421728973, "learning_rate": 7.486838375060257e-07, "loss": 0.6761, "step": 4267 }, { "epoch": 0.9136496213641594, "grad_norm": 0.13479866072362381, "learning_rate": 7.450052244168949e-07, "loss": 0.6515, "step": 4268 }, { "epoch": 0.913863691097375, "grad_norm": 0.13890310816730966, "learning_rate": 7.413354993981015e-07, "loss": 0.6787, "step": 4269 }, { "epoch": 0.9140777608305906, "grad_norm": 0.13452385813391135, "learning_rate": 7.376746641435883e-07, "loss": 0.6948, "step": 4270 }, { "epoch": 0.9142918305638061, "grad_norm": 0.1348138019183146, "learning_rate": 7.340227203431882e-07, "loss": 0.7141, "step": 4271 }, { "epoch": 0.9145059002970217, "grad_norm": 0.14191670066971723, "learning_rate": 7.303796696826348e-07, "loss": 0.7142, "step": 4272 }, { "epoch": 0.9147199700302373, "grad_norm": 0.134099494770952, "learning_rate": 7.267455138435497e-07, "loss": 0.6903, "step": 4273 }, { "epoch": 0.914934039763453, "grad_norm": 0.13583121927338482, "learning_rate": 7.231202545034554e-07, "loss": 0.7117, "step": 4274 }, { "epoch": 0.9151481094966686, "grad_norm": 0.13886885883411906, "learning_rate": 7.195038933357645e-07, "loss": 0.7012, "step": 4275 }, { "epoch": 0.9153621792298842, "grad_norm": 0.13770266378658522, "learning_rate": 7.158964320097794e-07, "loss": 0.7132, "step": 4276 }, { "epoch": 0.9155762489630997, "grad_norm": 0.13461741783486814, "learning_rate": 7.122978721907015e-07, "loss": 0.6913, "step": 4277 }, { "epoch": 0.9157903186963153, "grad_norm": 0.1424576873922085, "learning_rate": 7.087082155396196e-07, "loss": 0.6816, "step": 4278 }, { "epoch": 0.9160043884295309, "grad_norm": 0.14082754894010235, "learning_rate": 7.051274637135108e-07, "loss": 0.7105, "step": 4279 }, { "epoch": 0.9162184581627465, "grad_norm": 0.14077410771621526, "learning_rate": 7.015556183652439e-07, "loss": 0.7326, "step": 4280 }, { "epoch": 0.9164325278959621, "grad_norm": 0.1384443980516193, "learning_rate": 6.979926811435755e-07, "loss": 0.6861, "step": 4281 }, { "epoch": 0.9166465976291777, "grad_norm": 0.13739173872270724, "learning_rate": 6.944386536931547e-07, "loss": 0.6947, "step": 4282 }, { "epoch": 0.9168606673623932, "grad_norm": 0.13832205645117335, "learning_rate": 6.908935376545067e-07, "loss": 0.7074, "step": 4283 }, { "epoch": 0.9170747370956089, "grad_norm": 0.13279344935726523, "learning_rate": 6.87357334664056e-07, "loss": 0.6893, "step": 4284 }, { "epoch": 0.9172888068288245, "grad_norm": 0.17245526487864973, "learning_rate": 6.838300463541103e-07, "loss": 0.7016, "step": 4285 }, { "epoch": 0.9175028765620401, "grad_norm": 0.13536991850625252, "learning_rate": 6.803116743528516e-07, "loss": 0.6945, "step": 4286 }, { "epoch": 0.9177169462952557, "grad_norm": 0.13495945295599548, "learning_rate": 6.768022202843605e-07, "loss": 0.7181, "step": 4287 }, { "epoch": 0.9179310160284713, "grad_norm": 0.15546373828749133, "learning_rate": 6.733016857685903e-07, "loss": 0.666, "step": 4288 }, { "epoch": 0.9181450857616869, "grad_norm": 0.13600256434721386, "learning_rate": 6.698100724213819e-07, "loss": 0.6778, "step": 4289 }, { "epoch": 0.9183591554949024, "grad_norm": 0.13338886836676647, "learning_rate": 6.663273818544613e-07, "loss": 0.6731, "step": 4290 }, { "epoch": 0.918573225228118, "grad_norm": 0.13957994583414532, "learning_rate": 6.628536156754273e-07, "loss": 0.6838, "step": 4291 }, { "epoch": 0.9187872949613336, "grad_norm": 0.14212469513701823, "learning_rate": 6.59388775487766e-07, "loss": 0.6983, "step": 4292 }, { "epoch": 0.9190013646945493, "grad_norm": 0.13658478466758459, "learning_rate": 6.559328628908446e-07, "loss": 0.671, "step": 4293 }, { "epoch": 0.9192154344277649, "grad_norm": 0.13367364859649347, "learning_rate": 6.524858794799005e-07, "loss": 0.688, "step": 4294 }, { "epoch": 0.9194295041609805, "grad_norm": 0.13800794729231464, "learning_rate": 6.49047826846061e-07, "loss": 0.6902, "step": 4295 }, { "epoch": 0.919643573894196, "grad_norm": 0.13528922517669267, "learning_rate": 6.456187065763165e-07, "loss": 0.6924, "step": 4296 }, { "epoch": 0.9198576436274116, "grad_norm": 0.13582332354188012, "learning_rate": 6.421985202535497e-07, "loss": 0.6987, "step": 4297 }, { "epoch": 0.9200717133606272, "grad_norm": 0.13086211665040132, "learning_rate": 6.387872694565112e-07, "loss": 0.666, "step": 4298 }, { "epoch": 0.9202857830938428, "grad_norm": 0.13560469131861522, "learning_rate": 6.353849557598235e-07, "loss": 0.691, "step": 4299 }, { "epoch": 0.9204998528270584, "grad_norm": 0.1396926440578085, "learning_rate": 6.319915807339927e-07, "loss": 0.7283, "step": 4300 }, { "epoch": 0.920713922560274, "grad_norm": 0.13773179089997417, "learning_rate": 6.286071459453969e-07, "loss": 0.6897, "step": 4301 }, { "epoch": 0.9209279922934897, "grad_norm": 0.13891185958429028, "learning_rate": 6.252316529562797e-07, "loss": 0.7037, "step": 4302 }, { "epoch": 0.9211420620267052, "grad_norm": 0.133402661825684, "learning_rate": 6.218651033247636e-07, "loss": 0.6747, "step": 4303 }, { "epoch": 0.9213561317599208, "grad_norm": 0.13430283133525148, "learning_rate": 6.185074986048456e-07, "loss": 0.6973, "step": 4304 }, { "epoch": 0.9215702014931364, "grad_norm": 0.13605896074993906, "learning_rate": 6.151588403463838e-07, "loss": 0.7241, "step": 4305 }, { "epoch": 0.921784271226352, "grad_norm": 0.1336695624237095, "learning_rate": 6.118191300951171e-07, "loss": 0.6703, "step": 4306 }, { "epoch": 0.9219983409595676, "grad_norm": 0.137823698276276, "learning_rate": 6.084883693926502e-07, "loss": 0.6895, "step": 4307 }, { "epoch": 0.9222124106927831, "grad_norm": 0.13543544897243118, "learning_rate": 6.051665597764534e-07, "loss": 0.6947, "step": 4308 }, { "epoch": 0.9224264804259987, "grad_norm": 0.1403307461248502, "learning_rate": 6.018537027798665e-07, "loss": 0.7043, "step": 4309 }, { "epoch": 0.9226405501592143, "grad_norm": 0.13496220856618912, "learning_rate": 5.985497999321044e-07, "loss": 0.6826, "step": 4310 }, { "epoch": 0.92285461989243, "grad_norm": 0.13833307562725605, "learning_rate": 5.952548527582358e-07, "loss": 0.6961, "step": 4311 }, { "epoch": 0.9230686896256456, "grad_norm": 0.13497327975776574, "learning_rate": 5.919688627792086e-07, "loss": 0.682, "step": 4312 }, { "epoch": 0.9232827593588612, "grad_norm": 0.1346846618255718, "learning_rate": 5.88691831511825e-07, "loss": 0.6823, "step": 4313 }, { "epoch": 0.9234968290920768, "grad_norm": 0.13567372056383306, "learning_rate": 5.854237604687574e-07, "loss": 0.6834, "step": 4314 }, { "epoch": 0.9237108988252923, "grad_norm": 0.13482695070791353, "learning_rate": 5.821646511585433e-07, "loss": 0.6617, "step": 4315 }, { "epoch": 0.9239249685585079, "grad_norm": 0.13795490822378076, "learning_rate": 5.789145050855793e-07, "loss": 0.7108, "step": 4316 }, { "epoch": 0.9241390382917235, "grad_norm": 0.1379904292501817, "learning_rate": 5.7567332375013e-07, "loss": 0.6928, "step": 4317 }, { "epoch": 0.9243531080249391, "grad_norm": 0.1343495493065187, "learning_rate": 5.724411086483139e-07, "loss": 0.662, "step": 4318 }, { "epoch": 0.9245671777581547, "grad_norm": 0.1349392778611965, "learning_rate": 5.6921786127212e-07, "loss": 0.6817, "step": 4319 }, { "epoch": 0.9247812474913704, "grad_norm": 0.22708194026805, "learning_rate": 5.660035831093935e-07, "loss": 0.698, "step": 4320 }, { "epoch": 0.924995317224586, "grad_norm": 0.13672210511292796, "learning_rate": 5.627982756438344e-07, "loss": 0.7111, "step": 4321 }, { "epoch": 0.9252093869578015, "grad_norm": 0.1363800057454497, "learning_rate": 5.596019403550145e-07, "loss": 0.6684, "step": 4322 }, { "epoch": 0.9254234566910171, "grad_norm": 0.13840339865274745, "learning_rate": 5.564145787183473e-07, "loss": 0.6758, "step": 4323 }, { "epoch": 0.9256375264242327, "grad_norm": 0.13425733861343547, "learning_rate": 5.532361922051221e-07, "loss": 0.6809, "step": 4324 }, { "epoch": 0.9258515961574483, "grad_norm": 0.13930949914002347, "learning_rate": 5.500667822824679e-07, "loss": 0.7167, "step": 4325 }, { "epoch": 0.9260656658906639, "grad_norm": 0.1344001305617284, "learning_rate": 5.469063504133832e-07, "loss": 0.6875, "step": 4326 }, { "epoch": 0.9262797356238794, "grad_norm": 0.13340477883012816, "learning_rate": 5.437548980567187e-07, "loss": 0.6786, "step": 4327 }, { "epoch": 0.926493805357095, "grad_norm": 0.13378110904294804, "learning_rate": 5.406124266671753e-07, "loss": 0.6913, "step": 4328 }, { "epoch": 0.9267078750903107, "grad_norm": 0.134446707366625, "learning_rate": 5.374789376953149e-07, "loss": 0.6963, "step": 4329 }, { "epoch": 0.9269219448235263, "grad_norm": 0.1338691969218982, "learning_rate": 5.343544325875494e-07, "loss": 0.6669, "step": 4330 }, { "epoch": 0.9271360145567419, "grad_norm": 0.13841565087395938, "learning_rate": 5.312389127861428e-07, "loss": 0.6902, "step": 4331 }, { "epoch": 0.9273500842899575, "grad_norm": 0.13213259093858248, "learning_rate": 5.281323797292182e-07, "loss": 0.6485, "step": 4332 }, { "epoch": 0.927564154023173, "grad_norm": 0.13820700523065554, "learning_rate": 5.250348348507395e-07, "loss": 0.7012, "step": 4333 }, { "epoch": 0.9277782237563886, "grad_norm": 0.13884337919224177, "learning_rate": 5.219462795805341e-07, "loss": 0.6931, "step": 4334 }, { "epoch": 0.9279922934896042, "grad_norm": 0.14160605232931436, "learning_rate": 5.188667153442661e-07, "loss": 0.7401, "step": 4335 }, { "epoch": 0.9282063632228198, "grad_norm": 0.13562775573795124, "learning_rate": 5.157961435634628e-07, "loss": 0.6852, "step": 4336 }, { "epoch": 0.9284204329560354, "grad_norm": 0.13329570849461553, "learning_rate": 5.127345656554928e-07, "loss": 0.6655, "step": 4337 }, { "epoch": 0.9286345026892511, "grad_norm": 0.13411666000495157, "learning_rate": 5.09681983033572e-07, "loss": 0.6786, "step": 4338 }, { "epoch": 0.9288485724224667, "grad_norm": 0.13226173142356434, "learning_rate": 5.066383971067735e-07, "loss": 0.6784, "step": 4339 }, { "epoch": 0.9290626421556822, "grad_norm": 0.1389445083227217, "learning_rate": 5.036038092800044e-07, "loss": 0.7004, "step": 4340 }, { "epoch": 0.9292767118888978, "grad_norm": 0.138040961006918, "learning_rate": 5.005782209540267e-07, "loss": 0.6915, "step": 4341 }, { "epoch": 0.9294907816221134, "grad_norm": 0.1399110270978668, "learning_rate": 4.975616335254474e-07, "loss": 0.7115, "step": 4342 }, { "epoch": 0.929704851355329, "grad_norm": 0.1367055044829687, "learning_rate": 4.945540483867173e-07, "loss": 0.6983, "step": 4343 }, { "epoch": 0.9299189210885446, "grad_norm": 0.13359304363733251, "learning_rate": 4.915554669261346e-07, "loss": 0.7019, "step": 4344 }, { "epoch": 0.9301329908217602, "grad_norm": 0.1341565940715023, "learning_rate": 4.885658905278345e-07, "loss": 0.6756, "step": 4345 }, { "epoch": 0.9303470605549757, "grad_norm": 0.13292871340605816, "learning_rate": 4.855853205718019e-07, "loss": 0.6851, "step": 4346 }, { "epoch": 0.9305611302881914, "grad_norm": 0.13233325821628328, "learning_rate": 4.826137584338653e-07, "loss": 0.6881, "step": 4347 }, { "epoch": 0.930775200021407, "grad_norm": 0.134368961841193, "learning_rate": 4.796512054856872e-07, "loss": 0.7141, "step": 4348 }, { "epoch": 0.9309892697546226, "grad_norm": 0.13955584155179407, "learning_rate": 4.766976630947806e-07, "loss": 0.7054, "step": 4349 }, { "epoch": 0.9312033394878382, "grad_norm": 0.1343345661614504, "learning_rate": 4.737531326244926e-07, "loss": 0.67, "step": 4350 }, { "epoch": 0.9314174092210538, "grad_norm": 0.13838895579839383, "learning_rate": 4.7081761543401604e-07, "loss": 0.6818, "step": 4351 }, { "epoch": 0.9316314789542693, "grad_norm": 0.13649714084423922, "learning_rate": 4.678911128783781e-07, "loss": 0.7375, "step": 4352 }, { "epoch": 0.9318455486874849, "grad_norm": 0.13867464505550625, "learning_rate": 4.64973626308447e-07, "loss": 0.6888, "step": 4353 }, { "epoch": 0.9320596184207005, "grad_norm": 0.13662169417139988, "learning_rate": 4.6206515707093e-07, "loss": 0.7053, "step": 4354 }, { "epoch": 0.9322736881539161, "grad_norm": 0.1346330008188449, "learning_rate": 4.59165706508371e-07, "loss": 0.6918, "step": 4355 }, { "epoch": 0.9324877578871318, "grad_norm": 0.13594427563867142, "learning_rate": 4.5627527595915043e-07, "loss": 0.6741, "step": 4356 }, { "epoch": 0.9327018276203474, "grad_norm": 0.13626106039314084, "learning_rate": 4.5339386675748775e-07, "loss": 0.6916, "step": 4357 }, { "epoch": 0.932915897353563, "grad_norm": 0.13426337054089502, "learning_rate": 4.5052148023343234e-07, "loss": 0.6959, "step": 4358 }, { "epoch": 0.9331299670867785, "grad_norm": 0.1350433586386857, "learning_rate": 4.4765811771287693e-07, "loss": 0.685, "step": 4359 }, { "epoch": 0.9333440368199941, "grad_norm": 0.14065317528715676, "learning_rate": 4.44803780517542e-07, "loss": 0.7046, "step": 4360 }, { "epoch": 0.9335581065532097, "grad_norm": 0.13425958907066096, "learning_rate": 4.419584699649826e-07, "loss": 0.666, "step": 4361 }, { "epoch": 0.9337721762864253, "grad_norm": 0.13427276238033473, "learning_rate": 4.3912218736859467e-07, "loss": 0.6703, "step": 4362 }, { "epoch": 0.9339862460196409, "grad_norm": 0.1309313948057116, "learning_rate": 4.362949340375955e-07, "loss": 0.6681, "step": 4363 }, { "epoch": 0.9342003157528564, "grad_norm": 0.1309459584048416, "learning_rate": 4.3347671127704327e-07, "loss": 0.6758, "step": 4364 }, { "epoch": 0.9344143854860721, "grad_norm": 0.13306099235595414, "learning_rate": 4.306675203878219e-07, "loss": 0.6685, "step": 4365 }, { "epoch": 0.9346284552192877, "grad_norm": 0.13658798500494218, "learning_rate": 4.2786736266664965e-07, "loss": 0.6837, "step": 4366 }, { "epoch": 0.9348425249525033, "grad_norm": 0.13481011599257658, "learning_rate": 4.250762394060748e-07, "loss": 0.6928, "step": 4367 }, { "epoch": 0.9350565946857189, "grad_norm": 0.13428232860094064, "learning_rate": 4.2229415189447344e-07, "loss": 0.6809, "step": 4368 }, { "epoch": 0.9352706644189345, "grad_norm": 0.13680713632093935, "learning_rate": 4.195211014160561e-07, "loss": 0.6959, "step": 4369 }, { "epoch": 0.93548473415215, "grad_norm": 0.1347440990639614, "learning_rate": 4.167570892508521e-07, "loss": 0.6776, "step": 4370 }, { "epoch": 0.9356988038853656, "grad_norm": 0.14506695808540718, "learning_rate": 4.140021166747299e-07, "loss": 0.6971, "step": 4371 }, { "epoch": 0.9359128736185812, "grad_norm": 0.1444892034589848, "learning_rate": 4.112561849593766e-07, "loss": 0.6971, "step": 4372 }, { "epoch": 0.9361269433517968, "grad_norm": 0.1357028449587074, "learning_rate": 4.085192953723072e-07, "loss": 0.6765, "step": 4373 }, { "epoch": 0.9363410130850125, "grad_norm": 0.14539471081296, "learning_rate": 4.0579144917686884e-07, "loss": 0.6844, "step": 4374 }, { "epoch": 0.9365550828182281, "grad_norm": 0.13454942448465235, "learning_rate": 4.0307264763223e-07, "loss": 0.6732, "step": 4375 }, { "epoch": 0.9367691525514437, "grad_norm": 0.13730075304577238, "learning_rate": 4.0036289199338e-07, "loss": 0.7159, "step": 4376 }, { "epoch": 0.9369832222846592, "grad_norm": 0.13724928461966887, "learning_rate": 3.9766218351114495e-07, "loss": 0.7087, "step": 4377 }, { "epoch": 0.9371972920178748, "grad_norm": 0.13364995741210184, "learning_rate": 3.949705234321588e-07, "loss": 0.6863, "step": 4378 }, { "epoch": 0.9374113617510904, "grad_norm": 0.13579667041546697, "learning_rate": 3.922879129988921e-07, "loss": 0.6778, "step": 4379 }, { "epoch": 0.937625431484306, "grad_norm": 0.13256914121735144, "learning_rate": 3.8961435344963216e-07, "loss": 0.6849, "step": 4380 }, { "epoch": 0.9378395012175216, "grad_norm": 0.13199552332290715, "learning_rate": 3.8694984601848727e-07, "loss": 0.6923, "step": 4381 }, { "epoch": 0.9380535709507372, "grad_norm": 0.13486365897241173, "learning_rate": 3.842943919353914e-07, "loss": 0.6542, "step": 4382 }, { "epoch": 0.9382676406839529, "grad_norm": 0.14000675774329724, "learning_rate": 3.8164799242609516e-07, "loss": 0.7297, "step": 4383 }, { "epoch": 0.9384817104171684, "grad_norm": 0.1370351927551661, "learning_rate": 3.790106487121725e-07, "loss": 0.6944, "step": 4384 }, { "epoch": 0.938695780150384, "grad_norm": 0.1339984287202759, "learning_rate": 3.763823620110207e-07, "loss": 0.6994, "step": 4385 }, { "epoch": 0.9389098498835996, "grad_norm": 0.21962560537470135, "learning_rate": 3.737631335358427e-07, "loss": 0.6857, "step": 4386 }, { "epoch": 0.9391239196168152, "grad_norm": 0.1341062847667069, "learning_rate": 3.7115296449567795e-07, "loss": 0.6686, "step": 4387 }, { "epoch": 0.9393379893500308, "grad_norm": 0.15318823133532672, "learning_rate": 3.685518560953738e-07, "loss": 0.709, "step": 4388 }, { "epoch": 0.9395520590832463, "grad_norm": 0.13484647147667309, "learning_rate": 3.659598095355921e-07, "loss": 0.6827, "step": 4389 }, { "epoch": 0.9397661288164619, "grad_norm": 0.13107261887840005, "learning_rate": 3.633768260128223e-07, "loss": 0.6734, "step": 4390 }, { "epoch": 0.9399801985496775, "grad_norm": 0.12984179382874467, "learning_rate": 3.6080290671936635e-07, "loss": 0.6622, "step": 4391 }, { "epoch": 0.9401942682828931, "grad_norm": 0.1436758111172662, "learning_rate": 3.582380528433338e-07, "loss": 0.6966, "step": 4392 }, { "epoch": 0.9404083380161088, "grad_norm": 0.13607666299249183, "learning_rate": 3.5568226556866206e-07, "loss": 0.6861, "step": 4393 }, { "epoch": 0.9406224077493244, "grad_norm": 0.1362418372533418, "learning_rate": 3.5313554607509846e-07, "loss": 0.6968, "step": 4394 }, { "epoch": 0.94083647748254, "grad_norm": 0.13870892474796043, "learning_rate": 3.5059789553819835e-07, "loss": 0.7313, "step": 4395 }, { "epoch": 0.9410505472157555, "grad_norm": 0.13294179159147404, "learning_rate": 3.480693151293424e-07, "loss": 0.6911, "step": 4396 }, { "epoch": 0.9412646169489711, "grad_norm": 0.13480810563573517, "learning_rate": 3.4554980601571474e-07, "loss": 0.6941, "step": 4397 }, { "epoch": 0.9414786866821867, "grad_norm": 0.13178243274206683, "learning_rate": 3.4303936936031624e-07, "loss": 0.6635, "step": 4398 }, { "epoch": 0.9416927564154023, "grad_norm": 0.1327853646381699, "learning_rate": 3.4053800632196434e-07, "loss": 0.67, "step": 4399 }, { "epoch": 0.9419068261486179, "grad_norm": 0.13921429858113968, "learning_rate": 3.380457180552799e-07, "loss": 0.6904, "step": 4400 }, { "epoch": 0.9421208958818335, "grad_norm": 0.1353642741588206, "learning_rate": 3.3556250571069813e-07, "loss": 0.671, "step": 4401 }, { "epoch": 0.9423349656150491, "grad_norm": 0.13596869594698763, "learning_rate": 3.3308837043446897e-07, "loss": 0.7122, "step": 4402 }, { "epoch": 0.9425490353482647, "grad_norm": 0.1361060299958215, "learning_rate": 3.306233133686454e-07, "loss": 0.696, "step": 4403 }, { "epoch": 0.9427631050814803, "grad_norm": 0.13932448088370206, "learning_rate": 3.281673356510928e-07, "loss": 0.6978, "step": 4404 }, { "epoch": 0.9429771748146959, "grad_norm": 0.13320871046081048, "learning_rate": 3.2572043841548664e-07, "loss": 0.688, "step": 4405 }, { "epoch": 0.9431912445479115, "grad_norm": 0.2034995414611716, "learning_rate": 3.232826227913144e-07, "loss": 0.7143, "step": 4406 }, { "epoch": 0.9434053142811271, "grad_norm": 0.13209390106435284, "learning_rate": 3.208538899038605e-07, "loss": 0.6811, "step": 4407 }, { "epoch": 0.9436193840143426, "grad_norm": 0.13962661634831805, "learning_rate": 3.1843424087422805e-07, "loss": 0.7176, "step": 4408 }, { "epoch": 0.9438334537475582, "grad_norm": 0.13101465821042343, "learning_rate": 3.1602367681932146e-07, "loss": 0.667, "step": 4409 }, { "epoch": 0.9440475234807738, "grad_norm": 0.1344252901880154, "learning_rate": 3.1362219885185283e-07, "loss": 0.6861, "step": 4410 }, { "epoch": 0.9442615932139895, "grad_norm": 0.13888038594671412, "learning_rate": 3.1122980808033997e-07, "loss": 0.7037, "step": 4411 }, { "epoch": 0.9444756629472051, "grad_norm": 0.13686136284085942, "learning_rate": 3.088465056091061e-07, "loss": 0.6975, "step": 4412 }, { "epoch": 0.9446897326804207, "grad_norm": 0.1338173824839266, "learning_rate": 3.0647229253828014e-07, "loss": 0.68, "step": 4413 }, { "epoch": 0.9449038024136363, "grad_norm": 0.13403148746534746, "learning_rate": 3.041071699637921e-07, "loss": 0.6726, "step": 4414 }, { "epoch": 0.9451178721468518, "grad_norm": 0.13377541711281205, "learning_rate": 3.017511389773775e-07, "loss": 0.6628, "step": 4415 }, { "epoch": 0.9453319418800674, "grad_norm": 0.13616913975187378, "learning_rate": 2.9940420066658204e-07, "loss": 0.6846, "step": 4416 }, { "epoch": 0.945546011613283, "grad_norm": 0.13699250871645566, "learning_rate": 2.970663561147413e-07, "loss": 0.6778, "step": 4417 }, { "epoch": 0.9457600813464986, "grad_norm": 0.14021969737564713, "learning_rate": 2.9473760640100546e-07, "loss": 0.7215, "step": 4418 }, { "epoch": 0.9459741510797142, "grad_norm": 0.1345648386139211, "learning_rate": 2.924179526003168e-07, "loss": 0.6825, "step": 4419 }, { "epoch": 0.9461882208129299, "grad_norm": 0.13169122305121064, "learning_rate": 2.901073957834255e-07, "loss": 0.687, "step": 4420 }, { "epoch": 0.9464022905461454, "grad_norm": 0.1338011288145907, "learning_rate": 2.8780593701688064e-07, "loss": 0.6873, "step": 4421 }, { "epoch": 0.946616360279361, "grad_norm": 0.13758208575962663, "learning_rate": 2.855135773630302e-07, "loss": 0.7097, "step": 4422 }, { "epoch": 0.9468304300125766, "grad_norm": 0.13515567347664284, "learning_rate": 2.832303178800233e-07, "loss": 0.665, "step": 4423 }, { "epoch": 0.9470444997457922, "grad_norm": 0.35350120064438184, "learning_rate": 2.80956159621808e-07, "loss": 0.7249, "step": 4424 }, { "epoch": 0.9472585694790078, "grad_norm": 0.1367390259092207, "learning_rate": 2.7869110363813344e-07, "loss": 0.7237, "step": 4425 }, { "epoch": 0.9474726392122234, "grad_norm": 0.13325065939442832, "learning_rate": 2.7643515097454554e-07, "loss": 0.6605, "step": 4426 }, { "epoch": 0.9476867089454389, "grad_norm": 0.14815206254336985, "learning_rate": 2.7418830267238463e-07, "loss": 0.6906, "step": 4427 }, { "epoch": 0.9479007786786545, "grad_norm": 0.13320652457641352, "learning_rate": 2.719505597687944e-07, "loss": 0.6752, "step": 4428 }, { "epoch": 0.9481148484118702, "grad_norm": 0.1383322152862038, "learning_rate": 2.6972192329671077e-07, "loss": 0.6748, "step": 4429 }, { "epoch": 0.9483289181450858, "grad_norm": 0.13515569302825342, "learning_rate": 2.675023942848687e-07, "loss": 0.7014, "step": 4430 }, { "epoch": 0.9485429878783014, "grad_norm": 0.13961809113509327, "learning_rate": 2.6529197375780414e-07, "loss": 0.7026, "step": 4431 }, { "epoch": 0.948757057611517, "grad_norm": 0.1332682385828863, "learning_rate": 2.630906627358343e-07, "loss": 0.6859, "step": 4432 }, { "epoch": 0.9489711273447325, "grad_norm": 0.1332312600242191, "learning_rate": 2.6089846223508853e-07, "loss": 0.6807, "step": 4433 }, { "epoch": 0.9491851970779481, "grad_norm": 0.13678085190562997, "learning_rate": 2.587153732674752e-07, "loss": 0.7067, "step": 4434 }, { "epoch": 0.9493992668111637, "grad_norm": 0.13671878737652773, "learning_rate": 2.5654139684070823e-07, "loss": 0.7146, "step": 4435 }, { "epoch": 0.9496133365443793, "grad_norm": 0.13399327711840386, "learning_rate": 2.5437653395829374e-07, "loss": 0.675, "step": 4436 }, { "epoch": 0.9498274062775949, "grad_norm": 0.13492856584051507, "learning_rate": 2.5222078561952133e-07, "loss": 0.6755, "step": 4437 }, { "epoch": 0.9500414760108106, "grad_norm": 0.13405416284934196, "learning_rate": 2.500741528194883e-07, "loss": 0.6931, "step": 4438 }, { "epoch": 0.9502555457440262, "grad_norm": 0.1335342587000633, "learning_rate": 2.4793663654906873e-07, "loss": 0.6749, "step": 4439 }, { "epoch": 0.9504696154772417, "grad_norm": 0.13363797238302091, "learning_rate": 2.4580823779494223e-07, "loss": 0.6909, "step": 4440 }, { "epoch": 0.9506836852104573, "grad_norm": 0.13993036987058308, "learning_rate": 2.436889575395718e-07, "loss": 0.7144, "step": 4441 }, { "epoch": 0.9508977549436729, "grad_norm": 0.1350306114647872, "learning_rate": 2.415787967612127e-07, "loss": 0.6808, "step": 4442 }, { "epoch": 0.9511118246768885, "grad_norm": 0.132935355904939, "learning_rate": 2.394777564339146e-07, "loss": 0.6922, "step": 4443 }, { "epoch": 0.9513258944101041, "grad_norm": 0.13584181055687816, "learning_rate": 2.373858375275062e-07, "loss": 0.7099, "step": 4444 }, { "epoch": 0.9515399641433196, "grad_norm": 0.13149826096458744, "learning_rate": 2.353030410076218e-07, "loss": 0.6684, "step": 4445 }, { "epoch": 0.9517540338765352, "grad_norm": 0.13398714533216113, "learning_rate": 2.332293678356723e-07, "loss": 0.6838, "step": 4446 }, { "epoch": 0.9519681036097509, "grad_norm": 0.13535967046509848, "learning_rate": 2.311648189688609e-07, "loss": 0.71, "step": 4447 }, { "epoch": 0.9521821733429665, "grad_norm": 0.13057818672857943, "learning_rate": 2.2910939536018307e-07, "loss": 0.6772, "step": 4448 }, { "epoch": 0.9523962430761821, "grad_norm": 0.13637946468727496, "learning_rate": 2.2706309795841318e-07, "loss": 0.7041, "step": 4449 }, { "epoch": 0.9526103128093977, "grad_norm": 0.13425348068570012, "learning_rate": 2.250259277081246e-07, "loss": 0.683, "step": 4450 }, { "epoch": 0.9528243825426133, "grad_norm": 0.1333832035130265, "learning_rate": 2.2299788554966507e-07, "loss": 0.6914, "step": 4451 }, { "epoch": 0.9530384522758288, "grad_norm": 0.1308044899213028, "learning_rate": 2.209789724191791e-07, "loss": 0.6722, "step": 4452 }, { "epoch": 0.9532525220090444, "grad_norm": 0.1372767061875979, "learning_rate": 2.1896918924859457e-07, "loss": 0.7358, "step": 4453 }, { "epoch": 0.95346659174226, "grad_norm": 0.1371866888592856, "learning_rate": 2.1696853696562047e-07, "loss": 0.685, "step": 4454 }, { "epoch": 0.9536806614754756, "grad_norm": 0.13061073792711084, "learning_rate": 2.149770164937559e-07, "loss": 0.685, "step": 4455 }, { "epoch": 0.9538947312086913, "grad_norm": 0.13442012214273127, "learning_rate": 2.1299462875228105e-07, "loss": 0.6849, "step": 4456 }, { "epoch": 0.9541088009419069, "grad_norm": 0.15943456154091798, "learning_rate": 2.1102137465626615e-07, "loss": 0.6692, "step": 4457 }, { "epoch": 0.9543228706751224, "grad_norm": 0.132934193772929, "learning_rate": 2.0905725511655815e-07, "loss": 0.6776, "step": 4458 }, { "epoch": 0.954536940408338, "grad_norm": 0.13564113269996744, "learning_rate": 2.0710227103979186e-07, "loss": 0.6639, "step": 4459 }, { "epoch": 0.9547510101415536, "grad_norm": 0.13845246517811657, "learning_rate": 2.0515642332838537e-07, "loss": 0.7074, "step": 4460 }, { "epoch": 0.9549650798747692, "grad_norm": 0.13403493581662498, "learning_rate": 2.032197128805402e-07, "loss": 0.68, "step": 4461 }, { "epoch": 0.9551791496079848, "grad_norm": 0.138020929093702, "learning_rate": 2.012921405902346e-07, "loss": 0.7176, "step": 4462 }, { "epoch": 0.9553932193412004, "grad_norm": 0.1363071013158765, "learning_rate": 1.993737073472324e-07, "loss": 0.6726, "step": 4463 }, { "epoch": 0.9556072890744159, "grad_norm": 0.13671866164881524, "learning_rate": 1.9746441403708294e-07, "loss": 0.7132, "step": 4464 }, { "epoch": 0.9558213588076316, "grad_norm": 0.13271304688009625, "learning_rate": 1.9556426154110798e-07, "loss": 0.6677, "step": 4465 }, { "epoch": 0.9560354285408472, "grad_norm": 0.14321486852087098, "learning_rate": 1.9367325073641695e-07, "loss": 0.7064, "step": 4466 }, { "epoch": 0.9562494982740628, "grad_norm": 0.1347899281523679, "learning_rate": 1.9179138249589836e-07, "loss": 0.6871, "step": 4467 }, { "epoch": 0.9564635680072784, "grad_norm": 0.1350541043284291, "learning_rate": 1.8991865768821506e-07, "loss": 0.6617, "step": 4468 }, { "epoch": 0.956677637740494, "grad_norm": 0.1560933060408755, "learning_rate": 1.8805507717781558e-07, "loss": 0.6981, "step": 4469 }, { "epoch": 0.9568917074737096, "grad_norm": 0.17697761160628103, "learning_rate": 1.8620064182492513e-07, "loss": 0.6937, "step": 4470 }, { "epoch": 0.9571057772069251, "grad_norm": 0.12982871103577237, "learning_rate": 1.8435535248554792e-07, "loss": 0.6664, "step": 4471 }, { "epoch": 0.9573198469401407, "grad_norm": 0.31474112827900536, "learning_rate": 1.825192100114692e-07, "loss": 0.6866, "step": 4472 }, { "epoch": 0.9575339166733563, "grad_norm": 0.1320021736959052, "learning_rate": 1.8069221525024217e-07, "loss": 0.6785, "step": 4473 }, { "epoch": 0.957747986406572, "grad_norm": 0.13296474083529464, "learning_rate": 1.7887436904520772e-07, "loss": 0.679, "step": 4474 }, { "epoch": 0.9579620561397876, "grad_norm": 0.131819375973322, "learning_rate": 1.7706567223548353e-07, "loss": 0.6693, "step": 4475 }, { "epoch": 0.9581761258730032, "grad_norm": 0.1333344066768023, "learning_rate": 1.7526612565595513e-07, "loss": 0.6722, "step": 4476 }, { "epoch": 0.9583901956062187, "grad_norm": 0.13502912752118432, "learning_rate": 1.7347573013729357e-07, "loss": 0.7027, "step": 4477 }, { "epoch": 0.9586042653394343, "grad_norm": 0.13916177471834354, "learning_rate": 1.7169448650594e-07, "loss": 0.7026, "step": 4478 }, { "epoch": 0.9588183350726499, "grad_norm": 0.13060265900714255, "learning_rate": 1.6992239558411448e-07, "loss": 0.6887, "step": 4479 }, { "epoch": 0.9590324048058655, "grad_norm": 0.13025350318471712, "learning_rate": 1.6815945818981382e-07, "loss": 0.6729, "step": 4480 }, { "epoch": 0.9592464745390811, "grad_norm": 0.1329791169788774, "learning_rate": 1.664056751368004e-07, "loss": 0.6825, "step": 4481 }, { "epoch": 0.9594605442722967, "grad_norm": 0.13406186774432716, "learning_rate": 1.6466104723461995e-07, "loss": 0.6926, "step": 4482 }, { "epoch": 0.9596746140055123, "grad_norm": 0.13551817770086835, "learning_rate": 1.6292557528859276e-07, "loss": 0.7007, "step": 4483 }, { "epoch": 0.9598886837387279, "grad_norm": 0.13647123698636426, "learning_rate": 1.6119926009980468e-07, "loss": 0.6807, "step": 4484 }, { "epoch": 0.9601027534719435, "grad_norm": 0.1363350685771318, "learning_rate": 1.5948210246512276e-07, "loss": 0.712, "step": 4485 }, { "epoch": 0.9603168232051591, "grad_norm": 0.14022481649117083, "learning_rate": 1.57774103177184e-07, "loss": 0.6867, "step": 4486 }, { "epoch": 0.9605308929383747, "grad_norm": 0.13422756693687948, "learning_rate": 1.5607526302439558e-07, "loss": 0.6975, "step": 4487 }, { "epoch": 0.9607449626715903, "grad_norm": 0.13485927075782286, "learning_rate": 1.5438558279093907e-07, "loss": 0.6973, "step": 4488 }, { "epoch": 0.9609590324048058, "grad_norm": 0.1337655303690488, "learning_rate": 1.5270506325676838e-07, "loss": 0.6923, "step": 4489 }, { "epoch": 0.9611731021380214, "grad_norm": 0.13397662237486613, "learning_rate": 1.5103370519760963e-07, "loss": 0.6814, "step": 4490 }, { "epoch": 0.961387171871237, "grad_norm": 0.13626932420697732, "learning_rate": 1.4937150938495682e-07, "loss": 0.6974, "step": 4491 }, { "epoch": 0.9616012416044527, "grad_norm": 0.13348708982316596, "learning_rate": 1.4771847658608063e-07, "loss": 0.6756, "step": 4492 }, { "epoch": 0.9618153113376683, "grad_norm": 0.13474337487021495, "learning_rate": 1.460746075640107e-07, "loss": 0.6977, "step": 4493 }, { "epoch": 0.9620293810708839, "grad_norm": 0.13275729646111029, "learning_rate": 1.4443990307755784e-07, "loss": 0.6781, "step": 4494 }, { "epoch": 0.9622434508040995, "grad_norm": 0.1354756638284012, "learning_rate": 1.4281436388130066e-07, "loss": 0.6998, "step": 4495 }, { "epoch": 0.962457520537315, "grad_norm": 0.1374455786312147, "learning_rate": 1.4119799072558339e-07, "loss": 0.7162, "step": 4496 }, { "epoch": 0.9626715902705306, "grad_norm": 0.13387082697697542, "learning_rate": 1.395907843565203e-07, "loss": 0.6874, "step": 4497 }, { "epoch": 0.9628856600037462, "grad_norm": 0.13300187771492367, "learning_rate": 1.379927455159935e-07, "loss": 0.6898, "step": 4498 }, { "epoch": 0.9630997297369618, "grad_norm": 0.13649271462598345, "learning_rate": 1.364038749416574e-07, "loss": 0.6862, "step": 4499 }, { "epoch": 0.9633137994701774, "grad_norm": 0.13130800543235865, "learning_rate": 1.3482417336693198e-07, "loss": 0.6665, "step": 4500 }, { "epoch": 0.963527869203393, "grad_norm": 0.1353247385654163, "learning_rate": 1.3325364152100063e-07, "loss": 0.6953, "step": 4501 }, { "epoch": 0.9637419389366086, "grad_norm": 0.13627686124618352, "learning_rate": 1.316922801288234e-07, "loss": 0.712, "step": 4502 }, { "epoch": 0.9639560086698242, "grad_norm": 0.13842766847637222, "learning_rate": 1.3014008991111936e-07, "loss": 0.6979, "step": 4503 }, { "epoch": 0.9641700784030398, "grad_norm": 0.13539032497831988, "learning_rate": 1.285970715843754e-07, "loss": 0.6989, "step": 4504 }, { "epoch": 0.9643841481362554, "grad_norm": 0.13114386394446242, "learning_rate": 1.270632258608484e-07, "loss": 0.6835, "step": 4505 }, { "epoch": 0.964598217869471, "grad_norm": 0.1347767192348643, "learning_rate": 1.2553855344855648e-07, "loss": 0.6578, "step": 4506 }, { "epoch": 0.9648122876026866, "grad_norm": 0.17564488194509872, "learning_rate": 1.2402305505128553e-07, "loss": 0.6902, "step": 4507 }, { "epoch": 0.9650263573359021, "grad_norm": 0.135273284231026, "learning_rate": 1.2251673136858931e-07, "loss": 0.6883, "step": 4508 }, { "epoch": 0.9652404270691177, "grad_norm": 0.1377201593590194, "learning_rate": 1.2101958309578275e-07, "loss": 0.6963, "step": 4509 }, { "epoch": 0.9654544968023333, "grad_norm": 0.13284051953668552, "learning_rate": 1.1953161092394637e-07, "loss": 0.6621, "step": 4510 }, { "epoch": 0.965668566535549, "grad_norm": 0.1335395751103413, "learning_rate": 1.1805281553992631e-07, "loss": 0.7218, "step": 4511 }, { "epoch": 0.9658826362687646, "grad_norm": 0.1326742311677986, "learning_rate": 1.1658319762633207e-07, "loss": 0.6955, "step": 4512 }, { "epoch": 0.9660967060019802, "grad_norm": 0.1333845974289204, "learning_rate": 1.1512275786153437e-07, "loss": 0.6829, "step": 4513 }, { "epoch": 0.9663107757351957, "grad_norm": 0.13277402275570413, "learning_rate": 1.136714969196695e-07, "loss": 0.6828, "step": 4514 }, { "epoch": 0.9665248454684113, "grad_norm": 0.13891227491218763, "learning_rate": 1.1222941547064159e-07, "loss": 0.6815, "step": 4515 }, { "epoch": 0.9667389152016269, "grad_norm": 0.1325049108312949, "learning_rate": 1.1079651418010706e-07, "loss": 0.6569, "step": 4516 }, { "epoch": 0.9669529849348425, "grad_norm": 0.1311403374148213, "learning_rate": 1.0937279370949461e-07, "loss": 0.6904, "step": 4517 }, { "epoch": 0.9671670546680581, "grad_norm": 0.13610484449605284, "learning_rate": 1.0795825471598742e-07, "loss": 0.6825, "step": 4518 }, { "epoch": 0.9673811244012737, "grad_norm": 0.1357642611537287, "learning_rate": 1.0655289785253875e-07, "loss": 0.6813, "step": 4519 }, { "epoch": 0.9675951941344894, "grad_norm": 0.13416760158515398, "learning_rate": 1.0515672376785413e-07, "loss": 0.6915, "step": 4520 }, { "epoch": 0.9678092638677049, "grad_norm": 0.13157043480676256, "learning_rate": 1.0376973310640692e-07, "loss": 0.6847, "step": 4521 }, { "epoch": 0.9680233336009205, "grad_norm": 0.1312532642516754, "learning_rate": 1.0239192650842944e-07, "loss": 0.6819, "step": 4522 }, { "epoch": 0.9682374033341361, "grad_norm": 0.17392334900415451, "learning_rate": 1.0102330460991516e-07, "loss": 0.7287, "step": 4523 }, { "epoch": 0.9684514730673517, "grad_norm": 0.13614588847997638, "learning_rate": 9.966386804261651e-08, "loss": 0.6857, "step": 4524 }, { "epoch": 0.9686655428005673, "grad_norm": 0.13761547708688895, "learning_rate": 9.831361743404711e-08, "loss": 0.6998, "step": 4525 }, { "epoch": 0.9688796125337829, "grad_norm": 0.13365846991376126, "learning_rate": 9.697255340748169e-08, "loss": 0.6638, "step": 4526 }, { "epoch": 0.9690936822669984, "grad_norm": 0.13092060620285198, "learning_rate": 9.564067658195175e-08, "loss": 0.6685, "step": 4527 }, { "epoch": 0.969307752000214, "grad_norm": 0.133799858697993, "learning_rate": 9.431798757224775e-08, "loss": 0.6734, "step": 4528 }, { "epoch": 0.9695218217334297, "grad_norm": 0.13258756362529836, "learning_rate": 9.300448698892128e-08, "loss": 0.7031, "step": 4529 }, { "epoch": 0.9697358914666453, "grad_norm": 0.13506452719613907, "learning_rate": 9.170017543828291e-08, "loss": 0.6823, "step": 4530 }, { "epoch": 0.9699499611998609, "grad_norm": 0.12968976156955592, "learning_rate": 9.040505352240215e-08, "loss": 0.6692, "step": 4531 }, { "epoch": 0.9701640309330765, "grad_norm": 0.1393291618833024, "learning_rate": 8.911912183910077e-08, "loss": 0.7383, "step": 4532 }, { "epoch": 0.970378100666292, "grad_norm": 0.13532171209441804, "learning_rate": 8.784238098196396e-08, "loss": 0.6859, "step": 4533 }, { "epoch": 0.9705921703995076, "grad_norm": 0.13127295571392864, "learning_rate": 8.657483154033586e-08, "loss": 0.6821, "step": 4534 }, { "epoch": 0.9708062401327232, "grad_norm": 0.13111413121684962, "learning_rate": 8.531647409931065e-08, "loss": 0.6674, "step": 4535 }, { "epoch": 0.9710203098659388, "grad_norm": 0.13673708125171508, "learning_rate": 8.406730923974593e-08, "loss": 0.689, "step": 4536 }, { "epoch": 0.9712343795991544, "grad_norm": 0.1346365686237501, "learning_rate": 8.282733753825378e-08, "loss": 0.7005, "step": 4537 }, { "epoch": 0.9714484493323701, "grad_norm": 0.13564020677220007, "learning_rate": 8.159655956720303e-08, "loss": 0.6937, "step": 4538 }, { "epoch": 0.9716625190655857, "grad_norm": 0.13294099028119316, "learning_rate": 8.037497589471699e-08, "loss": 0.6826, "step": 4539 }, { "epoch": 0.9718765887988012, "grad_norm": 0.13263632674633796, "learning_rate": 7.916258708468016e-08, "loss": 0.6925, "step": 4540 }, { "epoch": 0.9720906585320168, "grad_norm": 0.3907402055413767, "learning_rate": 7.79593936967249e-08, "loss": 0.6641, "step": 4541 }, { "epoch": 0.9723047282652324, "grad_norm": 0.1345438436171536, "learning_rate": 7.676539628624469e-08, "loss": 0.6759, "step": 4542 }, { "epoch": 0.972518797998448, "grad_norm": 0.13517071987349472, "learning_rate": 7.558059540438755e-08, "loss": 0.7079, "step": 4543 }, { "epoch": 0.9727328677316636, "grad_norm": 0.12940672250537424, "learning_rate": 7.440499159805381e-08, "loss": 0.6713, "step": 4544 }, { "epoch": 0.9729469374648791, "grad_norm": 0.13381568999577242, "learning_rate": 7.323858540990047e-08, "loss": 0.6828, "step": 4545 }, { "epoch": 0.9731610071980947, "grad_norm": 0.13679608309033056, "learning_rate": 7.208137737833908e-08, "loss": 0.701, "step": 4546 }, { "epoch": 0.9733750769313104, "grad_norm": 0.1354627134399797, "learning_rate": 7.093336803753347e-08, "loss": 0.6907, "step": 4547 }, { "epoch": 0.973589146664526, "grad_norm": 0.22164076577938704, "learning_rate": 6.979455791740641e-08, "loss": 0.6972, "step": 4548 }, { "epoch": 0.9738032163977416, "grad_norm": 0.13698734603919305, "learning_rate": 6.86649475436263e-08, "loss": 0.7268, "step": 4549 }, { "epoch": 0.9740172861309572, "grad_norm": 0.1358707421234845, "learning_rate": 6.754453743761824e-08, "loss": 0.682, "step": 4550 }, { "epoch": 0.9742313558641728, "grad_norm": 0.13640199519497662, "learning_rate": 6.643332811656633e-08, "loss": 0.7169, "step": 4551 }, { "epoch": 0.9744454255973883, "grad_norm": 0.13291791306820896, "learning_rate": 6.533132009340026e-08, "loss": 0.6747, "step": 4552 }, { "epoch": 0.9746594953306039, "grad_norm": 0.13214013184003487, "learning_rate": 6.423851387680424e-08, "loss": 0.6771, "step": 4553 }, { "epoch": 0.9748735650638195, "grad_norm": 0.1382466117805832, "learning_rate": 6.315490997121698e-08, "loss": 0.7015, "step": 4554 }, { "epoch": 0.9750876347970351, "grad_norm": 0.13628649993624967, "learning_rate": 6.208050887682727e-08, "loss": 0.718, "step": 4555 }, { "epoch": 0.9753017045302508, "grad_norm": 0.13249796108740908, "learning_rate": 6.101531108957614e-08, "loss": 0.686, "step": 4556 }, { "epoch": 0.9755157742634664, "grad_norm": 0.1342226678938587, "learning_rate": 5.995931710115921e-08, "loss": 0.6844, "step": 4557 }, { "epoch": 0.975729843996682, "grad_norm": 0.13273625153984353, "learning_rate": 5.891252739901765e-08, "loss": 0.6894, "step": 4558 }, { "epoch": 0.9759439137298975, "grad_norm": 0.13228113597366548, "learning_rate": 5.787494246635161e-08, "loss": 0.7049, "step": 4559 }, { "epoch": 0.9761579834631131, "grad_norm": 0.13027522400683192, "learning_rate": 5.684656278210687e-08, "loss": 0.6666, "step": 4560 }, { "epoch": 0.9763720531963287, "grad_norm": 0.16405356281879138, "learning_rate": 5.5827388820979265e-08, "loss": 0.6831, "step": 4561 }, { "epoch": 0.9765861229295443, "grad_norm": 0.12884738108177957, "learning_rate": 5.481742105342136e-08, "loss": 0.6602, "step": 4562 }, { "epoch": 0.9768001926627599, "grad_norm": 0.13399673236269471, "learning_rate": 5.3816659945631346e-08, "loss": 0.6836, "step": 4563 }, { "epoch": 0.9770142623959754, "grad_norm": 0.1356504234992234, "learning_rate": 5.282510595955748e-08, "loss": 0.6942, "step": 4564 }, { "epoch": 0.9772283321291911, "grad_norm": 0.13302492348170722, "learning_rate": 5.18427595529003e-08, "loss": 0.6698, "step": 4565 }, { "epoch": 0.9774424018624067, "grad_norm": 0.12824289024922397, "learning_rate": 5.086962117910821e-08, "loss": 0.6618, "step": 4566 }, { "epoch": 0.9776564715956223, "grad_norm": 0.1362923366364546, "learning_rate": 4.990569128737965e-08, "loss": 0.6799, "step": 4567 }, { "epoch": 0.9778705413288379, "grad_norm": 0.13261851006172748, "learning_rate": 4.895097032266538e-08, "loss": 0.6854, "step": 4568 }, { "epoch": 0.9780846110620535, "grad_norm": 0.13578132918616978, "learning_rate": 4.800545872566176e-08, "loss": 0.6907, "step": 4569 }, { "epoch": 0.978298680795269, "grad_norm": 0.13198275226992776, "learning_rate": 4.7069156932813e-08, "loss": 0.6945, "step": 4570 }, { "epoch": 0.9785127505284846, "grad_norm": 0.13245423791551902, "learning_rate": 4.614206537631783e-08, "loss": 0.6792, "step": 4571 }, { "epoch": 0.9787268202617002, "grad_norm": 0.1342756783960331, "learning_rate": 4.522418448411614e-08, "loss": 0.6971, "step": 4572 }, { "epoch": 0.9789408899949158, "grad_norm": 0.13353541275258607, "learning_rate": 4.431551467990458e-08, "loss": 0.6806, "step": 4573 }, { "epoch": 0.9791549597281315, "grad_norm": 0.1368572601063207, "learning_rate": 4.3416056383120964e-08, "loss": 0.7087, "step": 4574 }, { "epoch": 0.9793690294613471, "grad_norm": 0.1348599817122092, "learning_rate": 4.252581000895095e-08, "loss": 0.672, "step": 4575 }, { "epoch": 0.9795830991945627, "grad_norm": 0.1345546877940355, "learning_rate": 4.164477596833694e-08, "loss": 0.7196, "step": 4576 }, { "epoch": 0.9797971689277782, "grad_norm": 0.13187991954492437, "learning_rate": 4.0772954667958055e-08, "loss": 0.6767, "step": 4577 }, { "epoch": 0.9800112386609938, "grad_norm": 0.13743497793794174, "learning_rate": 3.991034651024572e-08, "loss": 0.7028, "step": 4578 }, { "epoch": 0.9802253083942094, "grad_norm": 0.13462939869563612, "learning_rate": 3.905695189337921e-08, "loss": 0.6947, "step": 4579 }, { "epoch": 0.980439378127425, "grad_norm": 0.13259974964785384, "learning_rate": 3.821277121128342e-08, "loss": 0.6691, "step": 4580 }, { "epoch": 0.9806534478606406, "grad_norm": 0.13137142332961674, "learning_rate": 3.737780485363107e-08, "loss": 0.6888, "step": 4581 }, { "epoch": 0.9808675175938562, "grad_norm": 0.1324316788555167, "learning_rate": 3.6552053205842766e-08, "loss": 0.691, "step": 4582 }, { "epoch": 0.9810815873270718, "grad_norm": 0.13704081128650358, "learning_rate": 3.5735516649080257e-08, "loss": 0.7123, "step": 4583 }, { "epoch": 0.9812956570602874, "grad_norm": 0.13262662939039688, "learning_rate": 3.4928195560257614e-08, "loss": 0.6789, "step": 4584 }, { "epoch": 0.981509726793503, "grad_norm": 0.13835703252968434, "learning_rate": 3.413009031203229e-08, "loss": 0.701, "step": 4585 }, { "epoch": 0.9817237965267186, "grad_norm": 0.13324173506990608, "learning_rate": 3.334120127280738e-08, "loss": 0.6722, "step": 4586 }, { "epoch": 0.9819378662599342, "grad_norm": 0.13913532729510986, "learning_rate": 3.256152880673602e-08, "loss": 0.7032, "step": 4587 }, { "epoch": 0.9821519359931498, "grad_norm": 0.13414261865764993, "learning_rate": 3.179107327370812e-08, "loss": 0.6785, "step": 4588 }, { "epoch": 0.9823660057263653, "grad_norm": 0.13619968507042535, "learning_rate": 3.102983502937029e-08, "loss": 0.6929, "step": 4589 }, { "epoch": 0.9825800754595809, "grad_norm": 0.1333380174811774, "learning_rate": 3.027781442510369e-08, "loss": 0.6589, "step": 4590 }, { "epoch": 0.9827941451927965, "grad_norm": 0.13474858725447048, "learning_rate": 2.9535011808043967e-08, "loss": 0.6876, "step": 4591 }, { "epoch": 0.9830082149260122, "grad_norm": 0.12802733758990661, "learning_rate": 2.880142752106574e-08, "loss": 0.6623, "step": 4592 }, { "epoch": 0.9832222846592278, "grad_norm": 0.1326494823025216, "learning_rate": 2.8077061902787028e-08, "loss": 0.6812, "step": 4593 }, { "epoch": 0.9834363543924434, "grad_norm": 0.1305521513669887, "learning_rate": 2.7361915287578144e-08, "loss": 0.6596, "step": 4594 }, { "epoch": 0.983650424125659, "grad_norm": 0.13359993975925222, "learning_rate": 2.665598800554836e-08, "loss": 0.6826, "step": 4595 }, { "epoch": 0.9838644938588745, "grad_norm": 0.13421851061639803, "learning_rate": 2.5959280382550355e-08, "loss": 0.6894, "step": 4596 }, { "epoch": 0.9840785635920901, "grad_norm": 0.13241344494829288, "learning_rate": 2.5271792740186874e-08, "loss": 0.6654, "step": 4597 }, { "epoch": 0.9842926333253057, "grad_norm": 0.13343662178139085, "learning_rate": 2.4593525395797402e-08, "loss": 0.6888, "step": 4598 }, { "epoch": 0.9845067030585213, "grad_norm": 0.14384676987663056, "learning_rate": 2.3924478662469275e-08, "loss": 0.7035, "step": 4599 }, { "epoch": 0.9847207727917369, "grad_norm": 0.13538969155135305, "learning_rate": 2.326465284903545e-08, "loss": 0.6904, "step": 4600 }, { "epoch": 0.9849348425249526, "grad_norm": 0.13219703601808955, "learning_rate": 2.2614048260067856e-08, "loss": 0.6757, "step": 4601 }, { "epoch": 0.9851489122581681, "grad_norm": 0.1336655266590498, "learning_rate": 2.1972665195886256e-08, "loss": 0.6883, "step": 4602 }, { "epoch": 0.9853629819913837, "grad_norm": 0.13353985092154214, "learning_rate": 2.1340503952551606e-08, "loss": 0.69, "step": 4603 }, { "epoch": 0.9855770517245993, "grad_norm": 0.13625638470157295, "learning_rate": 2.0717564821868264e-08, "loss": 0.7155, "step": 4604 }, { "epoch": 0.9857911214578149, "grad_norm": 0.1369602486021807, "learning_rate": 2.0103848091381773e-08, "loss": 0.6773, "step": 4605 }, { "epoch": 0.9860051911910305, "grad_norm": 0.13519674482160918, "learning_rate": 1.949935404438552e-08, "loss": 0.6888, "step": 4606 }, { "epoch": 0.986219260924246, "grad_norm": 0.13288621786661212, "learning_rate": 1.890408295990964e-08, "loss": 0.6626, "step": 4607 }, { "epoch": 0.9864333306574616, "grad_norm": 0.13460149976880195, "learning_rate": 1.8318035112734335e-08, "loss": 0.6882, "step": 4608 }, { "epoch": 0.9866474003906772, "grad_norm": 0.13397113179129877, "learning_rate": 1.7741210773376538e-08, "loss": 0.6898, "step": 4609 }, { "epoch": 0.9868614701238928, "grad_norm": 0.13250612058289976, "learning_rate": 1.7173610208096603e-08, "loss": 0.656, "step": 4610 }, { "epoch": 0.9870755398571085, "grad_norm": 0.1330792982515392, "learning_rate": 1.661523367889606e-08, "loss": 0.6772, "step": 4611 }, { "epoch": 0.9872896095903241, "grad_norm": 0.1367407065381511, "learning_rate": 1.6066081443524284e-08, "loss": 0.7211, "step": 4612 }, { "epoch": 0.9875036793235397, "grad_norm": 0.12994364033838343, "learning_rate": 1.55261537554674e-08, "loss": 0.6624, "step": 4613 }, { "epoch": 0.9877177490567552, "grad_norm": 0.1386557268862067, "learning_rate": 1.499545086395493e-08, "loss": 0.6876, "step": 4614 }, { "epoch": 0.9879318187899708, "grad_norm": 0.14067556190485303, "learning_rate": 1.4473973013957587e-08, "loss": 0.6934, "step": 4615 }, { "epoch": 0.9881458885231864, "grad_norm": 0.13844331925045963, "learning_rate": 1.3961720446191707e-08, "loss": 0.7093, "step": 4616 }, { "epoch": 0.988359958256402, "grad_norm": 0.13448001465852008, "learning_rate": 1.3458693397105926e-08, "loss": 0.6757, "step": 4617 }, { "epoch": 0.9885740279896176, "grad_norm": 0.16708772475882527, "learning_rate": 1.2964892098903393e-08, "loss": 0.6857, "step": 4618 }, { "epoch": 0.9887880977228332, "grad_norm": 0.3096230916909998, "learning_rate": 1.2480316779517332e-08, "loss": 0.7003, "step": 4619 }, { "epoch": 0.9890021674560489, "grad_norm": 0.1353265563541458, "learning_rate": 1.2004967662628819e-08, "loss": 0.6971, "step": 4620 }, { "epoch": 0.9892162371892644, "grad_norm": 0.13128722461174966, "learning_rate": 1.1538844967660112e-08, "loss": 0.6693, "step": 4621 }, { "epoch": 0.98943030692248, "grad_norm": 0.13025951588726517, "learning_rate": 1.1081948909767992e-08, "loss": 0.6588, "step": 4622 }, { "epoch": 0.9896443766556956, "grad_norm": 0.13357595007751152, "learning_rate": 1.0634279699857086e-08, "loss": 0.6686, "step": 4623 }, { "epoch": 0.9898584463889112, "grad_norm": 0.13467638747773059, "learning_rate": 1.0195837544570986e-08, "loss": 0.6897, "step": 4624 }, { "epoch": 0.9900725161221268, "grad_norm": 0.1318685880619982, "learning_rate": 9.766622646292246e-09, "loss": 0.6883, "step": 4625 }, { "epoch": 0.9902865858553423, "grad_norm": 0.13587796907019287, "learning_rate": 9.346635203149046e-09, "loss": 0.6952, "step": 4626 }, { "epoch": 0.9905006555885579, "grad_norm": 0.13822458130193174, "learning_rate": 8.93587540900409e-09, "loss": 0.7011, "step": 4627 }, { "epoch": 0.9907147253217735, "grad_norm": 0.13549919290199122, "learning_rate": 8.53434345346349e-09, "loss": 0.6856, "step": 4628 }, { "epoch": 0.9909287950549892, "grad_norm": 0.13443503225334064, "learning_rate": 8.142039521874534e-09, "loss": 0.7048, "step": 4629 }, { "epoch": 0.9911428647882048, "grad_norm": 0.13682513693725026, "learning_rate": 7.758963795321262e-09, "loss": 0.7063, "step": 4630 }, { "epoch": 0.9913569345214204, "grad_norm": 0.1337141407342257, "learning_rate": 7.385116450635555e-09, "loss": 0.6911, "step": 4631 }, { "epoch": 0.991571004254636, "grad_norm": 0.13226056373114575, "learning_rate": 7.020497660381598e-09, "loss": 0.6892, "step": 4632 }, { "epoch": 0.9917850739878515, "grad_norm": 0.1332366376295061, "learning_rate": 6.665107592866982e-09, "loss": 0.6951, "step": 4633 }, { "epoch": 0.9919991437210671, "grad_norm": 0.13468292491388104, "learning_rate": 6.318946412140481e-09, "loss": 0.7092, "step": 4634 }, { "epoch": 0.9922132134542827, "grad_norm": 0.1334168070312017, "learning_rate": 5.982014277987614e-09, "loss": 0.6828, "step": 4635 }, { "epoch": 0.9924272831874983, "grad_norm": 0.1332326702101735, "learning_rate": 5.654311345937302e-09, "loss": 0.697, "step": 4636 }, { "epoch": 0.9926413529207139, "grad_norm": 0.13201991385115935, "learning_rate": 5.335837767255214e-09, "loss": 0.6745, "step": 4637 }, { "epoch": 0.9928554226539296, "grad_norm": 0.13479723742553412, "learning_rate": 5.0265936889482e-09, "loss": 0.7088, "step": 4638 }, { "epoch": 0.9930694923871451, "grad_norm": 0.138370397655778, "learning_rate": 4.726579253764296e-09, "loss": 0.6977, "step": 4639 }, { "epoch": 0.9932835621203607, "grad_norm": 0.13221492488593217, "learning_rate": 4.435794600188281e-09, "loss": 0.6939, "step": 4640 }, { "epoch": 0.9934976318535763, "grad_norm": 0.13330176726420126, "learning_rate": 4.154239862446119e-09, "loss": 0.6735, "step": 4641 }, { "epoch": 0.9937117015867919, "grad_norm": 0.13338245456308295, "learning_rate": 3.881915170502737e-09, "loss": 0.676, "step": 4642 }, { "epoch": 0.9939257713200075, "grad_norm": 0.13408047808814588, "learning_rate": 3.6188206500620273e-09, "loss": 0.6977, "step": 4643 }, { "epoch": 0.9941398410532231, "grad_norm": 0.13091899343953667, "learning_rate": 3.3649564225690655e-09, "loss": 0.6772, "step": 4644 }, { "epoch": 0.9943539107864386, "grad_norm": 0.13246650687304787, "learning_rate": 3.1203226052078926e-09, "loss": 0.6993, "step": 4645 }, { "epoch": 0.9945679805196542, "grad_norm": 0.13530972931274127, "learning_rate": 2.8849193109015127e-09, "loss": 0.7144, "step": 4646 }, { "epoch": 0.9947820502528699, "grad_norm": 0.13605103683147698, "learning_rate": 2.658746648307453e-09, "loss": 0.7093, "step": 4647 }, { "epoch": 0.9949961199860855, "grad_norm": 0.13233322258862, "learning_rate": 2.441804721831087e-09, "loss": 0.6689, "step": 4648 }, { "epoch": 0.9952101897193011, "grad_norm": 0.13295750468869433, "learning_rate": 2.2340936316100904e-09, "loss": 0.6726, "step": 4649 }, { "epoch": 0.9954242594525167, "grad_norm": 0.13218341043847168, "learning_rate": 2.0356134735233234e-09, "loss": 0.6944, "step": 4650 }, { "epoch": 0.9956383291857323, "grad_norm": 0.6752122162301123, "learning_rate": 1.8463643391908314e-09, "loss": 0.7099, "step": 4651 }, { "epoch": 0.9958523989189478, "grad_norm": 0.13282059775678576, "learning_rate": 1.6663463159671821e-09, "loss": 0.678, "step": 4652 }, { "epoch": 0.9960664686521634, "grad_norm": 0.13069347851367272, "learning_rate": 1.4955594869525692e-09, "loss": 0.671, "step": 4653 }, { "epoch": 0.996280538385379, "grad_norm": 0.13686805337972868, "learning_rate": 1.3340039309750475e-09, "loss": 0.6952, "step": 4654 }, { "epoch": 0.9964946081185946, "grad_norm": 0.13554420337436102, "learning_rate": 1.181679722614959e-09, "loss": 0.7047, "step": 4655 }, { "epoch": 0.9967086778518103, "grad_norm": 0.13092668613792285, "learning_rate": 1.038586932182728e-09, "loss": 0.6706, "step": 4656 }, { "epoch": 0.9969227475850259, "grad_norm": 0.13492033405363069, "learning_rate": 9.047256257277426e-10, "loss": 0.6837, "step": 4657 }, { "epoch": 0.9971368173182414, "grad_norm": 0.1359172406873756, "learning_rate": 7.800958650405754e-10, "loss": 0.7018, "step": 4658 }, { "epoch": 0.997350887051457, "grad_norm": 0.13351807830800608, "learning_rate": 6.646977076529837e-10, "loss": 0.6969, "step": 4659 }, { "epoch": 0.9975649567846726, "grad_norm": 0.12618875789265696, "learning_rate": 5.585312068312476e-10, "loss": 0.6465, "step": 4660 }, { "epoch": 0.9977790265178882, "grad_norm": 0.13713215611649313, "learning_rate": 4.6159641157839107e-10, "loss": 0.7069, "step": 4661 }, { "epoch": 0.9979930962511038, "grad_norm": 0.14591205614842845, "learning_rate": 3.738933666430633e-10, "loss": 0.6816, "step": 4662 }, { "epoch": 0.9982071659843194, "grad_norm": 0.13525007504585135, "learning_rate": 2.954221125084367e-10, "loss": 0.6965, "step": 4663 }, { "epoch": 0.9984212357175349, "grad_norm": 0.13344615803605883, "learning_rate": 2.2618268539664756e-10, "loss": 0.6604, "step": 4664 }, { "epoch": 0.9986353054507506, "grad_norm": 0.13354118073820614, "learning_rate": 1.6617511726657597e-10, "loss": 0.6893, "step": 4665 }, { "epoch": 0.9988493751839662, "grad_norm": 0.13045906658111772, "learning_rate": 1.1539943582050683e-10, "loss": 0.6664, "step": 4666 }, { "epoch": 0.9990634449171818, "grad_norm": 0.13023994655516052, "learning_rate": 7.385566449302773e-11, "loss": 0.6659, "step": 4667 }, { "epoch": 0.9992775146503974, "grad_norm": 0.12962065845272616, "learning_rate": 4.154382246435162e-11, "loss": 0.6698, "step": 4668 }, { "epoch": 0.999491584383613, "grad_norm": 0.16239162310804545, "learning_rate": 1.8463924646994202e-11, "loss": 0.6786, "step": 4669 }, { "epoch": 0.9997056541168285, "grad_norm": 0.1344152267995376, "learning_rate": 4.615981694655603e-12, "loss": 0.6913, "step": 4670 }, { "epoch": 0.9999197238500441, "grad_norm": 0.13351560734418214, "learning_rate": 0.0, "loss": 0.6768, "step": 4671 }, { "epoch": 0.9999197238500441, "step": 4671, "total_flos": 9004996003627008.0, "train_loss": 0.728125217524414, "train_runtime": 85870.4917, "train_samples_per_second": 24.371, "train_steps_per_second": 0.054 } ], "logging_steps": 1, "max_steps": 4671, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9004996003627008.0, "train_batch_size": 7, "trial_name": null, "trial_params": null }