diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.8726673319408363, + "epoch": 1.010031634190783, "eval_steps": 500, - "global_step": 432000, + "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -302407,6 +302407,47614 @@ "learning_rate": 5.529181335435124e-07, "loss": 15.4848, "step": 432000 + }, + { + "epoch": 0.8726875325735202, + "grad_norm": 192.4467010498047, + "learning_rate": 5.527585871042867e-07, + "loss": 26.0368, + "step": 432010 + }, + { + "epoch": 0.872707733206204, + "grad_norm": 6.8492255210876465, + "learning_rate": 5.525990623403765e-07, + "loss": 21.4868, + "step": 432020 + }, + { + "epoch": 0.8727279338388878, + "grad_norm": 593.39404296875, + "learning_rate": 5.524395592525584e-07, + "loss": 12.9314, + "step": 432030 + }, + { + "epoch": 0.8727481344715716, + "grad_norm": 155.49880981445312, + "learning_rate": 5.522800778416099e-07, + "loss": 34.3999, + "step": 432040 + }, + { + "epoch": 0.8727683351042554, + "grad_norm": 338.1510314941406, + "learning_rate": 5.521206181083111e-07, + "loss": 22.7508, + "step": 432050 + }, + { + "epoch": 0.8727885357369393, + "grad_norm": 108.6231689453125, + "learning_rate": 5.519611800534347e-07, + "loss": 3.8747, + "step": 432060 + }, + { + "epoch": 0.8728087363696231, + "grad_norm": 209.6058807373047, + "learning_rate": 5.518017636777606e-07, + "loss": 12.9019, + "step": 432070 + }, + { + "epoch": 0.8728289370023069, + "grad_norm": 258.5714416503906, + "learning_rate": 5.516423689820655e-07, + "loss": 21.6524, + "step": 432080 + }, + { + "epoch": 0.8728491376349907, + "grad_norm": 304.92889404296875, + "learning_rate": 5.514829959671264e-07, + "loss": 14.3826, + "step": 432090 + }, + { + "epoch": 0.8728693382676745, + "grad_norm": 279.6145935058594, + "learning_rate": 5.51323644633719e-07, + "loss": 6.3146, + "step": 432100 + }, + { + "epoch": 0.8728895389003584, + "grad_norm": 224.16091918945312, + "learning_rate": 5.511643149826206e-07, + "loss": 25.1425, + "step": 432110 + }, + { + "epoch": 0.8729097395330422, + "grad_norm": 270.8456115722656, + "learning_rate": 5.510050070146083e-07, + "loss": 14.7547, + "step": 432120 + }, + { + "epoch": 0.872929940165726, + "grad_norm": 384.7947998046875, + "learning_rate": 5.508457207304574e-07, + "loss": 22.2386, + "step": 432130 + }, + { + "epoch": 0.8729501407984098, + "grad_norm": 289.92950439453125, + "learning_rate": 5.506864561309455e-07, + "loss": 12.8796, + "step": 432140 + }, + { + "epoch": 0.8729703414310936, + "grad_norm": 148.26498413085938, + "learning_rate": 5.505272132168471e-07, + "loss": 12.9029, + "step": 432150 + }, + { + "epoch": 0.8729905420637775, + "grad_norm": 294.7095642089844, + "learning_rate": 5.503679919889404e-07, + "loss": 17.0287, + "step": 432160 + }, + { + "epoch": 0.8730107426964613, + "grad_norm": 165.29808044433594, + "learning_rate": 5.502087924480005e-07, + "loss": 24.1768, + "step": 432170 + }, + { + "epoch": 0.8730309433291451, + "grad_norm": 197.2328338623047, + "learning_rate": 5.50049614594802e-07, + "loss": 15.0723, + "step": 432180 + }, + { + "epoch": 0.8730511439618289, + "grad_norm": 257.3417053222656, + "learning_rate": 5.498904584301235e-07, + "loss": 21.5337, + "step": 432190 + }, + { + "epoch": 0.8730713445945127, + "grad_norm": 197.20399475097656, + "learning_rate": 5.497313239547374e-07, + "loss": 14.8936, + "step": 432200 + }, + { + "epoch": 0.8730915452271966, + "grad_norm": 221.76002502441406, + "learning_rate": 5.49572211169423e-07, + "loss": 23.2643, + "step": 432210 + }, + { + "epoch": 0.8731117458598804, + "grad_norm": 269.3241271972656, + "learning_rate": 5.49413120074952e-07, + "loss": 20.478, + "step": 432220 + }, + { + "epoch": 0.8731319464925641, + "grad_norm": 567.644775390625, + "learning_rate": 5.492540506721033e-07, + "loss": 16.5414, + "step": 432230 + }, + { + "epoch": 0.8731521471252479, + "grad_norm": 0.8764551281929016, + "learning_rate": 5.490950029616504e-07, + "loss": 19.1325, + "step": 432240 + }, + { + "epoch": 0.8731723477579317, + "grad_norm": 431.1417236328125, + "learning_rate": 5.489359769443675e-07, + "loss": 21.3286, + "step": 432250 + }, + { + "epoch": 0.8731925483906156, + "grad_norm": 611.9507446289062, + "learning_rate": 5.487769726210318e-07, + "loss": 17.4648, + "step": 432260 + }, + { + "epoch": 0.8732127490232994, + "grad_norm": 367.40814208984375, + "learning_rate": 5.486179899924171e-07, + "loss": 11.3435, + "step": 432270 + }, + { + "epoch": 0.8732329496559832, + "grad_norm": 85.75486755371094, + "learning_rate": 5.484590290592979e-07, + "loss": 18.5802, + "step": 432280 + }, + { + "epoch": 0.873253150288667, + "grad_norm": 125.14513397216797, + "learning_rate": 5.483000898224494e-07, + "loss": 11.8362, + "step": 432290 + }, + { + "epoch": 0.8732733509213508, + "grad_norm": 279.9031677246094, + "learning_rate": 5.48141172282648e-07, + "loss": 58.6816, + "step": 432300 + }, + { + "epoch": 0.8732935515540347, + "grad_norm": 318.6170959472656, + "learning_rate": 5.479822764406645e-07, + "loss": 22.523, + "step": 432310 + }, + { + "epoch": 0.8733137521867185, + "grad_norm": 291.5913391113281, + "learning_rate": 5.478234022972756e-07, + "loss": 12.7422, + "step": 432320 + }, + { + "epoch": 0.8733339528194023, + "grad_norm": 260.2528076171875, + "learning_rate": 5.476645498532567e-07, + "loss": 11.3449, + "step": 432330 + }, + { + "epoch": 0.8733541534520861, + "grad_norm": 408.50860595703125, + "learning_rate": 5.475057191093808e-07, + "loss": 16.1845, + "step": 432340 + }, + { + "epoch": 0.8733743540847699, + "grad_norm": 132.4649658203125, + "learning_rate": 5.473469100664208e-07, + "loss": 18.9693, + "step": 432350 + }, + { + "epoch": 0.8733945547174538, + "grad_norm": 383.5670471191406, + "learning_rate": 5.471881227251518e-07, + "loss": 13.5977, + "step": 432360 + }, + { + "epoch": 0.8734147553501376, + "grad_norm": 350.06976318359375, + "learning_rate": 5.470293570863499e-07, + "loss": 20.193, + "step": 432370 + }, + { + "epoch": 0.8734349559828214, + "grad_norm": 140.1104278564453, + "learning_rate": 5.46870613150785e-07, + "loss": 3.5753, + "step": 432380 + }, + { + "epoch": 0.8734551566155052, + "grad_norm": 290.8729248046875, + "learning_rate": 5.467118909192326e-07, + "loss": 23.7194, + "step": 432390 + }, + { + "epoch": 0.873475357248189, + "grad_norm": 366.9217529296875, + "learning_rate": 5.46553190392467e-07, + "loss": 17.7457, + "step": 432400 + }, + { + "epoch": 0.8734955578808729, + "grad_norm": 294.6042175292969, + "learning_rate": 5.46394511571261e-07, + "loss": 10.6936, + "step": 432410 + }, + { + "epoch": 0.8735157585135567, + "grad_norm": 139.17315673828125, + "learning_rate": 5.462358544563873e-07, + "loss": 16.0233, + "step": 432420 + }, + { + "epoch": 0.8735359591462405, + "grad_norm": 99.56596374511719, + "learning_rate": 5.460772190486208e-07, + "loss": 14.7335, + "step": 432430 + }, + { + "epoch": 0.8735561597789243, + "grad_norm": 80.13524627685547, + "learning_rate": 5.459186053487336e-07, + "loss": 13.6393, + "step": 432440 + }, + { + "epoch": 0.8735763604116081, + "grad_norm": 540.6686401367188, + "learning_rate": 5.457600133574987e-07, + "loss": 22.7271, + "step": 432450 + }, + { + "epoch": 0.873596561044292, + "grad_norm": 190.26438903808594, + "learning_rate": 5.456014430756895e-07, + "loss": 16.4118, + "step": 432460 + }, + { + "epoch": 0.8736167616769758, + "grad_norm": 189.78236389160156, + "learning_rate": 5.454428945040774e-07, + "loss": 12.8243, + "step": 432470 + }, + { + "epoch": 0.8736369623096596, + "grad_norm": 232.68580627441406, + "learning_rate": 5.452843676434377e-07, + "loss": 16.0286, + "step": 432480 + }, + { + "epoch": 0.8736571629423433, + "grad_norm": 111.18245697021484, + "learning_rate": 5.45125862494541e-07, + "loss": 19.2267, + "step": 432490 + }, + { + "epoch": 0.8736773635750271, + "grad_norm": 126.48763275146484, + "learning_rate": 5.449673790581611e-07, + "loss": 25.2206, + "step": 432500 + }, + { + "epoch": 0.8736975642077109, + "grad_norm": 405.0051574707031, + "learning_rate": 5.448089173350696e-07, + "loss": 9.0047, + "step": 432510 + }, + { + "epoch": 0.8737177648403948, + "grad_norm": 128.05975341796875, + "learning_rate": 5.446504773260386e-07, + "loss": 36.9651, + "step": 432520 + }, + { + "epoch": 0.8737379654730786, + "grad_norm": 0.0, + "learning_rate": 5.44492059031842e-07, + "loss": 14.3221, + "step": 432530 + }, + { + "epoch": 0.8737581661057624, + "grad_norm": 451.4018859863281, + "learning_rate": 5.443336624532492e-07, + "loss": 15.6212, + "step": 432540 + }, + { + "epoch": 0.8737783667384462, + "grad_norm": 452.4760437011719, + "learning_rate": 5.44175287591035e-07, + "loss": 17.0667, + "step": 432550 + }, + { + "epoch": 0.87379856737113, + "grad_norm": 183.5372314453125, + "learning_rate": 5.440169344459701e-07, + "loss": 29.9927, + "step": 432560 + }, + { + "epoch": 0.8738187680038139, + "grad_norm": 41.35247039794922, + "learning_rate": 5.438586030188247e-07, + "loss": 8.9583, + "step": 432570 + }, + { + "epoch": 0.8738389686364977, + "grad_norm": 183.9976806640625, + "learning_rate": 5.437002933103724e-07, + "loss": 18.7638, + "step": 432580 + }, + { + "epoch": 0.8738591692691815, + "grad_norm": 116.30809020996094, + "learning_rate": 5.435420053213863e-07, + "loss": 16.6084, + "step": 432590 + }, + { + "epoch": 0.8738793699018653, + "grad_norm": 87.30075073242188, + "learning_rate": 5.433837390526341e-07, + "loss": 14.0521, + "step": 432600 + }, + { + "epoch": 0.8738995705345491, + "grad_norm": 169.73892211914062, + "learning_rate": 5.432254945048887e-07, + "loss": 9.207, + "step": 432610 + }, + { + "epoch": 0.873919771167233, + "grad_norm": 75.16477966308594, + "learning_rate": 5.430672716789232e-07, + "loss": 10.9678, + "step": 432620 + }, + { + "epoch": 0.8739399717999168, + "grad_norm": 281.18536376953125, + "learning_rate": 5.429090705755069e-07, + "loss": 10.1701, + "step": 432630 + }, + { + "epoch": 0.8739601724326006, + "grad_norm": 303.79815673828125, + "learning_rate": 5.427508911954105e-07, + "loss": 13.4879, + "step": 432640 + }, + { + "epoch": 0.8739803730652844, + "grad_norm": 246.65687561035156, + "learning_rate": 5.425927335394054e-07, + "loss": 18.9741, + "step": 432650 + }, + { + "epoch": 0.8740005736979682, + "grad_norm": 272.3833312988281, + "learning_rate": 5.424345976082645e-07, + "loss": 16.4858, + "step": 432660 + }, + { + "epoch": 0.8740207743306521, + "grad_norm": 305.1922912597656, + "learning_rate": 5.42276483402755e-07, + "loss": 38.6647, + "step": 432670 + }, + { + "epoch": 0.8740409749633359, + "grad_norm": 414.1039123535156, + "learning_rate": 5.421183909236494e-07, + "loss": 12.9707, + "step": 432680 + }, + { + "epoch": 0.8740611755960197, + "grad_norm": 112.91834259033203, + "learning_rate": 5.419603201717189e-07, + "loss": 14.2765, + "step": 432690 + }, + { + "epoch": 0.8740813762287035, + "grad_norm": 14.11253547668457, + "learning_rate": 5.418022711477333e-07, + "loss": 19.9561, + "step": 432700 + }, + { + "epoch": 0.8741015768613873, + "grad_norm": 221.5771484375, + "learning_rate": 5.416442438524616e-07, + "loss": 33.8349, + "step": 432710 + }, + { + "epoch": 0.8741217774940712, + "grad_norm": 268.58001708984375, + "learning_rate": 5.414862382866759e-07, + "loss": 13.9833, + "step": 432720 + }, + { + "epoch": 0.874141978126755, + "grad_norm": 159.95399475097656, + "learning_rate": 5.413282544511455e-07, + "loss": 13.2408, + "step": 432730 + }, + { + "epoch": 0.8741621787594387, + "grad_norm": 201.13697814941406, + "learning_rate": 5.4117029234664e-07, + "loss": 14.5072, + "step": 432740 + }, + { + "epoch": 0.8741823793921225, + "grad_norm": 13.50367546081543, + "learning_rate": 5.410123519739302e-07, + "loss": 12.3527, + "step": 432750 + }, + { + "epoch": 0.8742025800248063, + "grad_norm": 463.3029479980469, + "learning_rate": 5.408544333337845e-07, + "loss": 15.4947, + "step": 432760 + }, + { + "epoch": 0.8742227806574902, + "grad_norm": 169.9464111328125, + "learning_rate": 5.406965364269745e-07, + "loss": 14.3322, + "step": 432770 + }, + { + "epoch": 0.874242981290174, + "grad_norm": 374.7083740234375, + "learning_rate": 5.405386612542685e-07, + "loss": 8.4789, + "step": 432780 + }, + { + "epoch": 0.8742631819228578, + "grad_norm": 122.0746841430664, + "learning_rate": 5.403808078164358e-07, + "loss": 9.1264, + "step": 432790 + }, + { + "epoch": 0.8742833825555416, + "grad_norm": 255.9857940673828, + "learning_rate": 5.402229761142464e-07, + "loss": 15.9121, + "step": 432800 + }, + { + "epoch": 0.8743035831882254, + "grad_norm": 111.83055877685547, + "learning_rate": 5.400651661484684e-07, + "loss": 8.633, + "step": 432810 + }, + { + "epoch": 0.8743237838209093, + "grad_norm": 305.3992004394531, + "learning_rate": 5.399073779198732e-07, + "loss": 14.5601, + "step": 432820 + }, + { + "epoch": 0.8743439844535931, + "grad_norm": 17.42432975769043, + "learning_rate": 5.397496114292278e-07, + "loss": 27.667, + "step": 432830 + }, + { + "epoch": 0.8743641850862769, + "grad_norm": 271.02520751953125, + "learning_rate": 5.395918666773026e-07, + "loss": 18.6716, + "step": 432840 + }, + { + "epoch": 0.8743843857189607, + "grad_norm": 242.5654754638672, + "learning_rate": 5.394341436648653e-07, + "loss": 22.1673, + "step": 432850 + }, + { + "epoch": 0.8744045863516445, + "grad_norm": 113.94843292236328, + "learning_rate": 5.392764423926844e-07, + "loss": 21.2269, + "step": 432860 + }, + { + "epoch": 0.8744247869843284, + "grad_norm": 180.2521209716797, + "learning_rate": 5.391187628615296e-07, + "loss": 16.6291, + "step": 432870 + }, + { + "epoch": 0.8744449876170122, + "grad_norm": 431.97552490234375, + "learning_rate": 5.389611050721694e-07, + "loss": 19.8089, + "step": 432880 + }, + { + "epoch": 0.874465188249696, + "grad_norm": 374.6367492675781, + "learning_rate": 5.388034690253701e-07, + "loss": 9.5884, + "step": 432890 + }, + { + "epoch": 0.8744853888823798, + "grad_norm": 116.84526062011719, + "learning_rate": 5.386458547219026e-07, + "loss": 22.0699, + "step": 432900 + }, + { + "epoch": 0.8745055895150636, + "grad_norm": 177.0876922607422, + "learning_rate": 5.384882621625353e-07, + "loss": 12.8417, + "step": 432910 + }, + { + "epoch": 0.8745257901477475, + "grad_norm": 1729.6988525390625, + "learning_rate": 5.383306913480335e-07, + "loss": 34.3606, + "step": 432920 + }, + { + "epoch": 0.8745459907804313, + "grad_norm": 370.0172424316406, + "learning_rate": 5.381731422791664e-07, + "loss": 23.8157, + "step": 432930 + }, + { + "epoch": 0.8745661914131151, + "grad_norm": 659.2686767578125, + "learning_rate": 5.380156149567034e-07, + "loss": 18.0229, + "step": 432940 + }, + { + "epoch": 0.8745863920457989, + "grad_norm": 233.52113342285156, + "learning_rate": 5.378581093814112e-07, + "loss": 10.3576, + "step": 432950 + }, + { + "epoch": 0.8746065926784827, + "grad_norm": 266.9483337402344, + "learning_rate": 5.377006255540562e-07, + "loss": 20.8076, + "step": 432960 + }, + { + "epoch": 0.8746267933111666, + "grad_norm": 127.8639144897461, + "learning_rate": 5.375431634754074e-07, + "loss": 17.1147, + "step": 432970 + }, + { + "epoch": 0.8746469939438504, + "grad_norm": 361.4924011230469, + "learning_rate": 5.373857231462337e-07, + "loss": 21.1542, + "step": 432980 + }, + { + "epoch": 0.8746671945765342, + "grad_norm": 1143.997802734375, + "learning_rate": 5.372283045672994e-07, + "loss": 29.7198, + "step": 432990 + }, + { + "epoch": 0.8746873952092179, + "grad_norm": 44.34175491333008, + "learning_rate": 5.370709077393721e-07, + "loss": 25.0875, + "step": 433000 + }, + { + "epoch": 0.8747075958419017, + "grad_norm": 197.0361785888672, + "learning_rate": 5.369135326632219e-07, + "loss": 25.3473, + "step": 433010 + }, + { + "epoch": 0.8747277964745855, + "grad_norm": 46.178123474121094, + "learning_rate": 5.367561793396132e-07, + "loss": 13.566, + "step": 433020 + }, + { + "epoch": 0.8747479971072694, + "grad_norm": 182.61131286621094, + "learning_rate": 5.365988477693124e-07, + "loss": 18.0747, + "step": 433030 + }, + { + "epoch": 0.8747681977399532, + "grad_norm": 254.03570556640625, + "learning_rate": 5.364415379530891e-07, + "loss": 24.1083, + "step": 433040 + }, + { + "epoch": 0.874788398372637, + "grad_norm": 198.87677001953125, + "learning_rate": 5.362842498917081e-07, + "loss": 11.2923, + "step": 433050 + }, + { + "epoch": 0.8748085990053208, + "grad_norm": 150.2322998046875, + "learning_rate": 5.36126983585935e-07, + "loss": 15.313, + "step": 433060 + }, + { + "epoch": 0.8748287996380046, + "grad_norm": 495.1058044433594, + "learning_rate": 5.359697390365387e-07, + "loss": 17.2233, + "step": 433070 + }, + { + "epoch": 0.8748490002706885, + "grad_norm": 332.6954650878906, + "learning_rate": 5.35812516244284e-07, + "loss": 18.017, + "step": 433080 + }, + { + "epoch": 0.8748692009033723, + "grad_norm": 500.8349609375, + "learning_rate": 5.356553152099381e-07, + "loss": 26.3494, + "step": 433090 + }, + { + "epoch": 0.8748894015360561, + "grad_norm": 255.11306762695312, + "learning_rate": 5.354981359342659e-07, + "loss": 17.2023, + "step": 433100 + }, + { + "epoch": 0.8749096021687399, + "grad_norm": 180.84852600097656, + "learning_rate": 5.353409784180352e-07, + "loss": 5.9042, + "step": 433110 + }, + { + "epoch": 0.8749298028014237, + "grad_norm": 24.889875411987305, + "learning_rate": 5.35183842662011e-07, + "loss": 16.8743, + "step": 433120 + }, + { + "epoch": 0.8749500034341076, + "grad_norm": 275.861083984375, + "learning_rate": 5.350267286669585e-07, + "loss": 9.4849, + "step": 433130 + }, + { + "epoch": 0.8749702040667914, + "grad_norm": 897.2681274414062, + "learning_rate": 5.348696364336448e-07, + "loss": 17.6884, + "step": 433140 + }, + { + "epoch": 0.8749904046994752, + "grad_norm": 435.4981994628906, + "learning_rate": 5.347125659628344e-07, + "loss": 19.0055, + "step": 433150 + }, + { + "epoch": 0.875010605332159, + "grad_norm": 34.28255844116211, + "learning_rate": 5.345555172552941e-07, + "loss": 29.3929, + "step": 433160 + }, + { + "epoch": 0.8750308059648428, + "grad_norm": 430.943115234375, + "learning_rate": 5.343984903117889e-07, + "loss": 11.3227, + "step": 433170 + }, + { + "epoch": 0.8750510065975267, + "grad_norm": 369.8266906738281, + "learning_rate": 5.342414851330824e-07, + "loss": 24.4882, + "step": 433180 + }, + { + "epoch": 0.8750712072302105, + "grad_norm": 137.533935546875, + "learning_rate": 5.340845017199425e-07, + "loss": 22.292, + "step": 433190 + }, + { + "epoch": 0.8750914078628943, + "grad_norm": 203.81004333496094, + "learning_rate": 5.339275400731331e-07, + "loss": 23.8607, + "step": 433200 + }, + { + "epoch": 0.8751116084955781, + "grad_norm": 219.90093994140625, + "learning_rate": 5.337706001934184e-07, + "loss": 12.4077, + "step": 433210 + }, + { + "epoch": 0.8751318091282619, + "grad_norm": 127.35514068603516, + "learning_rate": 5.33613682081564e-07, + "loss": 14.9779, + "step": 433220 + }, + { + "epoch": 0.8751520097609458, + "grad_norm": 471.42706298828125, + "learning_rate": 5.334567857383354e-07, + "loss": 27.8361, + "step": 433230 + }, + { + "epoch": 0.8751722103936296, + "grad_norm": 354.9244079589844, + "learning_rate": 5.332999111644971e-07, + "loss": 16.9136, + "step": 433240 + }, + { + "epoch": 0.8751924110263134, + "grad_norm": 182.89178466796875, + "learning_rate": 5.331430583608122e-07, + "loss": 16.3957, + "step": 433250 + }, + { + "epoch": 0.8752126116589971, + "grad_norm": 450.6783752441406, + "learning_rate": 5.329862273280462e-07, + "loss": 25.9119, + "step": 433260 + }, + { + "epoch": 0.8752328122916809, + "grad_norm": 7.982193946838379, + "learning_rate": 5.328294180669658e-07, + "loss": 15.3313, + "step": 433270 + }, + { + "epoch": 0.8752530129243647, + "grad_norm": 130.57077026367188, + "learning_rate": 5.326726305783308e-07, + "loss": 23.8535, + "step": 433280 + }, + { + "epoch": 0.8752732135570486, + "grad_norm": 17.09382438659668, + "learning_rate": 5.325158648629075e-07, + "loss": 16.2155, + "step": 433290 + }, + { + "epoch": 0.8752934141897324, + "grad_norm": 190.3108673095703, + "learning_rate": 5.323591209214612e-07, + "loss": 20.9664, + "step": 433300 + }, + { + "epoch": 0.8753136148224162, + "grad_norm": 183.39068603515625, + "learning_rate": 5.322023987547547e-07, + "loss": 13.8173, + "step": 433310 + }, + { + "epoch": 0.8753338154551, + "grad_norm": 115.43177795410156, + "learning_rate": 5.320456983635508e-07, + "loss": 15.826, + "step": 433320 + }, + { + "epoch": 0.8753540160877838, + "grad_norm": 463.8359069824219, + "learning_rate": 5.318890197486154e-07, + "loss": 20.1505, + "step": 433330 + }, + { + "epoch": 0.8753742167204677, + "grad_norm": 145.56065368652344, + "learning_rate": 5.317323629107108e-07, + "loss": 11.3221, + "step": 433340 + }, + { + "epoch": 0.8753944173531515, + "grad_norm": 219.30812072753906, + "learning_rate": 5.315757278505995e-07, + "loss": 18.8657, + "step": 433350 + }, + { + "epoch": 0.8754146179858353, + "grad_norm": 95.59041595458984, + "learning_rate": 5.314191145690473e-07, + "loss": 20.0257, + "step": 433360 + }, + { + "epoch": 0.8754348186185191, + "grad_norm": 283.51708984375, + "learning_rate": 5.312625230668155e-07, + "loss": 22.2627, + "step": 433370 + }, + { + "epoch": 0.875455019251203, + "grad_norm": 205.8687744140625, + "learning_rate": 5.311059533446694e-07, + "loss": 11.2702, + "step": 433380 + }, + { + "epoch": 0.8754752198838868, + "grad_norm": 209.12066650390625, + "learning_rate": 5.309494054033704e-07, + "loss": 11.8368, + "step": 433390 + }, + { + "epoch": 0.8754954205165706, + "grad_norm": 377.4494323730469, + "learning_rate": 5.307928792436812e-07, + "loss": 22.0285, + "step": 433400 + }, + { + "epoch": 0.8755156211492544, + "grad_norm": 508.06512451171875, + "learning_rate": 5.306363748663668e-07, + "loss": 29.6034, + "step": 433410 + }, + { + "epoch": 0.8755358217819382, + "grad_norm": 273.0721130371094, + "learning_rate": 5.304798922721871e-07, + "loss": 15.6005, + "step": 433420 + }, + { + "epoch": 0.875556022414622, + "grad_norm": 223.41030883789062, + "learning_rate": 5.303234314619071e-07, + "loss": 10.0523, + "step": 433430 + }, + { + "epoch": 0.8755762230473059, + "grad_norm": 342.0409240722656, + "learning_rate": 5.301669924362884e-07, + "loss": 24.0745, + "step": 433440 + }, + { + "epoch": 0.8755964236799897, + "grad_norm": 268.7024230957031, + "learning_rate": 5.300105751960943e-07, + "loss": 18.359, + "step": 433450 + }, + { + "epoch": 0.8756166243126735, + "grad_norm": 127.2983169555664, + "learning_rate": 5.298541797420864e-07, + "loss": 12.788, + "step": 433460 + }, + { + "epoch": 0.8756368249453573, + "grad_norm": 361.1843566894531, + "learning_rate": 5.296978060750257e-07, + "loss": 16.1108, + "step": 433470 + }, + { + "epoch": 0.8756570255780411, + "grad_norm": 294.2516784667969, + "learning_rate": 5.295414541956773e-07, + "loss": 21.2914, + "step": 433480 + }, + { + "epoch": 0.875677226210725, + "grad_norm": 137.48095703125, + "learning_rate": 5.293851241048015e-07, + "loss": 11.4306, + "step": 433490 + }, + { + "epoch": 0.8756974268434088, + "grad_norm": 106.34599304199219, + "learning_rate": 5.292288158031595e-07, + "loss": 15.4627, + "step": 433500 + }, + { + "epoch": 0.8757176274760925, + "grad_norm": 152.8980712890625, + "learning_rate": 5.290725292915138e-07, + "loss": 16.1661, + "step": 433510 + }, + { + "epoch": 0.8757378281087763, + "grad_norm": 80.7688217163086, + "learning_rate": 5.28916264570628e-07, + "loss": 19.8095, + "step": 433520 + }, + { + "epoch": 0.8757580287414601, + "grad_norm": 138.82008361816406, + "learning_rate": 5.287600216412609e-07, + "loss": 24.6289, + "step": 433530 + }, + { + "epoch": 0.875778229374144, + "grad_norm": 509.1368103027344, + "learning_rate": 5.286038005041744e-07, + "loss": 23.4458, + "step": 433540 + }, + { + "epoch": 0.8757984300068278, + "grad_norm": 62.8992805480957, + "learning_rate": 5.28447601160132e-07, + "loss": 21.7229, + "step": 433550 + }, + { + "epoch": 0.8758186306395116, + "grad_norm": 324.73089599609375, + "learning_rate": 5.28291423609894e-07, + "loss": 19.2802, + "step": 433560 + }, + { + "epoch": 0.8758388312721954, + "grad_norm": 322.7063903808594, + "learning_rate": 5.281352678542195e-07, + "loss": 15.1388, + "step": 433570 + }, + { + "epoch": 0.8758590319048792, + "grad_norm": 36.00373840332031, + "learning_rate": 5.279791338938717e-07, + "loss": 23.9129, + "step": 433580 + }, + { + "epoch": 0.8758792325375631, + "grad_norm": 233.86868286132812, + "learning_rate": 5.278230217296132e-07, + "loss": 26.1835, + "step": 433590 + }, + { + "epoch": 0.8758994331702469, + "grad_norm": 339.3141174316406, + "learning_rate": 5.276669313622013e-07, + "loss": 19.1924, + "step": 433600 + }, + { + "epoch": 0.8759196338029307, + "grad_norm": 332.3990783691406, + "learning_rate": 5.275108627923975e-07, + "loss": 18.69, + "step": 433610 + }, + { + "epoch": 0.8759398344356145, + "grad_norm": 212.42300415039062, + "learning_rate": 5.273548160209651e-07, + "loss": 20.9045, + "step": 433620 + }, + { + "epoch": 0.8759600350682983, + "grad_norm": 318.3375244140625, + "learning_rate": 5.271987910486625e-07, + "loss": 21.3603, + "step": 433630 + }, + { + "epoch": 0.8759802357009822, + "grad_norm": 455.908447265625, + "learning_rate": 5.270427878762496e-07, + "loss": 17.5253, + "step": 433640 + }, + { + "epoch": 0.876000436333666, + "grad_norm": 355.2138366699219, + "learning_rate": 5.268868065044886e-07, + "loss": 18.036, + "step": 433650 + }, + { + "epoch": 0.8760206369663498, + "grad_norm": 169.66409301757812, + "learning_rate": 5.267308469341387e-07, + "loss": 19.3718, + "step": 433660 + }, + { + "epoch": 0.8760408375990336, + "grad_norm": 0.0, + "learning_rate": 5.265749091659589e-07, + "loss": 14.599, + "step": 433670 + }, + { + "epoch": 0.8760610382317174, + "grad_norm": 185.46743774414062, + "learning_rate": 5.264189932007119e-07, + "loss": 14.404, + "step": 433680 + }, + { + "epoch": 0.8760812388644013, + "grad_norm": 342.9581298828125, + "learning_rate": 5.262630990391549e-07, + "loss": 18.9033, + "step": 433690 + }, + { + "epoch": 0.8761014394970851, + "grad_norm": 367.83795166015625, + "learning_rate": 5.2610722668205e-07, + "loss": 20.94, + "step": 433700 + }, + { + "epoch": 0.8761216401297689, + "grad_norm": 334.108642578125, + "learning_rate": 5.259513761301549e-07, + "loss": 19.7453, + "step": 433710 + }, + { + "epoch": 0.8761418407624527, + "grad_norm": 285.80804443359375, + "learning_rate": 5.257955473842314e-07, + "loss": 12.9342, + "step": 433720 + }, + { + "epoch": 0.8761620413951365, + "grad_norm": 207.70826721191406, + "learning_rate": 5.25639740445037e-07, + "loss": 20.3354, + "step": 433730 + }, + { + "epoch": 0.8761822420278204, + "grad_norm": 351.1932373046875, + "learning_rate": 5.254839553133312e-07, + "loss": 10.8909, + "step": 433740 + }, + { + "epoch": 0.8762024426605042, + "grad_norm": 3.709616184234619, + "learning_rate": 5.253281919898751e-07, + "loss": 32.6381, + "step": 433750 + }, + { + "epoch": 0.876222643293188, + "grad_norm": 195.4125213623047, + "learning_rate": 5.251724504754258e-07, + "loss": 9.7482, + "step": 433760 + }, + { + "epoch": 0.8762428439258717, + "grad_norm": 455.1700134277344, + "learning_rate": 5.250167307707437e-07, + "loss": 19.7197, + "step": 433770 + }, + { + "epoch": 0.8762630445585555, + "grad_norm": 357.58306884765625, + "learning_rate": 5.24861032876588e-07, + "loss": 20.1256, + "step": 433780 + }, + { + "epoch": 0.8762832451912393, + "grad_norm": 264.9433288574219, + "learning_rate": 5.247053567937155e-07, + "loss": 20.2208, + "step": 433790 + }, + { + "epoch": 0.8763034458239232, + "grad_norm": 93.81202697753906, + "learning_rate": 5.245497025228874e-07, + "loss": 32.6254, + "step": 433800 + }, + { + "epoch": 0.876323646456607, + "grad_norm": 574.3193359375, + "learning_rate": 5.243940700648609e-07, + "loss": 14.8747, + "step": 433810 + }, + { + "epoch": 0.8763438470892908, + "grad_norm": 114.02771759033203, + "learning_rate": 5.242384594203942e-07, + "loss": 19.4134, + "step": 433820 + }, + { + "epoch": 0.8763640477219746, + "grad_norm": 220.89328002929688, + "learning_rate": 5.240828705902462e-07, + "loss": 6.8964, + "step": 433830 + }, + { + "epoch": 0.8763842483546584, + "grad_norm": 213.7437744140625, + "learning_rate": 5.239273035751763e-07, + "loss": 30.1331, + "step": 433840 + }, + { + "epoch": 0.8764044489873423, + "grad_norm": 580.44189453125, + "learning_rate": 5.237717583759421e-07, + "loss": 16.9098, + "step": 433850 + }, + { + "epoch": 0.8764246496200261, + "grad_norm": 283.7548828125, + "learning_rate": 5.236162349933005e-07, + "loss": 16.622, + "step": 433860 + }, + { + "epoch": 0.8764448502527099, + "grad_norm": 517.726318359375, + "learning_rate": 5.234607334280117e-07, + "loss": 12.846, + "step": 433870 + }, + { + "epoch": 0.8764650508853937, + "grad_norm": 401.5696716308594, + "learning_rate": 5.23305253680832e-07, + "loss": 19.2747, + "step": 433880 + }, + { + "epoch": 0.8764852515180775, + "grad_norm": 517.2724609375, + "learning_rate": 5.231497957525184e-07, + "loss": 16.5344, + "step": 433890 + }, + { + "epoch": 0.8765054521507614, + "grad_norm": 118.29544067382812, + "learning_rate": 5.229943596438297e-07, + "loss": 17.5756, + "step": 433900 + }, + { + "epoch": 0.8765256527834452, + "grad_norm": 280.3024597167969, + "learning_rate": 5.22838945355525e-07, + "loss": 14.8495, + "step": 433910 + }, + { + "epoch": 0.876545853416129, + "grad_norm": 107.56294250488281, + "learning_rate": 5.2268355288836e-07, + "loss": 7.7354, + "step": 433920 + }, + { + "epoch": 0.8765660540488128, + "grad_norm": 171.7476043701172, + "learning_rate": 5.225281822430911e-07, + "loss": 17.65, + "step": 433930 + }, + { + "epoch": 0.8765862546814966, + "grad_norm": 568.8677368164062, + "learning_rate": 5.22372833420478e-07, + "loss": 31.2041, + "step": 433940 + }, + { + "epoch": 0.8766064553141805, + "grad_norm": 139.4942169189453, + "learning_rate": 5.222175064212764e-07, + "loss": 9.4249, + "step": 433950 + }, + { + "epoch": 0.8766266559468643, + "grad_norm": 631.3897094726562, + "learning_rate": 5.220622012462429e-07, + "loss": 24.6887, + "step": 433960 + }, + { + "epoch": 0.8766468565795481, + "grad_norm": 144.6816864013672, + "learning_rate": 5.219069178961361e-07, + "loss": 16.9325, + "step": 433970 + }, + { + "epoch": 0.8766670572122319, + "grad_norm": 431.0509948730469, + "learning_rate": 5.217516563717107e-07, + "loss": 28.0626, + "step": 433980 + }, + { + "epoch": 0.8766872578449157, + "grad_norm": 1093.6778564453125, + "learning_rate": 5.215964166737258e-07, + "loss": 23.2797, + "step": 433990 + }, + { + "epoch": 0.8767074584775996, + "grad_norm": 351.3003845214844, + "learning_rate": 5.214411988029355e-07, + "loss": 13.4805, + "step": 434000 + }, + { + "epoch": 0.8767276591102834, + "grad_norm": 317.1842041015625, + "learning_rate": 5.212860027600986e-07, + "loss": 13.3169, + "step": 434010 + }, + { + "epoch": 0.8767478597429671, + "grad_norm": 549.303466796875, + "learning_rate": 5.21130828545971e-07, + "loss": 9.3565, + "step": 434020 + }, + { + "epoch": 0.8767680603756509, + "grad_norm": 245.1389923095703, + "learning_rate": 5.209756761613072e-07, + "loss": 20.0705, + "step": 434030 + }, + { + "epoch": 0.8767882610083347, + "grad_norm": 56.18154525756836, + "learning_rate": 5.208205456068655e-07, + "loss": 20.1889, + "step": 434040 + }, + { + "epoch": 0.8768084616410186, + "grad_norm": 357.11993408203125, + "learning_rate": 5.206654368834002e-07, + "loss": 28.9057, + "step": 434050 + }, + { + "epoch": 0.8768286622737024, + "grad_norm": 261.443115234375, + "learning_rate": 5.205103499916697e-07, + "loss": 10.3796, + "step": 434060 + }, + { + "epoch": 0.8768488629063862, + "grad_norm": 862.103515625, + "learning_rate": 5.203552849324284e-07, + "loss": 18.4036, + "step": 434070 + }, + { + "epoch": 0.87686906353907, + "grad_norm": 325.0450439453125, + "learning_rate": 5.202002417064306e-07, + "loss": 28.2807, + "step": 434080 + }, + { + "epoch": 0.8768892641717538, + "grad_norm": 343.0401611328125, + "learning_rate": 5.200452203144352e-07, + "loss": 17.4518, + "step": 434090 + }, + { + "epoch": 0.8769094648044377, + "grad_norm": 277.0809326171875, + "learning_rate": 5.198902207571955e-07, + "loss": 12.2072, + "step": 434100 + }, + { + "epoch": 0.8769296654371215, + "grad_norm": 293.6759033203125, + "learning_rate": 5.197352430354669e-07, + "loss": 19.1621, + "step": 434110 + }, + { + "epoch": 0.8769498660698053, + "grad_norm": 180.9759979248047, + "learning_rate": 5.19580287150005e-07, + "loss": 25.1278, + "step": 434120 + }, + { + "epoch": 0.8769700667024891, + "grad_norm": 772.21435546875, + "learning_rate": 5.194253531015675e-07, + "loss": 16.2817, + "step": 434130 + }, + { + "epoch": 0.8769902673351729, + "grad_norm": 180.82733154296875, + "learning_rate": 5.192704408909055e-07, + "loss": 17.4808, + "step": 434140 + }, + { + "epoch": 0.8770104679678568, + "grad_norm": 152.46719360351562, + "learning_rate": 5.191155505187756e-07, + "loss": 17.088, + "step": 434150 + }, + { + "epoch": 0.8770306686005406, + "grad_norm": 28.198760986328125, + "learning_rate": 5.189606819859344e-07, + "loss": 10.0622, + "step": 434160 + }, + { + "epoch": 0.8770508692332244, + "grad_norm": 316.7867431640625, + "learning_rate": 5.188058352931352e-07, + "loss": 20.1294, + "step": 434170 + }, + { + "epoch": 0.8770710698659082, + "grad_norm": 655.7664794921875, + "learning_rate": 5.186510104411319e-07, + "loss": 20.0069, + "step": 434180 + }, + { + "epoch": 0.877091270498592, + "grad_norm": 229.0889129638672, + "learning_rate": 5.184962074306798e-07, + "loss": 17.3417, + "step": 434190 + }, + { + "epoch": 0.8771114711312759, + "grad_norm": 303.02496337890625, + "learning_rate": 5.183414262625364e-07, + "loss": 13.3623, + "step": 434200 + }, + { + "epoch": 0.8771316717639597, + "grad_norm": 140.8537139892578, + "learning_rate": 5.181866669374507e-07, + "loss": 12.6561, + "step": 434210 + }, + { + "epoch": 0.8771518723966435, + "grad_norm": 695.171875, + "learning_rate": 5.180319294561797e-07, + "loss": 21.0463, + "step": 434220 + }, + { + "epoch": 0.8771720730293273, + "grad_norm": 147.87380981445312, + "learning_rate": 5.178772138194782e-07, + "loss": 15.1771, + "step": 434230 + }, + { + "epoch": 0.8771922736620111, + "grad_norm": 681.9638061523438, + "learning_rate": 5.177225200281e-07, + "loss": 18.6806, + "step": 434240 + }, + { + "epoch": 0.877212474294695, + "grad_norm": 274.4325256347656, + "learning_rate": 5.175678480827972e-07, + "loss": 15.7013, + "step": 434250 + }, + { + "epoch": 0.8772326749273788, + "grad_norm": 392.75830078125, + "learning_rate": 5.174131979843266e-07, + "loss": 20.9433, + "step": 434260 + }, + { + "epoch": 0.8772528755600626, + "grad_norm": 167.98194885253906, + "learning_rate": 5.172585697334398e-07, + "loss": 20.6973, + "step": 434270 + }, + { + "epoch": 0.8772730761927463, + "grad_norm": 369.706298828125, + "learning_rate": 5.171039633308905e-07, + "loss": 14.5283, + "step": 434280 + }, + { + "epoch": 0.8772932768254301, + "grad_norm": 217.7832794189453, + "learning_rate": 5.169493787774338e-07, + "loss": 14.155, + "step": 434290 + }, + { + "epoch": 0.8773134774581139, + "grad_norm": 301.4135437011719, + "learning_rate": 5.167948160738206e-07, + "loss": 10.2434, + "step": 434300 + }, + { + "epoch": 0.8773336780907978, + "grad_norm": 12.187145233154297, + "learning_rate": 5.166402752208071e-07, + "loss": 7.3482, + "step": 434310 + }, + { + "epoch": 0.8773538787234816, + "grad_norm": 310.41082763671875, + "learning_rate": 5.164857562191439e-07, + "loss": 15.5349, + "step": 434320 + }, + { + "epoch": 0.8773740793561654, + "grad_norm": 570.3519897460938, + "learning_rate": 5.163312590695869e-07, + "loss": 20.4465, + "step": 434330 + }, + { + "epoch": 0.8773942799888492, + "grad_norm": 214.05459594726562, + "learning_rate": 5.161767837728871e-07, + "loss": 21.3943, + "step": 434340 + }, + { + "epoch": 0.877414480621533, + "grad_norm": 7.265511989593506, + "learning_rate": 5.160223303297967e-07, + "loss": 12.4559, + "step": 434350 + }, + { + "epoch": 0.8774346812542169, + "grad_norm": 231.39935302734375, + "learning_rate": 5.15867898741071e-07, + "loss": 17.0576, + "step": 434360 + }, + { + "epoch": 0.8774548818869007, + "grad_norm": 164.5531463623047, + "learning_rate": 5.1571348900746e-07, + "loss": 17.2305, + "step": 434370 + }, + { + "epoch": 0.8774750825195845, + "grad_norm": 292.3380432128906, + "learning_rate": 5.155591011297184e-07, + "loss": 28.4496, + "step": 434380 + }, + { + "epoch": 0.8774952831522683, + "grad_norm": 108.02655029296875, + "learning_rate": 5.154047351085983e-07, + "loss": 12.7543, + "step": 434390 + }, + { + "epoch": 0.8775154837849521, + "grad_norm": 410.6449279785156, + "learning_rate": 5.152503909448503e-07, + "loss": 19.7891, + "step": 434400 + }, + { + "epoch": 0.877535684417636, + "grad_norm": 595.8047485351562, + "learning_rate": 5.150960686392293e-07, + "loss": 21.3929, + "step": 434410 + }, + { + "epoch": 0.8775558850503198, + "grad_norm": 420.0340881347656, + "learning_rate": 5.149417681924856e-07, + "loss": 20.7275, + "step": 434420 + }, + { + "epoch": 0.8775760856830036, + "grad_norm": 73.7215576171875, + "learning_rate": 5.147874896053711e-07, + "loss": 15.5586, + "step": 434430 + }, + { + "epoch": 0.8775962863156874, + "grad_norm": 202.3787841796875, + "learning_rate": 5.146332328786386e-07, + "loss": 19.9977, + "step": 434440 + }, + { + "epoch": 0.8776164869483712, + "grad_norm": 172.36013793945312, + "learning_rate": 5.144789980130404e-07, + "loss": 14.8833, + "step": 434450 + }, + { + "epoch": 0.8776366875810551, + "grad_norm": 486.5126647949219, + "learning_rate": 5.143247850093274e-07, + "loss": 23.1488, + "step": 434460 + }, + { + "epoch": 0.8776568882137389, + "grad_norm": 287.8289489746094, + "learning_rate": 5.141705938682506e-07, + "loss": 19.85, + "step": 434470 + }, + { + "epoch": 0.8776770888464227, + "grad_norm": 246.57994079589844, + "learning_rate": 5.140164245905633e-07, + "loss": 14.6019, + "step": 434480 + }, + { + "epoch": 0.8776972894791065, + "grad_norm": 321.09375, + "learning_rate": 5.138622771770157e-07, + "loss": 20.4411, + "step": 434490 + }, + { + "epoch": 0.8777174901117903, + "grad_norm": 283.07440185546875, + "learning_rate": 5.137081516283582e-07, + "loss": 15.5579, + "step": 434500 + }, + { + "epoch": 0.8777376907444742, + "grad_norm": 343.1944580078125, + "learning_rate": 5.135540479453432e-07, + "loss": 16.5614, + "step": 434510 + }, + { + "epoch": 0.877757891377158, + "grad_norm": 273.73388671875, + "learning_rate": 5.133999661287226e-07, + "loss": 26.7135, + "step": 434520 + }, + { + "epoch": 0.8777780920098417, + "grad_norm": 213.76499938964844, + "learning_rate": 5.13245906179246e-07, + "loss": 34.9487, + "step": 434530 + }, + { + "epoch": 0.8777982926425255, + "grad_norm": 108.06747436523438, + "learning_rate": 5.130918680976643e-07, + "loss": 12.7697, + "step": 434540 + }, + { + "epoch": 0.8778184932752093, + "grad_norm": 184.45631408691406, + "learning_rate": 5.129378518847295e-07, + "loss": 21.9418, + "step": 434550 + }, + { + "epoch": 0.8778386939078932, + "grad_norm": 334.7638244628906, + "learning_rate": 5.127838575411908e-07, + "loss": 13.8198, + "step": 434560 + }, + { + "epoch": 0.877858894540577, + "grad_norm": 224.79470825195312, + "learning_rate": 5.126298850677991e-07, + "loss": 13.9634, + "step": 434570 + }, + { + "epoch": 0.8778790951732608, + "grad_norm": 226.68106079101562, + "learning_rate": 5.124759344653057e-07, + "loss": 8.0073, + "step": 434580 + }, + { + "epoch": 0.8778992958059446, + "grad_norm": 336.3865966796875, + "learning_rate": 5.123220057344597e-07, + "loss": 21.1141, + "step": 434590 + }, + { + "epoch": 0.8779194964386284, + "grad_norm": 348.7720947265625, + "learning_rate": 5.121680988760125e-07, + "loss": 19.3948, + "step": 434600 + }, + { + "epoch": 0.8779396970713123, + "grad_norm": 139.48974609375, + "learning_rate": 5.120142138907131e-07, + "loss": 15.3754, + "step": 434610 + }, + { + "epoch": 0.8779598977039961, + "grad_norm": 451.96209716796875, + "learning_rate": 5.11860350779313e-07, + "loss": 19.5513, + "step": 434620 + }, + { + "epoch": 0.8779800983366799, + "grad_norm": 367.45257568359375, + "learning_rate": 5.11706509542561e-07, + "loss": 15.1161, + "step": 434630 + }, + { + "epoch": 0.8780002989693637, + "grad_norm": 0.0, + "learning_rate": 5.115526901812062e-07, + "loss": 18.2227, + "step": 434640 + }, + { + "epoch": 0.8780204996020475, + "grad_norm": 395.2568359375, + "learning_rate": 5.113988926960001e-07, + "loss": 16.2354, + "step": 434650 + }, + { + "epoch": 0.8780407002347314, + "grad_norm": 362.00653076171875, + "learning_rate": 5.112451170876903e-07, + "loss": 18.4036, + "step": 434660 + }, + { + "epoch": 0.8780609008674152, + "grad_norm": 210.4418487548828, + "learning_rate": 5.110913633570286e-07, + "loss": 29.6425, + "step": 434670 + }, + { + "epoch": 0.878081101500099, + "grad_norm": 278.4564514160156, + "learning_rate": 5.109376315047632e-07, + "loss": 12.1836, + "step": 434680 + }, + { + "epoch": 0.8781013021327828, + "grad_norm": 403.2332458496094, + "learning_rate": 5.107839215316424e-07, + "loss": 26.7823, + "step": 434690 + }, + { + "epoch": 0.8781215027654666, + "grad_norm": 302.29144287109375, + "learning_rate": 5.106302334384172e-07, + "loss": 16.1126, + "step": 434700 + }, + { + "epoch": 0.8781417033981505, + "grad_norm": 496.5931701660156, + "learning_rate": 5.104765672258355e-07, + "loss": 23.4361, + "step": 434710 + }, + { + "epoch": 0.8781619040308343, + "grad_norm": 290.95648193359375, + "learning_rate": 5.103229228946455e-07, + "loss": 12.0244, + "step": 434720 + }, + { + "epoch": 0.8781821046635181, + "grad_norm": 125.8919906616211, + "learning_rate": 5.101693004455977e-07, + "loss": 10.4043, + "step": 434730 + }, + { + "epoch": 0.8782023052962019, + "grad_norm": 319.6761474609375, + "learning_rate": 5.100156998794415e-07, + "loss": 17.449, + "step": 434740 + }, + { + "epoch": 0.8782225059288857, + "grad_norm": 253.58807373046875, + "learning_rate": 5.098621211969224e-07, + "loss": 19.8291, + "step": 434750 + }, + { + "epoch": 0.8782427065615696, + "grad_norm": 12.38747787475586, + "learning_rate": 5.09708564398791e-07, + "loss": 12.9324, + "step": 434760 + }, + { + "epoch": 0.8782629071942534, + "grad_norm": 227.8798065185547, + "learning_rate": 5.095550294857959e-07, + "loss": 13.6221, + "step": 434770 + }, + { + "epoch": 0.8782831078269372, + "grad_norm": 225.2969970703125, + "learning_rate": 5.094015164586852e-07, + "loss": 15.3277, + "step": 434780 + }, + { + "epoch": 0.8783033084596209, + "grad_norm": 224.01995849609375, + "learning_rate": 5.092480253182058e-07, + "loss": 21.3851, + "step": 434790 + }, + { + "epoch": 0.8783235090923047, + "grad_norm": 125.3165512084961, + "learning_rate": 5.090945560651073e-07, + "loss": 16.2378, + "step": 434800 + }, + { + "epoch": 0.8783437097249885, + "grad_norm": 240.43637084960938, + "learning_rate": 5.08941108700139e-07, + "loss": 8.1387, + "step": 434810 + }, + { + "epoch": 0.8783639103576724, + "grad_norm": 198.0110626220703, + "learning_rate": 5.087876832240446e-07, + "loss": 22.9592, + "step": 434820 + }, + { + "epoch": 0.8783841109903562, + "grad_norm": 131.37515258789062, + "learning_rate": 5.086342796375749e-07, + "loss": 6.9379, + "step": 434830 + }, + { + "epoch": 0.87840431162304, + "grad_norm": 170.5103302001953, + "learning_rate": 5.084808979414779e-07, + "loss": 15.2344, + "step": 434840 + }, + { + "epoch": 0.8784245122557238, + "grad_norm": 158.17288208007812, + "learning_rate": 5.083275381364999e-07, + "loss": 24.6042, + "step": 434850 + }, + { + "epoch": 0.8784447128884076, + "grad_norm": 281.2638244628906, + "learning_rate": 5.081742002233881e-07, + "loss": 13.3936, + "step": 434860 + }, + { + "epoch": 0.8784649135210915, + "grad_norm": 300.9057922363281, + "learning_rate": 5.080208842028911e-07, + "loss": 25.3561, + "step": 434870 + }, + { + "epoch": 0.8784851141537753, + "grad_norm": 394.9960021972656, + "learning_rate": 5.078675900757557e-07, + "loss": 21.546, + "step": 434880 + }, + { + "epoch": 0.8785053147864591, + "grad_norm": 309.2256774902344, + "learning_rate": 5.07714317842728e-07, + "loss": 12.8756, + "step": 434890 + }, + { + "epoch": 0.8785255154191429, + "grad_norm": 348.014892578125, + "learning_rate": 5.075610675045567e-07, + "loss": 16.8524, + "step": 434900 + }, + { + "epoch": 0.8785457160518267, + "grad_norm": 316.774169921875, + "learning_rate": 5.074078390619869e-07, + "loss": 15.8469, + "step": 434910 + }, + { + "epoch": 0.8785659166845106, + "grad_norm": 376.3726806640625, + "learning_rate": 5.072546325157673e-07, + "loss": 12.6933, + "step": 434920 + }, + { + "epoch": 0.8785861173171944, + "grad_norm": 225.87057495117188, + "learning_rate": 5.071014478666425e-07, + "loss": 15.8447, + "step": 434930 + }, + { + "epoch": 0.8786063179498782, + "grad_norm": 293.9652404785156, + "learning_rate": 5.069482851153618e-07, + "loss": 14.8278, + "step": 434940 + }, + { + "epoch": 0.878626518582562, + "grad_norm": 228.35572814941406, + "learning_rate": 5.0679514426267e-07, + "loss": 16.2477, + "step": 434950 + }, + { + "epoch": 0.8786467192152458, + "grad_norm": 12.565250396728516, + "learning_rate": 5.06642025309313e-07, + "loss": 10.6839, + "step": 434960 + }, + { + "epoch": 0.8786669198479297, + "grad_norm": 613.4114990234375, + "learning_rate": 5.064889282560382e-07, + "loss": 22.458, + "step": 434970 + }, + { + "epoch": 0.8786871204806135, + "grad_norm": 112.433837890625, + "learning_rate": 5.063358531035906e-07, + "loss": 13.6767, + "step": 434980 + }, + { + "epoch": 0.8787073211132973, + "grad_norm": 289.1630554199219, + "learning_rate": 5.06182799852718e-07, + "loss": 13.6845, + "step": 434990 + }, + { + "epoch": 0.8787275217459811, + "grad_norm": 907.2689819335938, + "learning_rate": 5.06029768504166e-07, + "loss": 23.3365, + "step": 435000 + }, + { + "epoch": 0.8787477223786649, + "grad_norm": 188.04006958007812, + "learning_rate": 5.058767590586783e-07, + "loss": 15.6127, + "step": 435010 + }, + { + "epoch": 0.8787679230113488, + "grad_norm": 290.1544189453125, + "learning_rate": 5.057237715170032e-07, + "loss": 15.7476, + "step": 435020 + }, + { + "epoch": 0.8787881236440326, + "grad_norm": 391.63677978515625, + "learning_rate": 5.055708058798853e-07, + "loss": 22.9555, + "step": 435030 + }, + { + "epoch": 0.8788083242767164, + "grad_norm": 388.8981628417969, + "learning_rate": 5.054178621480694e-07, + "loss": 16.7432, + "step": 435040 + }, + { + "epoch": 0.8788285249094001, + "grad_norm": 307.1207275390625, + "learning_rate": 5.052649403223015e-07, + "loss": 20.3882, + "step": 435050 + }, + { + "epoch": 0.8788487255420839, + "grad_norm": 291.4189147949219, + "learning_rate": 5.051120404033283e-07, + "loss": 17.1048, + "step": 435060 + }, + { + "epoch": 0.8788689261747678, + "grad_norm": 41.89212417602539, + "learning_rate": 5.049591623918937e-07, + "loss": 25.6163, + "step": 435070 + }, + { + "epoch": 0.8788891268074516, + "grad_norm": 299.0412902832031, + "learning_rate": 5.04806306288742e-07, + "loss": 20.8267, + "step": 435080 + }, + { + "epoch": 0.8789093274401354, + "grad_norm": 344.2184143066406, + "learning_rate": 5.046534720946206e-07, + "loss": 17.9315, + "step": 435090 + }, + { + "epoch": 0.8789295280728192, + "grad_norm": 13.170239448547363, + "learning_rate": 5.045006598102725e-07, + "loss": 23.2014, + "step": 435100 + }, + { + "epoch": 0.878949728705503, + "grad_norm": 234.60589599609375, + "learning_rate": 5.043478694364423e-07, + "loss": 15.3005, + "step": 435110 + }, + { + "epoch": 0.8789699293381869, + "grad_norm": 296.8214416503906, + "learning_rate": 5.04195100973875e-07, + "loss": 15.956, + "step": 435120 + }, + { + "epoch": 0.8789901299708707, + "grad_norm": 175.46246337890625, + "learning_rate": 5.040423544233164e-07, + "loss": 15.195, + "step": 435130 + }, + { + "epoch": 0.8790103306035545, + "grad_norm": 334.2615661621094, + "learning_rate": 5.0388962978551e-07, + "loss": 8.3599, + "step": 435140 + }, + { + "epoch": 0.8790305312362383, + "grad_norm": 187.0152130126953, + "learning_rate": 5.037369270611997e-07, + "loss": 15.5271, + "step": 435150 + }, + { + "epoch": 0.8790507318689221, + "grad_norm": 99.10022735595703, + "learning_rate": 5.035842462511309e-07, + "loss": 31.9883, + "step": 435160 + }, + { + "epoch": 0.879070932501606, + "grad_norm": 145.28305053710938, + "learning_rate": 5.034315873560475e-07, + "loss": 15.9212, + "step": 435170 + }, + { + "epoch": 0.8790911331342898, + "grad_norm": 272.4645080566406, + "learning_rate": 5.032789503766922e-07, + "loss": 19.081, + "step": 435180 + }, + { + "epoch": 0.8791113337669736, + "grad_norm": 275.4917907714844, + "learning_rate": 5.031263353138105e-07, + "loss": 24.3935, + "step": 435190 + }, + { + "epoch": 0.8791315343996574, + "grad_norm": 224.77224731445312, + "learning_rate": 5.029737421681446e-07, + "loss": 26.4198, + "step": 435200 + }, + { + "epoch": 0.8791517350323412, + "grad_norm": 267.1885070800781, + "learning_rate": 5.028211709404407e-07, + "loss": 24.4465, + "step": 435210 + }, + { + "epoch": 0.879171935665025, + "grad_norm": 468.73748779296875, + "learning_rate": 5.026686216314397e-07, + "loss": 14.3564, + "step": 435220 + }, + { + "epoch": 0.8791921362977089, + "grad_norm": 160.945556640625, + "learning_rate": 5.025160942418872e-07, + "loss": 16.11, + "step": 435230 + }, + { + "epoch": 0.8792123369303927, + "grad_norm": 464.5129089355469, + "learning_rate": 5.023635887725259e-07, + "loss": 18.668, + "step": 435240 + }, + { + "epoch": 0.8792325375630765, + "grad_norm": 263.7014465332031, + "learning_rate": 5.022111052240985e-07, + "loss": 19.4779, + "step": 435250 + }, + { + "epoch": 0.8792527381957603, + "grad_norm": 104.21866607666016, + "learning_rate": 5.020586435973491e-07, + "loss": 20.5533, + "step": 435260 + }, + { + "epoch": 0.8792729388284442, + "grad_norm": 420.8669738769531, + "learning_rate": 5.019062038930195e-07, + "loss": 21.5184, + "step": 435270 + }, + { + "epoch": 0.879293139461128, + "grad_norm": 229.93014526367188, + "learning_rate": 5.017537861118543e-07, + "loss": 15.564, + "step": 435280 + }, + { + "epoch": 0.8793133400938118, + "grad_norm": 79.67237854003906, + "learning_rate": 5.016013902545957e-07, + "loss": 13.9606, + "step": 435290 + }, + { + "epoch": 0.8793335407264955, + "grad_norm": 224.75228881835938, + "learning_rate": 5.014490163219854e-07, + "loss": 18.3317, + "step": 435300 + }, + { + "epoch": 0.8793537413591793, + "grad_norm": 92.64762115478516, + "learning_rate": 5.012966643147682e-07, + "loss": 10.4126, + "step": 435310 + }, + { + "epoch": 0.8793739419918631, + "grad_norm": 328.7536926269531, + "learning_rate": 5.011443342336852e-07, + "loss": 9.2842, + "step": 435320 + }, + { + "epoch": 0.879394142624547, + "grad_norm": 136.69464111328125, + "learning_rate": 5.009920260794782e-07, + "loss": 10.1657, + "step": 435330 + }, + { + "epoch": 0.8794143432572308, + "grad_norm": 213.4883270263672, + "learning_rate": 5.008397398528903e-07, + "loss": 11.9219, + "step": 435340 + }, + { + "epoch": 0.8794345438899146, + "grad_norm": 344.8109436035156, + "learning_rate": 5.006874755546654e-07, + "loss": 16.0257, + "step": 435350 + }, + { + "epoch": 0.8794547445225984, + "grad_norm": 716.1160278320312, + "learning_rate": 5.005352331855423e-07, + "loss": 18.9226, + "step": 435360 + }, + { + "epoch": 0.8794749451552822, + "grad_norm": 349.8487854003906, + "learning_rate": 5.00383012746265e-07, + "loss": 30.1648, + "step": 435370 + }, + { + "epoch": 0.8794951457879661, + "grad_norm": 137.87156677246094, + "learning_rate": 5.002308142375762e-07, + "loss": 13.6947, + "step": 435380 + }, + { + "epoch": 0.8795153464206499, + "grad_norm": 49.19867706298828, + "learning_rate": 5.000786376602162e-07, + "loss": 13.3448, + "step": 435390 + }, + { + "epoch": 0.8795355470533337, + "grad_norm": 270.9266052246094, + "learning_rate": 4.99926483014927e-07, + "loss": 13.8228, + "step": 435400 + }, + { + "epoch": 0.8795557476860175, + "grad_norm": 215.8948516845703, + "learning_rate": 4.997743503024494e-07, + "loss": 18.7824, + "step": 435410 + }, + { + "epoch": 0.8795759483187013, + "grad_norm": 124.60919952392578, + "learning_rate": 4.996222395235283e-07, + "loss": 22.5475, + "step": 435420 + }, + { + "epoch": 0.8795961489513852, + "grad_norm": 195.76988220214844, + "learning_rate": 4.994701506789007e-07, + "loss": 16.7493, + "step": 435430 + }, + { + "epoch": 0.879616349584069, + "grad_norm": 357.6434020996094, + "learning_rate": 4.99318083769309e-07, + "loss": 13.8251, + "step": 435440 + }, + { + "epoch": 0.8796365502167528, + "grad_norm": 467.0783386230469, + "learning_rate": 4.991660387954967e-07, + "loss": 21.4056, + "step": 435450 + }, + { + "epoch": 0.8796567508494366, + "grad_norm": 240.75360107421875, + "learning_rate": 4.990140157582036e-07, + "loss": 13.7297, + "step": 435460 + }, + { + "epoch": 0.8796769514821204, + "grad_norm": 85.39241027832031, + "learning_rate": 4.988620146581685e-07, + "loss": 15.4104, + "step": 435470 + }, + { + "epoch": 0.8796971521148043, + "grad_norm": 165.2989044189453, + "learning_rate": 4.987100354961355e-07, + "loss": 15.2054, + "step": 435480 + }, + { + "epoch": 0.8797173527474881, + "grad_norm": 404.2278137207031, + "learning_rate": 4.985580782728433e-07, + "loss": 18.9878, + "step": 435490 + }, + { + "epoch": 0.8797375533801719, + "grad_norm": 328.2271423339844, + "learning_rate": 4.984061429890324e-07, + "loss": 13.1969, + "step": 435500 + }, + { + "epoch": 0.8797577540128557, + "grad_norm": 207.3946075439453, + "learning_rate": 4.98254229645444e-07, + "loss": 23.9554, + "step": 435510 + }, + { + "epoch": 0.8797779546455395, + "grad_norm": 303.2662048339844, + "learning_rate": 4.981023382428196e-07, + "loss": 26.0353, + "step": 435520 + }, + { + "epoch": 0.8797981552782234, + "grad_norm": 275.2373962402344, + "learning_rate": 4.979504687818987e-07, + "loss": 21.5372, + "step": 435530 + }, + { + "epoch": 0.8798183559109072, + "grad_norm": 281.1122131347656, + "learning_rate": 4.977986212634195e-07, + "loss": 19.5761, + "step": 435540 + }, + { + "epoch": 0.879838556543591, + "grad_norm": 407.7804260253906, + "learning_rate": 4.976467956881254e-07, + "loss": 19.3831, + "step": 435550 + }, + { + "epoch": 0.8798587571762747, + "grad_norm": 180.14730834960938, + "learning_rate": 4.97494992056754e-07, + "loss": 18.2869, + "step": 435560 + }, + { + "epoch": 0.8798789578089585, + "grad_norm": 247.82423400878906, + "learning_rate": 4.973432103700454e-07, + "loss": 26.2631, + "step": 435570 + }, + { + "epoch": 0.8798991584416423, + "grad_norm": 193.4473419189453, + "learning_rate": 4.971914506287407e-07, + "loss": 22.6933, + "step": 435580 + }, + { + "epoch": 0.8799193590743262, + "grad_norm": 227.37025451660156, + "learning_rate": 4.97039712833578e-07, + "loss": 29.4937, + "step": 435590 + }, + { + "epoch": 0.87993955970701, + "grad_norm": 331.55841064453125, + "learning_rate": 4.968879969852985e-07, + "loss": 20.8808, + "step": 435600 + }, + { + "epoch": 0.8799597603396938, + "grad_norm": 24.882539749145508, + "learning_rate": 4.967363030846406e-07, + "loss": 21.8269, + "step": 435610 + }, + { + "epoch": 0.8799799609723776, + "grad_norm": 351.8355712890625, + "learning_rate": 4.965846311323431e-07, + "loss": 24.9885, + "step": 435620 + }, + { + "epoch": 0.8800001616050614, + "grad_norm": 178.48428344726562, + "learning_rate": 4.964329811291463e-07, + "loss": 10.7361, + "step": 435630 + }, + { + "epoch": 0.8800203622377453, + "grad_norm": 39.56497573852539, + "learning_rate": 4.962813530757893e-07, + "loss": 15.5416, + "step": 435640 + }, + { + "epoch": 0.8800405628704291, + "grad_norm": 33.96110916137695, + "learning_rate": 4.961297469730097e-07, + "loss": 14.3991, + "step": 435650 + }, + { + "epoch": 0.8800607635031129, + "grad_norm": 327.0130310058594, + "learning_rate": 4.959781628215476e-07, + "loss": 18.6497, + "step": 435660 + }, + { + "epoch": 0.8800809641357967, + "grad_norm": 71.4016342163086, + "learning_rate": 4.95826600622143e-07, + "loss": 16.6944, + "step": 435670 + }, + { + "epoch": 0.8801011647684805, + "grad_norm": 6.107741355895996, + "learning_rate": 4.956750603755328e-07, + "loss": 13.4702, + "step": 435680 + }, + { + "epoch": 0.8801213654011644, + "grad_norm": 260.7806701660156, + "learning_rate": 4.95523542082455e-07, + "loss": 15.5015, + "step": 435690 + }, + { + "epoch": 0.8801415660338482, + "grad_norm": 563.1996459960938, + "learning_rate": 4.9537204574365e-07, + "loss": 25.0301, + "step": 435700 + }, + { + "epoch": 0.880161766666532, + "grad_norm": 576.7715454101562, + "learning_rate": 4.952205713598557e-07, + "loss": 22.6019, + "step": 435710 + }, + { + "epoch": 0.8801819672992158, + "grad_norm": 203.0591278076172, + "learning_rate": 4.950691189318086e-07, + "loss": 12.8879, + "step": 435720 + }, + { + "epoch": 0.8802021679318996, + "grad_norm": 191.82142639160156, + "learning_rate": 4.949176884602486e-07, + "loss": 40.915, + "step": 435730 + }, + { + "epoch": 0.8802223685645835, + "grad_norm": 185.26254272460938, + "learning_rate": 4.947662799459152e-07, + "loss": 18.2691, + "step": 435740 + }, + { + "epoch": 0.8802425691972673, + "grad_norm": 203.30308532714844, + "learning_rate": 4.946148933895423e-07, + "loss": 23.5285, + "step": 435750 + }, + { + "epoch": 0.8802627698299511, + "grad_norm": 70.34530639648438, + "learning_rate": 4.944635287918703e-07, + "loss": 9.6555, + "step": 435760 + }, + { + "epoch": 0.8802829704626349, + "grad_norm": 172.27745056152344, + "learning_rate": 4.943121861536376e-07, + "loss": 19.9147, + "step": 435770 + }, + { + "epoch": 0.8803031710953187, + "grad_norm": 197.68466186523438, + "learning_rate": 4.941608654755808e-07, + "loss": 10.9635, + "step": 435780 + }, + { + "epoch": 0.8803233717280026, + "grad_norm": 509.8621826171875, + "learning_rate": 4.940095667584366e-07, + "loss": 21.1964, + "step": 435790 + }, + { + "epoch": 0.8803435723606864, + "grad_norm": 197.72409057617188, + "learning_rate": 4.938582900029437e-07, + "loss": 20.0291, + "step": 435800 + }, + { + "epoch": 0.8803637729933701, + "grad_norm": 461.6060485839844, + "learning_rate": 4.937070352098384e-07, + "loss": 23.1458, + "step": 435810 + }, + { + "epoch": 0.8803839736260539, + "grad_norm": 458.0393981933594, + "learning_rate": 4.935558023798592e-07, + "loss": 18.0468, + "step": 435820 + }, + { + "epoch": 0.8804041742587377, + "grad_norm": 129.13357543945312, + "learning_rate": 4.934045915137419e-07, + "loss": 8.1902, + "step": 435830 + }, + { + "epoch": 0.8804243748914216, + "grad_norm": 549.9965209960938, + "learning_rate": 4.932534026122249e-07, + "loss": 11.7202, + "step": 435840 + }, + { + "epoch": 0.8804445755241054, + "grad_norm": 248.71759033203125, + "learning_rate": 4.931022356760439e-07, + "loss": 24.2159, + "step": 435850 + }, + { + "epoch": 0.8804647761567892, + "grad_norm": 260.09674072265625, + "learning_rate": 4.929510907059354e-07, + "loss": 19.5019, + "step": 435860 + }, + { + "epoch": 0.880484976789473, + "grad_norm": 16.353187561035156, + "learning_rate": 4.927999677026374e-07, + "loss": 14.9975, + "step": 435870 + }, + { + "epoch": 0.8805051774221568, + "grad_norm": 2.6839330196380615, + "learning_rate": 4.926488666668844e-07, + "loss": 12.1912, + "step": 435880 + }, + { + "epoch": 0.8805253780548407, + "grad_norm": 370.10015869140625, + "learning_rate": 4.924977875994159e-07, + "loss": 17.5826, + "step": 435890 + }, + { + "epoch": 0.8805455786875245, + "grad_norm": 219.7120819091797, + "learning_rate": 4.92346730500966e-07, + "loss": 8.7014, + "step": 435900 + }, + { + "epoch": 0.8805657793202083, + "grad_norm": 147.40560913085938, + "learning_rate": 4.921956953722701e-07, + "loss": 10.3238, + "step": 435910 + }, + { + "epoch": 0.8805859799528921, + "grad_norm": 225.93197631835938, + "learning_rate": 4.920446822140673e-07, + "loss": 10.7507, + "step": 435920 + }, + { + "epoch": 0.8806061805855759, + "grad_norm": 371.28826904296875, + "learning_rate": 4.918936910270916e-07, + "loss": 11.0804, + "step": 435930 + }, + { + "epoch": 0.8806263812182598, + "grad_norm": 154.0360565185547, + "learning_rate": 4.917427218120785e-07, + "loss": 22.1393, + "step": 435940 + }, + { + "epoch": 0.8806465818509436, + "grad_norm": 220.62680053710938, + "learning_rate": 4.915917745697645e-07, + "loss": 21.1326, + "step": 435950 + }, + { + "epoch": 0.8806667824836274, + "grad_norm": 273.08770751953125, + "learning_rate": 4.914408493008871e-07, + "loss": 26.6632, + "step": 435960 + }, + { + "epoch": 0.8806869831163112, + "grad_norm": 73.09130859375, + "learning_rate": 4.912899460061787e-07, + "loss": 13.6529, + "step": 435970 + }, + { + "epoch": 0.880707183748995, + "grad_norm": 258.6308288574219, + "learning_rate": 4.911390646863757e-07, + "loss": 11.7623, + "step": 435980 + }, + { + "epoch": 0.8807273843816789, + "grad_norm": 500.3720703125, + "learning_rate": 4.909882053422154e-07, + "loss": 17.3371, + "step": 435990 + }, + { + "epoch": 0.8807475850143627, + "grad_norm": 126.74539184570312, + "learning_rate": 4.908373679744316e-07, + "loss": 17.5183, + "step": 436000 + }, + { + "epoch": 0.8807677856470465, + "grad_norm": 88.03468322753906, + "learning_rate": 4.90686552583759e-07, + "loss": 16.3623, + "step": 436010 + }, + { + "epoch": 0.8807879862797303, + "grad_norm": 169.06326293945312, + "learning_rate": 4.905357591709325e-07, + "loss": 43.2152, + "step": 436020 + }, + { + "epoch": 0.8808081869124141, + "grad_norm": 191.628662109375, + "learning_rate": 4.9038498773669e-07, + "loss": 16.7811, + "step": 436030 + }, + { + "epoch": 0.880828387545098, + "grad_norm": 302.7685546875, + "learning_rate": 4.902342382817626e-07, + "loss": 33.722, + "step": 436040 + }, + { + "epoch": 0.8808485881777818, + "grad_norm": 218.401611328125, + "learning_rate": 4.900835108068863e-07, + "loss": 9.8418, + "step": 436050 + }, + { + "epoch": 0.8808687888104656, + "grad_norm": 253.24652099609375, + "learning_rate": 4.899328053127966e-07, + "loss": 20.1009, + "step": 436060 + }, + { + "epoch": 0.8808889894431493, + "grad_norm": 271.9893493652344, + "learning_rate": 4.89782121800228e-07, + "loss": 14.6151, + "step": 436070 + }, + { + "epoch": 0.8809091900758331, + "grad_norm": 162.2383575439453, + "learning_rate": 4.896314602699126e-07, + "loss": 15.5056, + "step": 436080 + }, + { + "epoch": 0.880929390708517, + "grad_norm": 372.5563659667969, + "learning_rate": 4.894808207225882e-07, + "loss": 22.343, + "step": 436090 + }, + { + "epoch": 0.8809495913412008, + "grad_norm": 243.0578155517578, + "learning_rate": 4.893302031589864e-07, + "loss": 14.2327, + "step": 436100 + }, + { + "epoch": 0.8809697919738846, + "grad_norm": 303.23931884765625, + "learning_rate": 4.891796075798416e-07, + "loss": 41.0122, + "step": 436110 + }, + { + "epoch": 0.8809899926065684, + "grad_norm": 368.97467041015625, + "learning_rate": 4.890290339858883e-07, + "loss": 14.1473, + "step": 436120 + }, + { + "epoch": 0.8810101932392522, + "grad_norm": 220.75567626953125, + "learning_rate": 4.888784823778614e-07, + "loss": 14.5418, + "step": 436130 + }, + { + "epoch": 0.881030393871936, + "grad_norm": 265.8623352050781, + "learning_rate": 4.887279527564936e-07, + "loss": 8.7599, + "step": 436140 + }, + { + "epoch": 0.8810505945046199, + "grad_norm": 377.706787109375, + "learning_rate": 4.885774451225178e-07, + "loss": 17.3379, + "step": 436150 + }, + { + "epoch": 0.8810707951373037, + "grad_norm": 299.96881103515625, + "learning_rate": 4.884269594766689e-07, + "loss": 17.8336, + "step": 436160 + }, + { + "epoch": 0.8810909957699875, + "grad_norm": 175.4420928955078, + "learning_rate": 4.8827649581968e-07, + "loss": 18.5962, + "step": 436170 + }, + { + "epoch": 0.8811111964026713, + "grad_norm": 351.2290344238281, + "learning_rate": 4.881260541522831e-07, + "loss": 27.2167, + "step": 436180 + }, + { + "epoch": 0.8811313970353551, + "grad_norm": 373.7426452636719, + "learning_rate": 4.87975634475214e-07, + "loss": 22.5026, + "step": 436190 + }, + { + "epoch": 0.881151597668039, + "grad_norm": 329.24462890625, + "learning_rate": 4.878252367892033e-07, + "loss": 16.8049, + "step": 436200 + }, + { + "epoch": 0.8811717983007228, + "grad_norm": 270.5513610839844, + "learning_rate": 4.87674861094986e-07, + "loss": 11.1077, + "step": 436210 + }, + { + "epoch": 0.8811919989334066, + "grad_norm": 193.58004760742188, + "learning_rate": 4.875245073932944e-07, + "loss": 10.6014, + "step": 436220 + }, + { + "epoch": 0.8812121995660904, + "grad_norm": 215.2318115234375, + "learning_rate": 4.873741756848594e-07, + "loss": 22.3199, + "step": 436230 + }, + { + "epoch": 0.8812324001987742, + "grad_norm": 210.8828125, + "learning_rate": 4.87223865970417e-07, + "loss": 12.7562, + "step": 436240 + }, + { + "epoch": 0.8812526008314581, + "grad_norm": 309.1809997558594, + "learning_rate": 4.87073578250698e-07, + "loss": 12.5274, + "step": 436250 + }, + { + "epoch": 0.8812728014641419, + "grad_norm": 217.73388671875, + "learning_rate": 4.869233125264339e-07, + "loss": 9.0682, + "step": 436260 + }, + { + "epoch": 0.8812930020968257, + "grad_norm": 276.2313537597656, + "learning_rate": 4.867730687983585e-07, + "loss": 11.0931, + "step": 436270 + }, + { + "epoch": 0.8813132027295095, + "grad_norm": 293.5137023925781, + "learning_rate": 4.866228470672041e-07, + "loss": 11.4537, + "step": 436280 + }, + { + "epoch": 0.8813334033621933, + "grad_norm": 161.2014923095703, + "learning_rate": 4.864726473337034e-07, + "loss": 20.0816, + "step": 436290 + }, + { + "epoch": 0.8813536039948772, + "grad_norm": 303.9931640625, + "learning_rate": 4.863224695985858e-07, + "loss": 18.5378, + "step": 436300 + }, + { + "epoch": 0.881373804627561, + "grad_norm": 268.39166259765625, + "learning_rate": 4.861723138625862e-07, + "loss": 16.5742, + "step": 436310 + }, + { + "epoch": 0.8813940052602448, + "grad_norm": 335.0809631347656, + "learning_rate": 4.860221801264358e-07, + "loss": 17.8246, + "step": 436320 + }, + { + "epoch": 0.8814142058929285, + "grad_norm": 234.76483154296875, + "learning_rate": 4.858720683908646e-07, + "loss": 21.3723, + "step": 436330 + }, + { + "epoch": 0.8814344065256123, + "grad_norm": 373.08587646484375, + "learning_rate": 4.857219786566053e-07, + "loss": 22.7518, + "step": 436340 + }, + { + "epoch": 0.8814546071582962, + "grad_norm": 366.5385437011719, + "learning_rate": 4.855719109243917e-07, + "loss": 22.0361, + "step": 436350 + }, + { + "epoch": 0.88147480779098, + "grad_norm": 180.44496154785156, + "learning_rate": 4.85421865194951e-07, + "loss": 26.9665, + "step": 436360 + }, + { + "epoch": 0.8814950084236638, + "grad_norm": 551.3493041992188, + "learning_rate": 4.852718414690166e-07, + "loss": 12.4436, + "step": 436370 + }, + { + "epoch": 0.8815152090563476, + "grad_norm": 233.517578125, + "learning_rate": 4.851218397473206e-07, + "loss": 34.3805, + "step": 436380 + }, + { + "epoch": 0.8815354096890314, + "grad_norm": 461.6867980957031, + "learning_rate": 4.84971860030593e-07, + "loss": 23.0374, + "step": 436390 + }, + { + "epoch": 0.8815556103217153, + "grad_norm": 128.4858856201172, + "learning_rate": 4.848219023195644e-07, + "loss": 9.9658, + "step": 436400 + }, + { + "epoch": 0.8815758109543991, + "grad_norm": 190.66598510742188, + "learning_rate": 4.846719666149668e-07, + "loss": 15.6482, + "step": 436410 + }, + { + "epoch": 0.8815960115870829, + "grad_norm": 330.7430725097656, + "learning_rate": 4.845220529175304e-07, + "loss": 16.6642, + "step": 436420 + }, + { + "epoch": 0.8816162122197667, + "grad_norm": 388.33587646484375, + "learning_rate": 4.84372161227985e-07, + "loss": 13.6561, + "step": 436430 + }, + { + "epoch": 0.8816364128524505, + "grad_norm": 1.1295737028121948, + "learning_rate": 4.842222915470618e-07, + "loss": 12.1377, + "step": 436440 + }, + { + "epoch": 0.8816566134851344, + "grad_norm": 228.8750457763672, + "learning_rate": 4.840724438754929e-07, + "loss": 21.4074, + "step": 436450 + }, + { + "epoch": 0.8816768141178182, + "grad_norm": 345.87420654296875, + "learning_rate": 4.839226182140072e-07, + "loss": 14.3642, + "step": 436460 + }, + { + "epoch": 0.881697014750502, + "grad_norm": 318.98095703125, + "learning_rate": 4.837728145633335e-07, + "loss": 20.8921, + "step": 436470 + }, + { + "epoch": 0.8817172153831858, + "grad_norm": 273.87939453125, + "learning_rate": 4.836230329242042e-07, + "loss": 21.3956, + "step": 436480 + }, + { + "epoch": 0.8817374160158696, + "grad_norm": 107.8420181274414, + "learning_rate": 4.83473273297348e-07, + "loss": 18.0669, + "step": 436490 + }, + { + "epoch": 0.8817576166485535, + "grad_norm": 210.99746704101562, + "learning_rate": 4.833235356834959e-07, + "loss": 16.3307, + "step": 436500 + }, + { + "epoch": 0.8817778172812373, + "grad_norm": 315.83880615234375, + "learning_rate": 4.831738200833775e-07, + "loss": 14.4722, + "step": 436510 + }, + { + "epoch": 0.8817980179139211, + "grad_norm": 388.5218505859375, + "learning_rate": 4.830241264977209e-07, + "loss": 19.1907, + "step": 436520 + }, + { + "epoch": 0.8818182185466049, + "grad_norm": 235.53318786621094, + "learning_rate": 4.828744549272579e-07, + "loss": 21.654, + "step": 436530 + }, + { + "epoch": 0.8818384191792887, + "grad_norm": 498.8838806152344, + "learning_rate": 4.827248053727168e-07, + "loss": 18.7685, + "step": 436540 + }, + { + "epoch": 0.8818586198119726, + "grad_norm": 276.6015319824219, + "learning_rate": 4.825751778348259e-07, + "loss": 16.0047, + "step": 436550 + }, + { + "epoch": 0.8818788204446564, + "grad_norm": 286.6764221191406, + "learning_rate": 4.824255723143162e-07, + "loss": 18.0164, + "step": 436560 + }, + { + "epoch": 0.8818990210773402, + "grad_norm": 358.30426025390625, + "learning_rate": 4.822759888119171e-07, + "loss": 14.5864, + "step": 436570 + }, + { + "epoch": 0.8819192217100239, + "grad_norm": 66.34335327148438, + "learning_rate": 4.821264273283566e-07, + "loss": 10.056, + "step": 436580 + }, + { + "epoch": 0.8819394223427077, + "grad_norm": 0.0, + "learning_rate": 4.819768878643633e-07, + "loss": 19.3382, + "step": 436590 + }, + { + "epoch": 0.8819596229753915, + "grad_norm": 172.5146484375, + "learning_rate": 4.818273704206678e-07, + "loss": 11.7529, + "step": 436600 + }, + { + "epoch": 0.8819798236080754, + "grad_norm": 97.26643371582031, + "learning_rate": 4.816778749979973e-07, + "loss": 17.0861, + "step": 436610 + }, + { + "epoch": 0.8820000242407592, + "grad_norm": 45.69021987915039, + "learning_rate": 4.815284015970801e-07, + "loss": 10.7153, + "step": 436620 + }, + { + "epoch": 0.882020224873443, + "grad_norm": 490.1202697753906, + "learning_rate": 4.813789502186456e-07, + "loss": 12.2839, + "step": 436630 + }, + { + "epoch": 0.8820404255061268, + "grad_norm": 163.06820678710938, + "learning_rate": 4.812295208634238e-07, + "loss": 19.1205, + "step": 436640 + }, + { + "epoch": 0.8820606261388106, + "grad_norm": 229.81317138671875, + "learning_rate": 4.810801135321391e-07, + "loss": 26.0161, + "step": 436650 + }, + { + "epoch": 0.8820808267714945, + "grad_norm": 256.9088439941406, + "learning_rate": 4.809307282255221e-07, + "loss": 12.7764, + "step": 436660 + }, + { + "epoch": 0.8821010274041783, + "grad_norm": 494.6328125, + "learning_rate": 4.807813649443016e-07, + "loss": 24.5318, + "step": 436670 + }, + { + "epoch": 0.8821212280368621, + "grad_norm": 661.3333740234375, + "learning_rate": 4.806320236892048e-07, + "loss": 24.8753, + "step": 436680 + }, + { + "epoch": 0.8821414286695459, + "grad_norm": 251.65919494628906, + "learning_rate": 4.804827044609578e-07, + "loss": 16.3106, + "step": 436690 + }, + { + "epoch": 0.8821616293022297, + "grad_norm": 275.04315185546875, + "learning_rate": 4.803334072602917e-07, + "loss": 14.7408, + "step": 436700 + }, + { + "epoch": 0.8821818299349136, + "grad_norm": 592.0226440429688, + "learning_rate": 4.801841320879319e-07, + "loss": 21.9463, + "step": 436710 + }, + { + "epoch": 0.8822020305675974, + "grad_norm": 362.9889831542969, + "learning_rate": 4.800348789446058e-07, + "loss": 15.9184, + "step": 436720 + }, + { + "epoch": 0.8822222312002812, + "grad_norm": 105.56087493896484, + "learning_rate": 4.798856478310409e-07, + "loss": 18.2868, + "step": 436730 + }, + { + "epoch": 0.882242431832965, + "grad_norm": 428.64385986328125, + "learning_rate": 4.797364387479664e-07, + "loss": 25.9225, + "step": 436740 + }, + { + "epoch": 0.8822626324656488, + "grad_norm": 281.4597473144531, + "learning_rate": 4.795872516961087e-07, + "loss": 12.4037, + "step": 436750 + }, + { + "epoch": 0.8822828330983327, + "grad_norm": 430.5715026855469, + "learning_rate": 4.794380866761928e-07, + "loss": 13.3098, + "step": 436760 + }, + { + "epoch": 0.8823030337310165, + "grad_norm": 285.2254638671875, + "learning_rate": 4.792889436889487e-07, + "loss": 7.0888, + "step": 436770 + }, + { + "epoch": 0.8823232343637003, + "grad_norm": 167.1251678466797, + "learning_rate": 4.791398227351024e-07, + "loss": 10.9766, + "step": 436780 + }, + { + "epoch": 0.8823434349963841, + "grad_norm": 247.575439453125, + "learning_rate": 4.789907238153785e-07, + "loss": 21.4148, + "step": 436790 + }, + { + "epoch": 0.8823636356290679, + "grad_norm": 445.097412109375, + "learning_rate": 4.788416469305068e-07, + "loss": 22.3645, + "step": 436800 + }, + { + "epoch": 0.8823838362617518, + "grad_norm": 122.95352172851562, + "learning_rate": 4.786925920812119e-07, + "loss": 12.1629, + "step": 436810 + }, + { + "epoch": 0.8824040368944356, + "grad_norm": 658.0155029296875, + "learning_rate": 4.78543559268222e-07, + "loss": 23.374, + "step": 436820 + }, + { + "epoch": 0.8824242375271194, + "grad_norm": 192.72930908203125, + "learning_rate": 4.78394548492262e-07, + "loss": 18.5223, + "step": 436830 + }, + { + "epoch": 0.8824444381598031, + "grad_norm": 466.5446472167969, + "learning_rate": 4.782455597540576e-07, + "loss": 30.9484, + "step": 436840 + }, + { + "epoch": 0.8824646387924869, + "grad_norm": 350.6367492675781, + "learning_rate": 4.780965930543369e-07, + "loss": 13.1498, + "step": 436850 + }, + { + "epoch": 0.8824848394251708, + "grad_norm": 283.62823486328125, + "learning_rate": 4.779476483938251e-07, + "loss": 13.2417, + "step": 436860 + }, + { + "epoch": 0.8825050400578546, + "grad_norm": 564.380126953125, + "learning_rate": 4.777987257732469e-07, + "loss": 20.0031, + "step": 436870 + }, + { + "epoch": 0.8825252406905384, + "grad_norm": 112.91577911376953, + "learning_rate": 4.776498251933292e-07, + "loss": 19.3378, + "step": 436880 + }, + { + "epoch": 0.8825454413232222, + "grad_norm": 480.53521728515625, + "learning_rate": 4.775009466547986e-07, + "loss": 21.5935, + "step": 436890 + }, + { + "epoch": 0.882565641955906, + "grad_norm": 91.14021301269531, + "learning_rate": 4.773520901583801e-07, + "loss": 19.3872, + "step": 436900 + }, + { + "epoch": 0.8825858425885899, + "grad_norm": 446.8163757324219, + "learning_rate": 4.772032557047984e-07, + "loss": 23.1284, + "step": 436910 + }, + { + "epoch": 0.8826060432212737, + "grad_norm": 435.9635314941406, + "learning_rate": 4.770544432947799e-07, + "loss": 15.7814, + "step": 436920 + }, + { + "epoch": 0.8826262438539575, + "grad_norm": 419.1697692871094, + "learning_rate": 4.769056529290495e-07, + "loss": 16.4134, + "step": 436930 + }, + { + "epoch": 0.8826464444866413, + "grad_norm": 353.28460693359375, + "learning_rate": 4.7675688460833145e-07, + "loss": 21.9125, + "step": 436940 + }, + { + "epoch": 0.8826666451193251, + "grad_norm": 260.09649658203125, + "learning_rate": 4.766081383333521e-07, + "loss": 28.3239, + "step": 436950 + }, + { + "epoch": 0.882686845752009, + "grad_norm": 375.9036560058594, + "learning_rate": 4.7645941410483733e-07, + "loss": 17.1259, + "step": 436960 + }, + { + "epoch": 0.8827070463846928, + "grad_norm": 186.19960021972656, + "learning_rate": 4.7631071192350943e-07, + "loss": 19.0688, + "step": 436970 + }, + { + "epoch": 0.8827272470173766, + "grad_norm": 89.21056365966797, + "learning_rate": 4.7616203179009445e-07, + "loss": 21.1033, + "step": 436980 + }, + { + "epoch": 0.8827474476500604, + "grad_norm": 96.50436401367188, + "learning_rate": 4.760133737053174e-07, + "loss": 8.5029, + "step": 436990 + }, + { + "epoch": 0.8827676482827442, + "grad_norm": 78.41710662841797, + "learning_rate": 4.758647376699033e-07, + "loss": 14.3095, + "step": 437000 + }, + { + "epoch": 0.882787848915428, + "grad_norm": 168.3367919921875, + "learning_rate": 4.757161236845742e-07, + "loss": 13.7055, + "step": 437010 + }, + { + "epoch": 0.8828080495481119, + "grad_norm": 1005.8809814453125, + "learning_rate": 4.755675317500569e-07, + "loss": 31.0843, + "step": 437020 + }, + { + "epoch": 0.8828282501807957, + "grad_norm": 252.2699432373047, + "learning_rate": 4.7541896186707517e-07, + "loss": 19.0612, + "step": 437030 + }, + { + "epoch": 0.8828484508134795, + "grad_norm": 394.3023986816406, + "learning_rate": 4.752704140363512e-07, + "loss": 25.8683, + "step": 437040 + }, + { + "epoch": 0.8828686514461633, + "grad_norm": 297.669921875, + "learning_rate": 4.751218882586106e-07, + "loss": 10.4874, + "step": 437050 + }, + { + "epoch": 0.8828888520788472, + "grad_norm": 200.75941467285156, + "learning_rate": 4.749733845345783e-07, + "loss": 17.5828, + "step": 437060 + }, + { + "epoch": 0.882909052711531, + "grad_norm": 963.4557495117188, + "learning_rate": 4.748249028649765e-07, + "loss": 22.735, + "step": 437070 + }, + { + "epoch": 0.8829292533442148, + "grad_norm": 240.87510681152344, + "learning_rate": 4.7467644325052855e-07, + "loss": 16.0939, + "step": 437080 + }, + { + "epoch": 0.8829494539768985, + "grad_norm": 223.8863525390625, + "learning_rate": 4.7452800569195987e-07, + "loss": 13.908, + "step": 437090 + }, + { + "epoch": 0.8829696546095823, + "grad_norm": 433.0531005859375, + "learning_rate": 4.743795901899928e-07, + "loss": 20.2243, + "step": 437100 + }, + { + "epoch": 0.8829898552422661, + "grad_norm": 233.21742248535156, + "learning_rate": 4.742311967453495e-07, + "loss": 14.2396, + "step": 437110 + }, + { + "epoch": 0.88301005587495, + "grad_norm": 329.5322570800781, + "learning_rate": 4.7408282535875593e-07, + "loss": 21.1513, + "step": 437120 + }, + { + "epoch": 0.8830302565076338, + "grad_norm": 52.65104293823242, + "learning_rate": 4.739344760309322e-07, + "loss": 14.8902, + "step": 437130 + }, + { + "epoch": 0.8830504571403176, + "grad_norm": 41.04833984375, + "learning_rate": 4.737861487626039e-07, + "loss": 11.3489, + "step": 437140 + }, + { + "epoch": 0.8830706577730014, + "grad_norm": 256.1357421875, + "learning_rate": 4.7363784355449303e-07, + "loss": 19.813, + "step": 437150 + }, + { + "epoch": 0.8830908584056852, + "grad_norm": 418.2151184082031, + "learning_rate": 4.734895604073214e-07, + "loss": 14.3052, + "step": 437160 + }, + { + "epoch": 0.8831110590383691, + "grad_norm": 422.72125244140625, + "learning_rate": 4.7334129932181283e-07, + "loss": 31.8319, + "step": 437170 + }, + { + "epoch": 0.8831312596710529, + "grad_norm": 189.4805145263672, + "learning_rate": 4.731930602986906e-07, + "loss": 10.4662, + "step": 437180 + }, + { + "epoch": 0.8831514603037367, + "grad_norm": 305.8288879394531, + "learning_rate": 4.730448433386764e-07, + "loss": 14.6863, + "step": 437190 + }, + { + "epoch": 0.8831716609364205, + "grad_norm": 363.3165283203125, + "learning_rate": 4.728966484424913e-07, + "loss": 27.3676, + "step": 437200 + }, + { + "epoch": 0.8831918615691043, + "grad_norm": 323.6328125, + "learning_rate": 4.727484756108602e-07, + "loss": 15.2025, + "step": 437210 + }, + { + "epoch": 0.8832120622017882, + "grad_norm": 195.5129852294922, + "learning_rate": 4.726003248445038e-07, + "loss": 17.5062, + "step": 437220 + }, + { + "epoch": 0.883232262834472, + "grad_norm": 144.901611328125, + "learning_rate": 4.724521961441436e-07, + "loss": 16.6498, + "step": 437230 + }, + { + "epoch": 0.8832524634671558, + "grad_norm": 9.249590873718262, + "learning_rate": 4.723040895105019e-07, + "loss": 19.7237, + "step": 437240 + }, + { + "epoch": 0.8832726640998396, + "grad_norm": 296.315185546875, + "learning_rate": 4.72156004944303e-07, + "loss": 14.0651, + "step": 437250 + }, + { + "epoch": 0.8832928647325234, + "grad_norm": 241.59246826171875, + "learning_rate": 4.720079424462648e-07, + "loss": 18.9439, + "step": 437260 + }, + { + "epoch": 0.8833130653652073, + "grad_norm": 283.0418701171875, + "learning_rate": 4.718599020171105e-07, + "loss": 17.934, + "step": 437270 + }, + { + "epoch": 0.8833332659978911, + "grad_norm": 101.30632781982422, + "learning_rate": 4.7171188365756235e-07, + "loss": 19.9709, + "step": 437280 + }, + { + "epoch": 0.8833534666305749, + "grad_norm": 0.0, + "learning_rate": 4.71563887368342e-07, + "loss": 9.3875, + "step": 437290 + }, + { + "epoch": 0.8833736672632587, + "grad_norm": 20.22136688232422, + "learning_rate": 4.714159131501689e-07, + "loss": 7.5542, + "step": 437300 + }, + { + "epoch": 0.8833938678959425, + "grad_norm": 229.6137237548828, + "learning_rate": 4.7126796100376625e-07, + "loss": 15.3226, + "step": 437310 + }, + { + "epoch": 0.8834140685286264, + "grad_norm": 3.1302549839019775, + "learning_rate": 4.7112003092985414e-07, + "loss": 14.4668, + "step": 437320 + }, + { + "epoch": 0.8834342691613102, + "grad_norm": 221.1377716064453, + "learning_rate": 4.7097212292915307e-07, + "loss": 16.8592, + "step": 437330 + }, + { + "epoch": 0.883454469793994, + "grad_norm": 258.5196533203125, + "learning_rate": 4.7082423700238413e-07, + "loss": 13.0835, + "step": 437340 + }, + { + "epoch": 0.8834746704266777, + "grad_norm": 318.5444030761719, + "learning_rate": 4.7067637315027005e-07, + "loss": 12.993, + "step": 437350 + }, + { + "epoch": 0.8834948710593615, + "grad_norm": 204.92083740234375, + "learning_rate": 4.705285313735297e-07, + "loss": 9.7958, + "step": 437360 + }, + { + "epoch": 0.8835150716920454, + "grad_norm": 429.11932373046875, + "learning_rate": 4.703807116728831e-07, + "loss": 14.774, + "step": 437370 + }, + { + "epoch": 0.8835352723247292, + "grad_norm": 182.92518615722656, + "learning_rate": 4.7023291404905245e-07, + "loss": 12.807, + "step": 437380 + }, + { + "epoch": 0.883555472957413, + "grad_norm": 0.0, + "learning_rate": 4.700851385027566e-07, + "loss": 15.9494, + "step": 437390 + }, + { + "epoch": 0.8835756735900968, + "grad_norm": 277.5395812988281, + "learning_rate": 4.699373850347161e-07, + "loss": 17.1926, + "step": 437400 + }, + { + "epoch": 0.8835958742227806, + "grad_norm": 423.8408508300781, + "learning_rate": 4.69789653645652e-07, + "loss": 20.0163, + "step": 437410 + }, + { + "epoch": 0.8836160748554645, + "grad_norm": 439.76751708984375, + "learning_rate": 4.6964194433628317e-07, + "loss": 11.1602, + "step": 437420 + }, + { + "epoch": 0.8836362754881483, + "grad_norm": 369.44580078125, + "learning_rate": 4.6949425710733076e-07, + "loss": 8.9738, + "step": 437430 + }, + { + "epoch": 0.8836564761208321, + "grad_norm": 239.2298126220703, + "learning_rate": 4.693465919595136e-07, + "loss": 10.1647, + "step": 437440 + }, + { + "epoch": 0.8836766767535159, + "grad_norm": 292.31805419921875, + "learning_rate": 4.691989488935511e-07, + "loss": 16.8424, + "step": 437450 + }, + { + "epoch": 0.8836968773861997, + "grad_norm": 199.2280731201172, + "learning_rate": 4.690513279101638e-07, + "loss": 13.3979, + "step": 437460 + }, + { + "epoch": 0.8837170780188836, + "grad_norm": 0.0, + "learning_rate": 4.689037290100712e-07, + "loss": 16.1303, + "step": 437470 + }, + { + "epoch": 0.8837372786515674, + "grad_norm": 319.63580322265625, + "learning_rate": 4.687561521939915e-07, + "loss": 14.6304, + "step": 437480 + }, + { + "epoch": 0.8837574792842512, + "grad_norm": 86.21988677978516, + "learning_rate": 4.686085974626442e-07, + "loss": 16.5076, + "step": 437490 + }, + { + "epoch": 0.883777679916935, + "grad_norm": 683.4754638671875, + "learning_rate": 4.6846106481675035e-07, + "loss": 36.1261, + "step": 437500 + }, + { + "epoch": 0.8837978805496188, + "grad_norm": 14.934298515319824, + "learning_rate": 4.683135542570277e-07, + "loss": 14.7263, + "step": 437510 + }, + { + "epoch": 0.8838180811823027, + "grad_norm": 414.68798828125, + "learning_rate": 4.681660657841941e-07, + "loss": 15.5596, + "step": 437520 + }, + { + "epoch": 0.8838382818149865, + "grad_norm": 350.0975341796875, + "learning_rate": 4.6801859939896997e-07, + "loss": 20.273, + "step": 437530 + }, + { + "epoch": 0.8838584824476703, + "grad_norm": 189.8002471923828, + "learning_rate": 4.678711551020743e-07, + "loss": 19.346, + "step": 437540 + }, + { + "epoch": 0.8838786830803541, + "grad_norm": 1298.6451416015625, + "learning_rate": 4.677237328942236e-07, + "loss": 12.3551, + "step": 437550 + }, + { + "epoch": 0.8838988837130379, + "grad_norm": 343.4154357910156, + "learning_rate": 4.6757633277613734e-07, + "loss": 14.7542, + "step": 437560 + }, + { + "epoch": 0.8839190843457218, + "grad_norm": 411.26324462890625, + "learning_rate": 4.674289547485367e-07, + "loss": 18.4803, + "step": 437570 + }, + { + "epoch": 0.8839392849784056, + "grad_norm": 543.5770874023438, + "learning_rate": 4.672815988121354e-07, + "loss": 20.6617, + "step": 437580 + }, + { + "epoch": 0.8839594856110894, + "grad_norm": 30.991418838500977, + "learning_rate": 4.6713426496765413e-07, + "loss": 19.4316, + "step": 437590 + }, + { + "epoch": 0.8839796862437731, + "grad_norm": 0.0, + "learning_rate": 4.6698695321581165e-07, + "loss": 20.5669, + "step": 437600 + }, + { + "epoch": 0.8839998868764569, + "grad_norm": 265.6228942871094, + "learning_rate": 4.6683966355732466e-07, + "loss": 12.0727, + "step": 437610 + }, + { + "epoch": 0.8840200875091407, + "grad_norm": 168.62765502929688, + "learning_rate": 4.6669239599291093e-07, + "loss": 20.7217, + "step": 437620 + }, + { + "epoch": 0.8840402881418246, + "grad_norm": 334.17340087890625, + "learning_rate": 4.665451505232882e-07, + "loss": 15.5412, + "step": 437630 + }, + { + "epoch": 0.8840604887745084, + "grad_norm": 299.30450439453125, + "learning_rate": 4.663979271491764e-07, + "loss": 21.0342, + "step": 437640 + }, + { + "epoch": 0.8840806894071922, + "grad_norm": 317.87091064453125, + "learning_rate": 4.662507258712895e-07, + "loss": 11.1997, + "step": 437650 + }, + { + "epoch": 0.884100890039876, + "grad_norm": 528.5407104492188, + "learning_rate": 4.6610354669034686e-07, + "loss": 24.1234, + "step": 437660 + }, + { + "epoch": 0.8841210906725598, + "grad_norm": 506.4310302734375, + "learning_rate": 4.6595638960706624e-07, + "loss": 18.4001, + "step": 437670 + }, + { + "epoch": 0.8841412913052437, + "grad_norm": 547.1437377929688, + "learning_rate": 4.6580925462216487e-07, + "loss": 21.1345, + "step": 437680 + }, + { + "epoch": 0.8841614919379275, + "grad_norm": 210.5221710205078, + "learning_rate": 4.656621417363577e-07, + "loss": 31.4187, + "step": 437690 + }, + { + "epoch": 0.8841816925706113, + "grad_norm": 25.963443756103516, + "learning_rate": 4.655150509503642e-07, + "loss": 7.2942, + "step": 437700 + }, + { + "epoch": 0.8842018932032951, + "grad_norm": 337.7308044433594, + "learning_rate": 4.65367982264901e-07, + "loss": 12.6394, + "step": 437710 + }, + { + "epoch": 0.8842220938359789, + "grad_norm": 339.14752197265625, + "learning_rate": 4.6522093568068307e-07, + "loss": 18.168, + "step": 437720 + }, + { + "epoch": 0.8842422944686628, + "grad_norm": 231.02685546875, + "learning_rate": 4.650739111984287e-07, + "loss": 16.6801, + "step": 437730 + }, + { + "epoch": 0.8842624951013466, + "grad_norm": 349.98480224609375, + "learning_rate": 4.649269088188535e-07, + "loss": 12.3236, + "step": 437740 + }, + { + "epoch": 0.8842826957340304, + "grad_norm": 234.3077850341797, + "learning_rate": 4.647799285426757e-07, + "loss": 18.8477, + "step": 437750 + }, + { + "epoch": 0.8843028963667142, + "grad_norm": 482.90771484375, + "learning_rate": 4.646329703706104e-07, + "loss": 38.1848, + "step": 437760 + }, + { + "epoch": 0.884323096999398, + "grad_norm": 187.89315795898438, + "learning_rate": 4.644860343033725e-07, + "loss": 8.9286, + "step": 437770 + }, + { + "epoch": 0.8843432976320819, + "grad_norm": 86.51445770263672, + "learning_rate": 4.6433912034168083e-07, + "loss": 10.5808, + "step": 437780 + }, + { + "epoch": 0.8843634982647657, + "grad_norm": 283.5401611328125, + "learning_rate": 4.6419222848624933e-07, + "loss": 13.4795, + "step": 437790 + }, + { + "epoch": 0.8843836988974495, + "grad_norm": 356.6964416503906, + "learning_rate": 4.640453587377958e-07, + "loss": 17.2985, + "step": 437800 + }, + { + "epoch": 0.8844038995301333, + "grad_norm": 459.8682861328125, + "learning_rate": 4.63898511097034e-07, + "loss": 19.477, + "step": 437810 + }, + { + "epoch": 0.8844241001628171, + "grad_norm": 171.66075134277344, + "learning_rate": 4.6375168556468175e-07, + "loss": 24.7088, + "step": 437820 + }, + { + "epoch": 0.884444300795501, + "grad_norm": 596.6193237304688, + "learning_rate": 4.636048821414535e-07, + "loss": 16.6327, + "step": 437830 + }, + { + "epoch": 0.8844645014281848, + "grad_norm": 359.46356201171875, + "learning_rate": 4.6345810082806363e-07, + "loss": 17.6766, + "step": 437840 + }, + { + "epoch": 0.8844847020608686, + "grad_norm": 394.1361083984375, + "learning_rate": 4.6331134162522994e-07, + "loss": 14.17, + "step": 437850 + }, + { + "epoch": 0.8845049026935523, + "grad_norm": 244.41360473632812, + "learning_rate": 4.631646045336663e-07, + "loss": 15.9952, + "step": 437860 + }, + { + "epoch": 0.8845251033262361, + "grad_norm": 228.95028686523438, + "learning_rate": 4.6301788955408765e-07, + "loss": 22.1281, + "step": 437870 + }, + { + "epoch": 0.88454530395892, + "grad_norm": 470.039306640625, + "learning_rate": 4.62871196687209e-07, + "loss": 18.5186, + "step": 437880 + }, + { + "epoch": 0.8845655045916038, + "grad_norm": 213.72068786621094, + "learning_rate": 4.6272452593374763e-07, + "loss": 15.4759, + "step": 437890 + }, + { + "epoch": 0.8845857052242876, + "grad_norm": 152.71107482910156, + "learning_rate": 4.625778772944156e-07, + "loss": 16.1006, + "step": 437900 + }, + { + "epoch": 0.8846059058569714, + "grad_norm": 114.55770111083984, + "learning_rate": 4.6243125076992857e-07, + "loss": 9.6908, + "step": 437910 + }, + { + "epoch": 0.8846261064896552, + "grad_norm": 112.05704498291016, + "learning_rate": 4.62284646361002e-07, + "loss": 13.8483, + "step": 437920 + }, + { + "epoch": 0.884646307122339, + "grad_norm": 312.3384094238281, + "learning_rate": 4.6213806406834926e-07, + "loss": 24.441, + "step": 437930 + }, + { + "epoch": 0.8846665077550229, + "grad_norm": 289.9527587890625, + "learning_rate": 4.6199150389268476e-07, + "loss": 13.0452, + "step": 437940 + }, + { + "epoch": 0.8846867083877067, + "grad_norm": 366.7112731933594, + "learning_rate": 4.6184496583472293e-07, + "loss": 32.1492, + "step": 437950 + }, + { + "epoch": 0.8847069090203905, + "grad_norm": 31.300445556640625, + "learning_rate": 4.616984498951793e-07, + "loss": 11.4439, + "step": 437960 + }, + { + "epoch": 0.8847271096530743, + "grad_norm": 639.13623046875, + "learning_rate": 4.6155195607476723e-07, + "loss": 12.9683, + "step": 437970 + }, + { + "epoch": 0.8847473102857581, + "grad_norm": 304.33856201171875, + "learning_rate": 4.614054843741994e-07, + "loss": 10.8003, + "step": 437980 + }, + { + "epoch": 0.884767510918442, + "grad_norm": 62.580081939697266, + "learning_rate": 4.61259034794192e-07, + "loss": 12.7833, + "step": 437990 + }, + { + "epoch": 0.8847877115511258, + "grad_norm": 411.7159118652344, + "learning_rate": 4.6111260733545714e-07, + "loss": 29.6107, + "step": 438000 + }, + { + "epoch": 0.8848079121838096, + "grad_norm": 129.64031982421875, + "learning_rate": 4.6096620199870824e-07, + "loss": 9.6474, + "step": 438010 + }, + { + "epoch": 0.8848281128164934, + "grad_norm": 163.2689208984375, + "learning_rate": 4.6081981878466077e-07, + "loss": 16.459, + "step": 438020 + }, + { + "epoch": 0.8848483134491772, + "grad_norm": 327.9782409667969, + "learning_rate": 4.606734576940253e-07, + "loss": 17.2054, + "step": 438030 + }, + { + "epoch": 0.8848685140818611, + "grad_norm": 0.0, + "learning_rate": 4.6052711872751843e-07, + "loss": 9.4346, + "step": 438040 + }, + { + "epoch": 0.8848887147145449, + "grad_norm": 242.32127380371094, + "learning_rate": 4.6038080188585135e-07, + "loss": 22.6262, + "step": 438050 + }, + { + "epoch": 0.8849089153472287, + "grad_norm": 167.4984893798828, + "learning_rate": 4.602345071697373e-07, + "loss": 12.9228, + "step": 438060 + }, + { + "epoch": 0.8849291159799125, + "grad_norm": 128.27276611328125, + "learning_rate": 4.600882345798902e-07, + "loss": 23.6155, + "step": 438070 + }, + { + "epoch": 0.8849493166125963, + "grad_norm": 116.59937286376953, + "learning_rate": 4.599419841170216e-07, + "loss": 14.5997, + "step": 438080 + }, + { + "epoch": 0.8849695172452802, + "grad_norm": 353.3071594238281, + "learning_rate": 4.5979575578184554e-07, + "loss": 18.4459, + "step": 438090 + }, + { + "epoch": 0.884989717877964, + "grad_norm": 451.44781494140625, + "learning_rate": 4.5964954957507414e-07, + "loss": 24.7281, + "step": 438100 + }, + { + "epoch": 0.8850099185106478, + "grad_norm": 559.2554321289062, + "learning_rate": 4.595033654974207e-07, + "loss": 12.8119, + "step": 438110 + }, + { + "epoch": 0.8850301191433315, + "grad_norm": 220.00465393066406, + "learning_rate": 4.593572035495969e-07, + "loss": 15.8552, + "step": 438120 + }, + { + "epoch": 0.8850503197760153, + "grad_norm": 163.78689575195312, + "learning_rate": 4.592110637323149e-07, + "loss": 9.1616, + "step": 438130 + }, + { + "epoch": 0.8850705204086992, + "grad_norm": 330.65069580078125, + "learning_rate": 4.5906494604628816e-07, + "loss": 17.3459, + "step": 438140 + }, + { + "epoch": 0.885090721041383, + "grad_norm": 313.33856201171875, + "learning_rate": 4.5891885049222815e-07, + "loss": 10.0967, + "step": 438150 + }, + { + "epoch": 0.8851109216740668, + "grad_norm": 459.80084228515625, + "learning_rate": 4.587727770708461e-07, + "loss": 12.1347, + "step": 438160 + }, + { + "epoch": 0.8851311223067506, + "grad_norm": 378.82647705078125, + "learning_rate": 4.5862672578285475e-07, + "loss": 18.2424, + "step": 438170 + }, + { + "epoch": 0.8851513229394344, + "grad_norm": 377.5517272949219, + "learning_rate": 4.5848069662896786e-07, + "loss": 18.4155, + "step": 438180 + }, + { + "epoch": 0.8851715235721183, + "grad_norm": 230.7869110107422, + "learning_rate": 4.5833468960989333e-07, + "loss": 20.4322, + "step": 438190 + }, + { + "epoch": 0.8851917242048021, + "grad_norm": 185.71571350097656, + "learning_rate": 4.581887047263445e-07, + "loss": 36.8069, + "step": 438200 + }, + { + "epoch": 0.8852119248374859, + "grad_norm": 222.73947143554688, + "learning_rate": 4.5804274197903396e-07, + "loss": 10.4, + "step": 438210 + }, + { + "epoch": 0.8852321254701697, + "grad_norm": 309.8128356933594, + "learning_rate": 4.5789680136867245e-07, + "loss": 13.4804, + "step": 438220 + }, + { + "epoch": 0.8852523261028535, + "grad_norm": 131.28533935546875, + "learning_rate": 4.577508828959698e-07, + "loss": 11.2862, + "step": 438230 + }, + { + "epoch": 0.8852725267355374, + "grad_norm": 464.95977783203125, + "learning_rate": 4.5760498656163886e-07, + "loss": 29.445, + "step": 438240 + }, + { + "epoch": 0.8852927273682212, + "grad_norm": 556.691650390625, + "learning_rate": 4.5745911236639186e-07, + "loss": 24.9435, + "step": 438250 + }, + { + "epoch": 0.885312928000905, + "grad_norm": 409.10546875, + "learning_rate": 4.5731326031093645e-07, + "loss": 18.3854, + "step": 438260 + }, + { + "epoch": 0.8853331286335888, + "grad_norm": 270.2962341308594, + "learning_rate": 4.57167430395985e-07, + "loss": 17.4197, + "step": 438270 + }, + { + "epoch": 0.8853533292662726, + "grad_norm": 513.6214599609375, + "learning_rate": 4.5702162262224957e-07, + "loss": 16.8293, + "step": 438280 + }, + { + "epoch": 0.8853735298989565, + "grad_norm": 317.1377868652344, + "learning_rate": 4.5687583699044027e-07, + "loss": 19.0154, + "step": 438290 + }, + { + "epoch": 0.8853937305316403, + "grad_norm": 459.9891052246094, + "learning_rate": 4.567300735012653e-07, + "loss": 17.5073, + "step": 438300 + }, + { + "epoch": 0.8854139311643241, + "grad_norm": 335.3582458496094, + "learning_rate": 4.565843321554386e-07, + "loss": 7.7538, + "step": 438310 + }, + { + "epoch": 0.8854341317970079, + "grad_norm": 335.785400390625, + "learning_rate": 4.5643861295366854e-07, + "loss": 21.6278, + "step": 438320 + }, + { + "epoch": 0.8854543324296917, + "grad_norm": 595.5294799804688, + "learning_rate": 4.562929158966645e-07, + "loss": 19.6184, + "step": 438330 + }, + { + "epoch": 0.8854745330623756, + "grad_norm": 570.8287963867188, + "learning_rate": 4.561472409851386e-07, + "loss": 28.1183, + "step": 438340 + }, + { + "epoch": 0.8854947336950594, + "grad_norm": 185.64492797851562, + "learning_rate": 4.5600158821979933e-07, + "loss": 20.9304, + "step": 438350 + }, + { + "epoch": 0.8855149343277432, + "grad_norm": 22.53156280517578, + "learning_rate": 4.5585595760135825e-07, + "loss": 12.0694, + "step": 438360 + }, + { + "epoch": 0.8855351349604269, + "grad_norm": 179.72341918945312, + "learning_rate": 4.557103491305237e-07, + "loss": 23.1451, + "step": 438370 + }, + { + "epoch": 0.8855553355931107, + "grad_norm": 263.9036865234375, + "learning_rate": 4.555647628080051e-07, + "loss": 8.5029, + "step": 438380 + }, + { + "epoch": 0.8855755362257945, + "grad_norm": 118.56056213378906, + "learning_rate": 4.554191986345136e-07, + "loss": 18.9355, + "step": 438390 + }, + { + "epoch": 0.8855957368584784, + "grad_norm": 185.22677612304688, + "learning_rate": 4.552736566107563e-07, + "loss": 10.1332, + "step": 438400 + }, + { + "epoch": 0.8856159374911622, + "grad_norm": 14.288604736328125, + "learning_rate": 4.551281367374455e-07, + "loss": 8.2533, + "step": 438410 + }, + { + "epoch": 0.885636138123846, + "grad_norm": 175.18568420410156, + "learning_rate": 4.5498263901528784e-07, + "loss": 13.3607, + "step": 438420 + }, + { + "epoch": 0.8856563387565298, + "grad_norm": 251.22047424316406, + "learning_rate": 4.548371634449944e-07, + "loss": 12.3873, + "step": 438430 + }, + { + "epoch": 0.8856765393892136, + "grad_norm": 319.089111328125, + "learning_rate": 4.546917100272735e-07, + "loss": 8.8985, + "step": 438440 + }, + { + "epoch": 0.8856967400218975, + "grad_norm": 176.71875, + "learning_rate": 4.5454627876283295e-07, + "loss": 18.8316, + "step": 438450 + }, + { + "epoch": 0.8857169406545813, + "grad_norm": 354.1561279296875, + "learning_rate": 4.5440086965238326e-07, + "loss": 13.3039, + "step": 438460 + }, + { + "epoch": 0.8857371412872651, + "grad_norm": 706.0140991210938, + "learning_rate": 4.542554826966328e-07, + "loss": 14.5978, + "step": 438470 + }, + { + "epoch": 0.8857573419199489, + "grad_norm": 330.8226623535156, + "learning_rate": 4.541101178962887e-07, + "loss": 20.6027, + "step": 438480 + }, + { + "epoch": 0.8857775425526327, + "grad_norm": 191.33779907226562, + "learning_rate": 4.539647752520604e-07, + "loss": 14.3021, + "step": 438490 + }, + { + "epoch": 0.8857977431853166, + "grad_norm": 78.56890869140625, + "learning_rate": 4.538194547646574e-07, + "loss": 31.2673, + "step": 438500 + }, + { + "epoch": 0.8858179438180004, + "grad_norm": 304.31842041015625, + "learning_rate": 4.5367415643478683e-07, + "loss": 28.3919, + "step": 438510 + }, + { + "epoch": 0.8858381444506842, + "grad_norm": 619.509033203125, + "learning_rate": 4.5352888026315654e-07, + "loss": 26.8117, + "step": 438520 + }, + { + "epoch": 0.885858345083368, + "grad_norm": 136.1430206298828, + "learning_rate": 4.533836262504759e-07, + "loss": 15.978, + "step": 438530 + }, + { + "epoch": 0.8858785457160518, + "grad_norm": 127.50112915039062, + "learning_rate": 4.5323839439745163e-07, + "loss": 43.1338, + "step": 438540 + }, + { + "epoch": 0.8858987463487357, + "grad_norm": 525.2925415039062, + "learning_rate": 4.5309318470479144e-07, + "loss": 19.3131, + "step": 438550 + }, + { + "epoch": 0.8859189469814195, + "grad_norm": 124.81487274169922, + "learning_rate": 4.529479971732031e-07, + "loss": 15.0068, + "step": 438560 + }, + { + "epoch": 0.8859391476141033, + "grad_norm": 440.443603515625, + "learning_rate": 4.528028318033961e-07, + "loss": 18.4901, + "step": 438570 + }, + { + "epoch": 0.8859593482467871, + "grad_norm": 203.86892700195312, + "learning_rate": 4.526576885960765e-07, + "loss": 12.414, + "step": 438580 + }, + { + "epoch": 0.885979548879471, + "grad_norm": 482.45123291015625, + "learning_rate": 4.5251256755195093e-07, + "loss": 15.0141, + "step": 438590 + }, + { + "epoch": 0.8859997495121548, + "grad_norm": 397.09820556640625, + "learning_rate": 4.523674686717283e-07, + "loss": 14.082, + "step": 438600 + }, + { + "epoch": 0.8860199501448386, + "grad_norm": 184.38796997070312, + "learning_rate": 4.522223919561153e-07, + "loss": 8.2287, + "step": 438610 + }, + { + "epoch": 0.8860401507775224, + "grad_norm": 179.822509765625, + "learning_rate": 4.520773374058179e-07, + "loss": 12.3596, + "step": 438620 + }, + { + "epoch": 0.8860603514102061, + "grad_norm": 267.5096435546875, + "learning_rate": 4.519323050215446e-07, + "loss": 9.7496, + "step": 438630 + }, + { + "epoch": 0.8860805520428899, + "grad_norm": 1529.614013671875, + "learning_rate": 4.5178729480400084e-07, + "loss": 43.2379, + "step": 438640 + }, + { + "epoch": 0.8861007526755738, + "grad_norm": 303.8629455566406, + "learning_rate": 4.51642306753895e-07, + "loss": 11.7304, + "step": 438650 + }, + { + "epoch": 0.8861209533082576, + "grad_norm": 57.34650421142578, + "learning_rate": 4.514973408719331e-07, + "loss": 19.2086, + "step": 438660 + }, + { + "epoch": 0.8861411539409414, + "grad_norm": 291.76080322265625, + "learning_rate": 4.513523971588202e-07, + "loss": 14.6775, + "step": 438670 + }, + { + "epoch": 0.8861613545736252, + "grad_norm": 784.0736694335938, + "learning_rate": 4.512074756152651e-07, + "loss": 80.4675, + "step": 438680 + }, + { + "epoch": 0.886181555206309, + "grad_norm": 216.25804138183594, + "learning_rate": 4.5106257624197237e-07, + "loss": 16.916, + "step": 438690 + }, + { + "epoch": 0.8862017558389929, + "grad_norm": 420.09442138671875, + "learning_rate": 4.5091769903964965e-07, + "loss": 18.0371, + "step": 438700 + }, + { + "epoch": 0.8862219564716767, + "grad_norm": 21.776302337646484, + "learning_rate": 4.5077284400900147e-07, + "loss": 33.2878, + "step": 438710 + }, + { + "epoch": 0.8862421571043605, + "grad_norm": 222.6472625732422, + "learning_rate": 4.5062801115073607e-07, + "loss": 17.3187, + "step": 438720 + }, + { + "epoch": 0.8862623577370443, + "grad_norm": 164.23194885253906, + "learning_rate": 4.504832004655574e-07, + "loss": 10.0259, + "step": 438730 + }, + { + "epoch": 0.8862825583697281, + "grad_norm": 326.33038330078125, + "learning_rate": 4.503384119541709e-07, + "loss": 12.61, + "step": 438740 + }, + { + "epoch": 0.886302759002412, + "grad_norm": 243.9933624267578, + "learning_rate": 4.501936456172845e-07, + "loss": 24.4791, + "step": 438750 + }, + { + "epoch": 0.8863229596350958, + "grad_norm": 432.9656677246094, + "learning_rate": 4.50048901455602e-07, + "loss": 19.3517, + "step": 438760 + }, + { + "epoch": 0.8863431602677796, + "grad_norm": 174.36346435546875, + "learning_rate": 4.4990417946982836e-07, + "loss": 9.2157, + "step": 438770 + }, + { + "epoch": 0.8863633609004634, + "grad_norm": 803.330322265625, + "learning_rate": 4.4975947966067023e-07, + "loss": 30.4556, + "step": 438780 + }, + { + "epoch": 0.8863835615331472, + "grad_norm": 331.22705078125, + "learning_rate": 4.4961480202883434e-07, + "loss": 14.0299, + "step": 438790 + }, + { + "epoch": 0.886403762165831, + "grad_norm": 437.1974182128906, + "learning_rate": 4.494701465750217e-07, + "loss": 25.7276, + "step": 438800 + }, + { + "epoch": 0.8864239627985149, + "grad_norm": 129.12181091308594, + "learning_rate": 4.4932551329994023e-07, + "loss": 12.1632, + "step": 438810 + }, + { + "epoch": 0.8864441634311987, + "grad_norm": 211.15261840820312, + "learning_rate": 4.4918090220429476e-07, + "loss": 10.5796, + "step": 438820 + }, + { + "epoch": 0.8864643640638825, + "grad_norm": 124.55679321289062, + "learning_rate": 4.490363132887904e-07, + "loss": 10.6447, + "step": 438830 + }, + { + "epoch": 0.8864845646965663, + "grad_norm": 690.0352783203125, + "learning_rate": 4.4889174655412924e-07, + "loss": 19.7262, + "step": 438840 + }, + { + "epoch": 0.8865047653292502, + "grad_norm": 204.7061004638672, + "learning_rate": 4.487472020010181e-07, + "loss": 9.3996, + "step": 438850 + }, + { + "epoch": 0.886524965961934, + "grad_norm": 252.89378356933594, + "learning_rate": 4.4860267963016293e-07, + "loss": 13.7279, + "step": 438860 + }, + { + "epoch": 0.8865451665946178, + "grad_norm": 115.66488647460938, + "learning_rate": 4.484581794422643e-07, + "loss": 9.3315, + "step": 438870 + }, + { + "epoch": 0.8865653672273015, + "grad_norm": 881.0204467773438, + "learning_rate": 4.48313701438029e-07, + "loss": 21.6291, + "step": 438880 + }, + { + "epoch": 0.8865855678599853, + "grad_norm": 255.3397979736328, + "learning_rate": 4.4816924561816076e-07, + "loss": 16.0989, + "step": 438890 + }, + { + "epoch": 0.8866057684926691, + "grad_norm": 393.5469055175781, + "learning_rate": 4.480248119833641e-07, + "loss": 12.8053, + "step": 438900 + }, + { + "epoch": 0.886625969125353, + "grad_norm": 118.80579376220703, + "learning_rate": 4.4788040053434124e-07, + "loss": 14.7572, + "step": 438910 + }, + { + "epoch": 0.8866461697580368, + "grad_norm": 154.90057373046875, + "learning_rate": 4.477360112717982e-07, + "loss": 20.8149, + "step": 438920 + }, + { + "epoch": 0.8866663703907206, + "grad_norm": 412.5389709472656, + "learning_rate": 4.475916441964379e-07, + "loss": 18.8692, + "step": 438930 + }, + { + "epoch": 0.8866865710234044, + "grad_norm": 312.3932189941406, + "learning_rate": 4.474472993089629e-07, + "loss": 15.959, + "step": 438940 + }, + { + "epoch": 0.8867067716560882, + "grad_norm": 548.4207153320312, + "learning_rate": 4.473029766100784e-07, + "loss": 36.9365, + "step": 438950 + }, + { + "epoch": 0.8867269722887721, + "grad_norm": 67.55258178710938, + "learning_rate": 4.471586761004859e-07, + "loss": 6.5442, + "step": 438960 + }, + { + "epoch": 0.8867471729214559, + "grad_norm": 121.97498321533203, + "learning_rate": 4.4701439778089105e-07, + "loss": 18.2159, + "step": 438970 + }, + { + "epoch": 0.8867673735541397, + "grad_norm": 12.259499549865723, + "learning_rate": 4.4687014165199547e-07, + "loss": 11.6851, + "step": 438980 + }, + { + "epoch": 0.8867875741868235, + "grad_norm": 229.6267547607422, + "learning_rate": 4.46725907714502e-07, + "loss": 31.3248, + "step": 438990 + }, + { + "epoch": 0.8868077748195073, + "grad_norm": 301.18377685546875, + "learning_rate": 4.4658169596911493e-07, + "loss": 18.4136, + "step": 439000 + }, + { + "epoch": 0.8868279754521912, + "grad_norm": 295.84893798828125, + "learning_rate": 4.464375064165355e-07, + "loss": 17.0115, + "step": 439010 + }, + { + "epoch": 0.886848176084875, + "grad_norm": 521.0673217773438, + "learning_rate": 4.4629333905746864e-07, + "loss": 20.9325, + "step": 439020 + }, + { + "epoch": 0.8868683767175588, + "grad_norm": 626.5892333984375, + "learning_rate": 4.461491938926144e-07, + "loss": 23.6057, + "step": 439030 + }, + { + "epoch": 0.8868885773502426, + "grad_norm": 107.52507781982422, + "learning_rate": 4.4600507092267767e-07, + "loss": 14.1313, + "step": 439040 + }, + { + "epoch": 0.8869087779829264, + "grad_norm": 322.2767639160156, + "learning_rate": 4.4586097014836017e-07, + "loss": 9.7177, + "step": 439050 + }, + { + "epoch": 0.8869289786156103, + "grad_norm": 217.96405029296875, + "learning_rate": 4.4571689157036244e-07, + "loss": 9.5048, + "step": 439060 + }, + { + "epoch": 0.8869491792482941, + "grad_norm": 83.07560729980469, + "learning_rate": 4.455728351893895e-07, + "loss": 18.3758, + "step": 439070 + }, + { + "epoch": 0.8869693798809779, + "grad_norm": 870.941650390625, + "learning_rate": 4.454288010061425e-07, + "loss": 33.5819, + "step": 439080 + }, + { + "epoch": 0.8869895805136617, + "grad_norm": 263.697021484375, + "learning_rate": 4.4528478902132187e-07, + "loss": 11.3598, + "step": 439090 + }, + { + "epoch": 0.8870097811463455, + "grad_norm": 109.19169616699219, + "learning_rate": 4.4514079923563103e-07, + "loss": 16.8128, + "step": 439100 + }, + { + "epoch": 0.8870299817790294, + "grad_norm": 266.2052001953125, + "learning_rate": 4.449968316497721e-07, + "loss": 13.9623, + "step": 439110 + }, + { + "epoch": 0.8870501824117132, + "grad_norm": 316.3713073730469, + "learning_rate": 4.448528862644458e-07, + "loss": 11.0006, + "step": 439120 + }, + { + "epoch": 0.887070383044397, + "grad_norm": 274.6880187988281, + "learning_rate": 4.447089630803536e-07, + "loss": 25.449, + "step": 439130 + }, + { + "epoch": 0.8870905836770807, + "grad_norm": 915.1109619140625, + "learning_rate": 4.445650620981984e-07, + "loss": 16.723, + "step": 439140 + }, + { + "epoch": 0.8871107843097645, + "grad_norm": 226.83387756347656, + "learning_rate": 4.444211833186807e-07, + "loss": 16.3436, + "step": 439150 + }, + { + "epoch": 0.8871309849424484, + "grad_norm": 470.22509765625, + "learning_rate": 4.4427732674250045e-07, + "loss": 20.8528, + "step": 439160 + }, + { + "epoch": 0.8871511855751322, + "grad_norm": 277.1685485839844, + "learning_rate": 4.4413349237036e-07, + "loss": 14.1202, + "step": 439170 + }, + { + "epoch": 0.887171386207816, + "grad_norm": 228.85626220703125, + "learning_rate": 4.4398968020296143e-07, + "loss": 17.8448, + "step": 439180 + }, + { + "epoch": 0.8871915868404998, + "grad_norm": 489.93524169921875, + "learning_rate": 4.4384589024100423e-07, + "loss": 29.0556, + "step": 439190 + }, + { + "epoch": 0.8872117874731836, + "grad_norm": 304.1772155761719, + "learning_rate": 4.4370212248518895e-07, + "loss": 15.8423, + "step": 439200 + }, + { + "epoch": 0.8872319881058675, + "grad_norm": 461.1487731933594, + "learning_rate": 4.4355837693621786e-07, + "loss": 19.2313, + "step": 439210 + }, + { + "epoch": 0.8872521887385513, + "grad_norm": 301.6158447265625, + "learning_rate": 4.434146535947903e-07, + "loss": 19.8144, + "step": 439220 + }, + { + "epoch": 0.8872723893712351, + "grad_norm": 96.90778350830078, + "learning_rate": 4.4327095246160636e-07, + "loss": 16.7332, + "step": 439230 + }, + { + "epoch": 0.8872925900039189, + "grad_norm": 243.5862579345703, + "learning_rate": 4.4312727353736816e-07, + "loss": 41.9337, + "step": 439240 + }, + { + "epoch": 0.8873127906366027, + "grad_norm": 367.57672119140625, + "learning_rate": 4.4298361682277355e-07, + "loss": 18.4597, + "step": 439250 + }, + { + "epoch": 0.8873329912692866, + "grad_norm": 388.3193359375, + "learning_rate": 4.428399823185253e-07, + "loss": 16.5243, + "step": 439260 + }, + { + "epoch": 0.8873531919019704, + "grad_norm": 170.0860595703125, + "learning_rate": 4.426963700253223e-07, + "loss": 19.4929, + "step": 439270 + }, + { + "epoch": 0.8873733925346542, + "grad_norm": 500.71966552734375, + "learning_rate": 4.425527799438639e-07, + "loss": 13.3557, + "step": 439280 + }, + { + "epoch": 0.887393593167338, + "grad_norm": 320.1374816894531, + "learning_rate": 4.4240921207485077e-07, + "loss": 27.7214, + "step": 439290 + }, + { + "epoch": 0.8874137938000218, + "grad_norm": 295.4293518066406, + "learning_rate": 4.4226566641898173e-07, + "loss": 9.7501, + "step": 439300 + }, + { + "epoch": 0.8874339944327057, + "grad_norm": 33.95865249633789, + "learning_rate": 4.421221429769579e-07, + "loss": 13.4921, + "step": 439310 + }, + { + "epoch": 0.8874541950653895, + "grad_norm": 363.8642883300781, + "learning_rate": 4.4197864174947755e-07, + "loss": 10.8632, + "step": 439320 + }, + { + "epoch": 0.8874743956980733, + "grad_norm": 210.2339324951172, + "learning_rate": 4.418351627372408e-07, + "loss": 19.4186, + "step": 439330 + }, + { + "epoch": 0.8874945963307571, + "grad_norm": 893.2373657226562, + "learning_rate": 4.416917059409465e-07, + "loss": 21.4893, + "step": 439340 + }, + { + "epoch": 0.8875147969634409, + "grad_norm": 80.63335418701172, + "learning_rate": 4.415482713612934e-07, + "loss": 15.3269, + "step": 439350 + }, + { + "epoch": 0.8875349975961248, + "grad_norm": 154.1669921875, + "learning_rate": 4.414048589989822e-07, + "loss": 13.8406, + "step": 439360 + }, + { + "epoch": 0.8875551982288086, + "grad_norm": 251.88331604003906, + "learning_rate": 4.4126146885471067e-07, + "loss": 24.2011, + "step": 439370 + }, + { + "epoch": 0.8875753988614924, + "grad_norm": 256.91546630859375, + "learning_rate": 4.411181009291765e-07, + "loss": 28.8828, + "step": 439380 + }, + { + "epoch": 0.8875955994941762, + "grad_norm": 30.955636978149414, + "learning_rate": 4.409747552230803e-07, + "loss": 11.8671, + "step": 439390 + }, + { + "epoch": 0.8876158001268599, + "grad_norm": 472.96087646484375, + "learning_rate": 4.4083143173712207e-07, + "loss": 16.3747, + "step": 439400 + }, + { + "epoch": 0.8876360007595437, + "grad_norm": 366.6407165527344, + "learning_rate": 4.406881304719962e-07, + "loss": 15.4536, + "step": 439410 + }, + { + "epoch": 0.8876562013922276, + "grad_norm": 16.067556381225586, + "learning_rate": 4.405448514284039e-07, + "loss": 33.2922, + "step": 439420 + }, + { + "epoch": 0.8876764020249114, + "grad_norm": 382.92041015625, + "learning_rate": 4.404015946070439e-07, + "loss": 22.2967, + "step": 439430 + }, + { + "epoch": 0.8876966026575952, + "grad_norm": 343.244140625, + "learning_rate": 4.40258360008613e-07, + "loss": 27.397, + "step": 439440 + }, + { + "epoch": 0.887716803290279, + "grad_norm": 517.2798461914062, + "learning_rate": 4.401151476338095e-07, + "loss": 41.0492, + "step": 439450 + }, + { + "epoch": 0.8877370039229628, + "grad_norm": 895.5753173828125, + "learning_rate": 4.3997195748333113e-07, + "loss": 10.4305, + "step": 439460 + }, + { + "epoch": 0.8877572045556467, + "grad_norm": 11.679781913757324, + "learning_rate": 4.3982878955787844e-07, + "loss": 14.3762, + "step": 439470 + }, + { + "epoch": 0.8877774051883305, + "grad_norm": 38.89082717895508, + "learning_rate": 4.396856438581454e-07, + "loss": 15.0276, + "step": 439480 + }, + { + "epoch": 0.8877976058210143, + "grad_norm": 209.6109619140625, + "learning_rate": 4.395425203848314e-07, + "loss": 15.5432, + "step": 439490 + }, + { + "epoch": 0.8878178064536981, + "grad_norm": 285.5658874511719, + "learning_rate": 4.3939941913863525e-07, + "loss": 17.9952, + "step": 439500 + }, + { + "epoch": 0.8878380070863819, + "grad_norm": 131.74278259277344, + "learning_rate": 4.392563401202526e-07, + "loss": 27.507, + "step": 439510 + }, + { + "epoch": 0.8878582077190658, + "grad_norm": 145.62322998046875, + "learning_rate": 4.391132833303807e-07, + "loss": 14.4187, + "step": 439520 + }, + { + "epoch": 0.8878784083517496, + "grad_norm": 242.29408264160156, + "learning_rate": 4.389702487697189e-07, + "loss": 12.3936, + "step": 439530 + }, + { + "epoch": 0.8878986089844334, + "grad_norm": 31.640588760375977, + "learning_rate": 4.388272364389623e-07, + "loss": 13.26, + "step": 439540 + }, + { + "epoch": 0.8879188096171172, + "grad_norm": 241.95538330078125, + "learning_rate": 4.38684246338808e-07, + "loss": 17.4089, + "step": 439550 + }, + { + "epoch": 0.887939010249801, + "grad_norm": 305.1247863769531, + "learning_rate": 4.385412784699544e-07, + "loss": 26.7816, + "step": 439560 + }, + { + "epoch": 0.8879592108824849, + "grad_norm": 542.28271484375, + "learning_rate": 4.3839833283309597e-07, + "loss": 28.6824, + "step": 439570 + }, + { + "epoch": 0.8879794115151687, + "grad_norm": 271.184326171875, + "learning_rate": 4.3825540942893206e-07, + "loss": 19.0649, + "step": 439580 + }, + { + "epoch": 0.8879996121478525, + "grad_norm": 171.00033569335938, + "learning_rate": 4.381125082581583e-07, + "loss": 19.832, + "step": 439590 + }, + { + "epoch": 0.8880198127805363, + "grad_norm": 387.65185546875, + "learning_rate": 4.379696293214697e-07, + "loss": 21.0981, + "step": 439600 + }, + { + "epoch": 0.8880400134132201, + "grad_norm": 409.376953125, + "learning_rate": 4.378267726195645e-07, + "loss": 15.5741, + "step": 439610 + }, + { + "epoch": 0.888060214045904, + "grad_norm": 470.7140808105469, + "learning_rate": 4.3768393815313723e-07, + "loss": 17.937, + "step": 439620 + }, + { + "epoch": 0.8880804146785878, + "grad_norm": 86.91465759277344, + "learning_rate": 4.375411259228868e-07, + "loss": 13.9544, + "step": 439630 + }, + { + "epoch": 0.8881006153112716, + "grad_norm": 152.19589233398438, + "learning_rate": 4.373983359295059e-07, + "loss": 18.4752, + "step": 439640 + }, + { + "epoch": 0.8881208159439553, + "grad_norm": 101.01200103759766, + "learning_rate": 4.372555681736934e-07, + "loss": 22.6062, + "step": 439650 + }, + { + "epoch": 0.8881410165766391, + "grad_norm": 378.2396240234375, + "learning_rate": 4.3711282265614385e-07, + "loss": 18.0084, + "step": 439660 + }, + { + "epoch": 0.888161217209323, + "grad_norm": 195.88209533691406, + "learning_rate": 4.369700993775522e-07, + "loss": 11.7541, + "step": 439670 + }, + { + "epoch": 0.8881814178420068, + "grad_norm": 106.99616241455078, + "learning_rate": 4.368273983386157e-07, + "loss": 24.8551, + "step": 439680 + }, + { + "epoch": 0.8882016184746906, + "grad_norm": 80.48089599609375, + "learning_rate": 4.3668471954002864e-07, + "loss": 16.0395, + "step": 439690 + }, + { + "epoch": 0.8882218191073744, + "grad_norm": 434.21148681640625, + "learning_rate": 4.3654206298248625e-07, + "loss": 17.1421, + "step": 439700 + }, + { + "epoch": 0.8882420197400582, + "grad_norm": 172.45501708984375, + "learning_rate": 4.363994286666845e-07, + "loss": 20.3014, + "step": 439710 + }, + { + "epoch": 0.888262220372742, + "grad_norm": 436.064697265625, + "learning_rate": 4.3625681659331895e-07, + "loss": 8.8036, + "step": 439720 + }, + { + "epoch": 0.8882824210054259, + "grad_norm": 189.44009399414062, + "learning_rate": 4.3611422676308413e-07, + "loss": 18.125, + "step": 439730 + }, + { + "epoch": 0.8883026216381097, + "grad_norm": 154.24684143066406, + "learning_rate": 4.359716591766744e-07, + "loss": 10.3431, + "step": 439740 + }, + { + "epoch": 0.8883228222707935, + "grad_norm": 45.151947021484375, + "learning_rate": 4.3582911383478646e-07, + "loss": 22.5793, + "step": 439750 + }, + { + "epoch": 0.8883430229034773, + "grad_norm": 101.4354248046875, + "learning_rate": 4.3568659073811306e-07, + "loss": 14.692, + "step": 439760 + }, + { + "epoch": 0.8883632235361612, + "grad_norm": 332.0010681152344, + "learning_rate": 4.355440898873492e-07, + "loss": 8.9394, + "step": 439770 + }, + { + "epoch": 0.888383424168845, + "grad_norm": 231.61651611328125, + "learning_rate": 4.354016112831899e-07, + "loss": 8.167, + "step": 439780 + }, + { + "epoch": 0.8884036248015288, + "grad_norm": 618.9818725585938, + "learning_rate": 4.352591549263302e-07, + "loss": 14.1208, + "step": 439790 + }, + { + "epoch": 0.8884238254342126, + "grad_norm": 507.2235107421875, + "learning_rate": 4.3511672081746393e-07, + "loss": 12.3601, + "step": 439800 + }, + { + "epoch": 0.8884440260668964, + "grad_norm": 224.71241760253906, + "learning_rate": 4.3497430895728444e-07, + "loss": 13.6204, + "step": 439810 + }, + { + "epoch": 0.8884642266995803, + "grad_norm": 222.20713806152344, + "learning_rate": 4.348319193464867e-07, + "loss": 28.523, + "step": 439820 + }, + { + "epoch": 0.8884844273322641, + "grad_norm": 107.95604705810547, + "learning_rate": 4.3468955198576524e-07, + "loss": 8.9733, + "step": 439830 + }, + { + "epoch": 0.8885046279649479, + "grad_norm": 290.3319091796875, + "learning_rate": 4.3454720687581165e-07, + "loss": 22.8961, + "step": 439840 + }, + { + "epoch": 0.8885248285976317, + "grad_norm": 553.7430419921875, + "learning_rate": 4.344048840173226e-07, + "loss": 11.9736, + "step": 439850 + }, + { + "epoch": 0.8885450292303155, + "grad_norm": 217.2595977783203, + "learning_rate": 4.3426258341098925e-07, + "loss": 6.9081, + "step": 439860 + }, + { + "epoch": 0.8885652298629994, + "grad_norm": 445.9281005859375, + "learning_rate": 4.341203050575077e-07, + "loss": 19.6621, + "step": 439870 + }, + { + "epoch": 0.8885854304956832, + "grad_norm": 0.0, + "learning_rate": 4.3397804895756957e-07, + "loss": 21.3665, + "step": 439880 + }, + { + "epoch": 0.888605631128367, + "grad_norm": 14.04886531829834, + "learning_rate": 4.338358151118677e-07, + "loss": 7.1894, + "step": 439890 + }, + { + "epoch": 0.8886258317610508, + "grad_norm": 244.17861938476562, + "learning_rate": 4.33693603521097e-07, + "loss": 8.8234, + "step": 439900 + }, + { + "epoch": 0.8886460323937345, + "grad_norm": 162.6847381591797, + "learning_rate": 4.3355141418594926e-07, + "loss": 20.4846, + "step": 439910 + }, + { + "epoch": 0.8886662330264183, + "grad_norm": 238.5950469970703, + "learning_rate": 4.334092471071194e-07, + "loss": 15.8418, + "step": 439920 + }, + { + "epoch": 0.8886864336591022, + "grad_norm": 372.736083984375, + "learning_rate": 4.3326710228529746e-07, + "loss": 10.6589, + "step": 439930 + }, + { + "epoch": 0.888706634291786, + "grad_norm": 485.205078125, + "learning_rate": 4.3312497972117895e-07, + "loss": 16.4431, + "step": 439940 + }, + { + "epoch": 0.8887268349244698, + "grad_norm": 219.19924926757812, + "learning_rate": 4.32982879415455e-07, + "loss": 20.38, + "step": 439950 + }, + { + "epoch": 0.8887470355571536, + "grad_norm": 171.46522521972656, + "learning_rate": 4.3284080136881847e-07, + "loss": 26.0539, + "step": 439960 + }, + { + "epoch": 0.8887672361898374, + "grad_norm": 299.0748291015625, + "learning_rate": 4.32698745581962e-07, + "loss": 14.9703, + "step": 439970 + }, + { + "epoch": 0.8887874368225213, + "grad_norm": 352.73876953125, + "learning_rate": 4.325567120555785e-07, + "loss": 10.8353, + "step": 439980 + }, + { + "epoch": 0.8888076374552051, + "grad_norm": 268.17730712890625, + "learning_rate": 4.324147007903584e-07, + "loss": 16.2466, + "step": 439990 + }, + { + "epoch": 0.8888278380878889, + "grad_norm": 114.64710998535156, + "learning_rate": 4.322727117869951e-07, + "loss": 9.285, + "step": 440000 + }, + { + "epoch": 0.8888480387205727, + "grad_norm": 28.741056442260742, + "learning_rate": 4.3213074504618256e-07, + "loss": 12.5165, + "step": 440010 + }, + { + "epoch": 0.8888682393532565, + "grad_norm": 334.08123779296875, + "learning_rate": 4.31988800568609e-07, + "loss": 22.8308, + "step": 440020 + }, + { + "epoch": 0.8888884399859404, + "grad_norm": 30.762502670288086, + "learning_rate": 4.3184687835496784e-07, + "loss": 23.0419, + "step": 440030 + }, + { + "epoch": 0.8889086406186242, + "grad_norm": 163.64968872070312, + "learning_rate": 4.317049784059518e-07, + "loss": 12.5823, + "step": 440040 + }, + { + "epoch": 0.888928841251308, + "grad_norm": 113.00885772705078, + "learning_rate": 4.315631007222515e-07, + "loss": 15.331, + "step": 440050 + }, + { + "epoch": 0.8889490418839918, + "grad_norm": 205.9576873779297, + "learning_rate": 4.31421245304558e-07, + "loss": 19.476, + "step": 440060 + }, + { + "epoch": 0.8889692425166756, + "grad_norm": 227.61708068847656, + "learning_rate": 4.3127941215356296e-07, + "loss": 17.8654, + "step": 440070 + }, + { + "epoch": 0.8889894431493595, + "grad_norm": 0.0, + "learning_rate": 4.3113760126995974e-07, + "loss": 18.4632, + "step": 440080 + }, + { + "epoch": 0.8890096437820433, + "grad_norm": 160.78369140625, + "learning_rate": 4.309958126544361e-07, + "loss": 19.9924, + "step": 440090 + }, + { + "epoch": 0.8890298444147271, + "grad_norm": 12.122901916503906, + "learning_rate": 4.308540463076849e-07, + "loss": 8.1445, + "step": 440100 + }, + { + "epoch": 0.8890500450474109, + "grad_norm": 390.04315185546875, + "learning_rate": 4.3071230223039774e-07, + "loss": 20.182, + "step": 440110 + }, + { + "epoch": 0.8890702456800947, + "grad_norm": 179.36801147460938, + "learning_rate": 4.3057058042326407e-07, + "loss": 18.0014, + "step": 440120 + }, + { + "epoch": 0.8890904463127786, + "grad_norm": 143.66476440429688, + "learning_rate": 4.30428880886975e-07, + "loss": 9.581, + "step": 440130 + }, + { + "epoch": 0.8891106469454624, + "grad_norm": 133.86337280273438, + "learning_rate": 4.3028720362222166e-07, + "loss": 20.8693, + "step": 440140 + }, + { + "epoch": 0.8891308475781462, + "grad_norm": 314.28997802734375, + "learning_rate": 4.301455486296946e-07, + "loss": 19.0603, + "step": 440150 + }, + { + "epoch": 0.8891510482108299, + "grad_norm": 341.1095275878906, + "learning_rate": 4.300039159100827e-07, + "loss": 16.8484, + "step": 440160 + }, + { + "epoch": 0.8891712488435137, + "grad_norm": 292.7345886230469, + "learning_rate": 4.298623054640788e-07, + "loss": 13.9937, + "step": 440170 + }, + { + "epoch": 0.8891914494761975, + "grad_norm": 326.391845703125, + "learning_rate": 4.2972071729237065e-07, + "loss": 10.7691, + "step": 440180 + }, + { + "epoch": 0.8892116501088814, + "grad_norm": 6.493969917297363, + "learning_rate": 4.295791513956504e-07, + "loss": 18.682, + "step": 440190 + }, + { + "epoch": 0.8892318507415652, + "grad_norm": 216.3207550048828, + "learning_rate": 4.29437607774606e-07, + "loss": 13.1934, + "step": 440200 + }, + { + "epoch": 0.889252051374249, + "grad_norm": 411.6736145019531, + "learning_rate": 4.2929608642992894e-07, + "loss": 26.305, + "step": 440210 + }, + { + "epoch": 0.8892722520069328, + "grad_norm": 487.83905029296875, + "learning_rate": 4.291545873623087e-07, + "loss": 16.1887, + "step": 440220 + }, + { + "epoch": 0.8892924526396166, + "grad_norm": 248.68894958496094, + "learning_rate": 4.2901311057243377e-07, + "loss": 9.9636, + "step": 440230 + }, + { + "epoch": 0.8893126532723005, + "grad_norm": 347.11932373046875, + "learning_rate": 4.2887165606099513e-07, + "loss": 18.2997, + "step": 440240 + }, + { + "epoch": 0.8893328539049843, + "grad_norm": 180.7696990966797, + "learning_rate": 4.2873022382868115e-07, + "loss": 17.6919, + "step": 440250 + }, + { + "epoch": 0.8893530545376681, + "grad_norm": 361.1531066894531, + "learning_rate": 4.2858881387618235e-07, + "loss": 17.0809, + "step": 440260 + }, + { + "epoch": 0.8893732551703519, + "grad_norm": 309.6268005371094, + "learning_rate": 4.284474262041871e-07, + "loss": 22.6974, + "step": 440270 + }, + { + "epoch": 0.8893934558030357, + "grad_norm": 241.9208221435547, + "learning_rate": 4.283060608133843e-07, + "loss": 17.4749, + "step": 440280 + }, + { + "epoch": 0.8894136564357196, + "grad_norm": 363.3228759765625, + "learning_rate": 4.2816471770446343e-07, + "loss": 15.3348, + "step": 440290 + }, + { + "epoch": 0.8894338570684034, + "grad_norm": 190.2127227783203, + "learning_rate": 4.280233968781139e-07, + "loss": 12.9646, + "step": 440300 + }, + { + "epoch": 0.8894540577010872, + "grad_norm": 124.24407196044922, + "learning_rate": 4.2788209833502237e-07, + "loss": 15.3975, + "step": 440310 + }, + { + "epoch": 0.889474258333771, + "grad_norm": 312.8065185546875, + "learning_rate": 4.277408220758794e-07, + "loss": 15.6786, + "step": 440320 + }, + { + "epoch": 0.8894944589664548, + "grad_norm": 134.03599548339844, + "learning_rate": 4.275995681013745e-07, + "loss": 28.8227, + "step": 440330 + }, + { + "epoch": 0.8895146595991387, + "grad_norm": 473.7370910644531, + "learning_rate": 4.2745833641219317e-07, + "loss": 16.5285, + "step": 440340 + }, + { + "epoch": 0.8895348602318225, + "grad_norm": 75.65824127197266, + "learning_rate": 4.273171270090254e-07, + "loss": 8.0494, + "step": 440350 + }, + { + "epoch": 0.8895550608645063, + "grad_norm": 310.4652404785156, + "learning_rate": 4.271759398925601e-07, + "loss": 14.2664, + "step": 440360 + }, + { + "epoch": 0.8895752614971901, + "grad_norm": 300.664794921875, + "learning_rate": 4.270347750634846e-07, + "loss": 14.1265, + "step": 440370 + }, + { + "epoch": 0.889595462129874, + "grad_norm": 379.99761962890625, + "learning_rate": 4.2689363252248595e-07, + "loss": 19.6016, + "step": 440380 + }, + { + "epoch": 0.8896156627625578, + "grad_norm": 148.29335021972656, + "learning_rate": 4.2675251227025315e-07, + "loss": 17.414, + "step": 440390 + }, + { + "epoch": 0.8896358633952416, + "grad_norm": 300.0272521972656, + "learning_rate": 4.266114143074751e-07, + "loss": 6.6113, + "step": 440400 + }, + { + "epoch": 0.8896560640279254, + "grad_norm": 137.89877319335938, + "learning_rate": 4.264703386348384e-07, + "loss": 9.8072, + "step": 440410 + }, + { + "epoch": 0.8896762646606091, + "grad_norm": 168.51852416992188, + "learning_rate": 4.263292852530293e-07, + "loss": 21.7177, + "step": 440420 + }, + { + "epoch": 0.8896964652932929, + "grad_norm": 107.62776184082031, + "learning_rate": 4.261882541627377e-07, + "loss": 7.3128, + "step": 440430 + }, + { + "epoch": 0.8897166659259768, + "grad_norm": 332.1779479980469, + "learning_rate": 4.260472453646497e-07, + "loss": 27.318, + "step": 440440 + }, + { + "epoch": 0.8897368665586606, + "grad_norm": 286.0569152832031, + "learning_rate": 4.2590625885945205e-07, + "loss": 17.3097, + "step": 440450 + }, + { + "epoch": 0.8897570671913444, + "grad_norm": 243.79681396484375, + "learning_rate": 4.25765294647833e-07, + "loss": 28.8181, + "step": 440460 + }, + { + "epoch": 0.8897772678240282, + "grad_norm": 461.97052001953125, + "learning_rate": 4.256243527304782e-07, + "loss": 17.8837, + "step": 440470 + }, + { + "epoch": 0.889797468456712, + "grad_norm": 232.52359008789062, + "learning_rate": 4.2548343310807704e-07, + "loss": 10.1855, + "step": 440480 + }, + { + "epoch": 0.8898176690893959, + "grad_norm": 423.13519287109375, + "learning_rate": 4.25342535781314e-07, + "loss": 18.6517, + "step": 440490 + }, + { + "epoch": 0.8898378697220797, + "grad_norm": 238.52865600585938, + "learning_rate": 4.2520166075087635e-07, + "loss": 14.1518, + "step": 440500 + }, + { + "epoch": 0.8898580703547635, + "grad_norm": 369.58734130859375, + "learning_rate": 4.250608080174512e-07, + "loss": 19.9182, + "step": 440510 + }, + { + "epoch": 0.8898782709874473, + "grad_norm": 257.09002685546875, + "learning_rate": 4.249199775817242e-07, + "loss": 15.1414, + "step": 440520 + }, + { + "epoch": 0.8898984716201311, + "grad_norm": 66.41230773925781, + "learning_rate": 4.247791694443837e-07, + "loss": 14.5972, + "step": 440530 + }, + { + "epoch": 0.889918672252815, + "grad_norm": 563.6635131835938, + "learning_rate": 4.24638383606113e-07, + "loss": 22.6768, + "step": 440540 + }, + { + "epoch": 0.8899388728854988, + "grad_norm": 479.9045104980469, + "learning_rate": 4.24497620067601e-07, + "loss": 38.3795, + "step": 440550 + }, + { + "epoch": 0.8899590735181826, + "grad_norm": 494.77960205078125, + "learning_rate": 4.2435687882953327e-07, + "loss": 17.7069, + "step": 440560 + }, + { + "epoch": 0.8899792741508664, + "grad_norm": 126.67586517333984, + "learning_rate": 4.242161598925937e-07, + "loss": 16.1657, + "step": 440570 + }, + { + "epoch": 0.8899994747835502, + "grad_norm": 265.52288818359375, + "learning_rate": 4.240754632574706e-07, + "loss": 9.9768, + "step": 440580 + }, + { + "epoch": 0.8900196754162341, + "grad_norm": 64.7813491821289, + "learning_rate": 4.239347889248485e-07, + "loss": 13.0563, + "step": 440590 + }, + { + "epoch": 0.8900398760489179, + "grad_norm": 128.74452209472656, + "learning_rate": 4.237941368954124e-07, + "loss": 22.1379, + "step": 440600 + }, + { + "epoch": 0.8900600766816017, + "grad_norm": 9.655394554138184, + "learning_rate": 4.236535071698489e-07, + "loss": 16.6747, + "step": 440610 + }, + { + "epoch": 0.8900802773142855, + "grad_norm": 334.9057922363281, + "learning_rate": 4.2351289974884467e-07, + "loss": 25.0396, + "step": 440620 + }, + { + "epoch": 0.8901004779469693, + "grad_norm": 144.74742126464844, + "learning_rate": 4.2337231463308147e-07, + "loss": 16.4383, + "step": 440630 + }, + { + "epoch": 0.8901206785796532, + "grad_norm": 564.4386596679688, + "learning_rate": 4.2323175182324706e-07, + "loss": 27.9278, + "step": 440640 + }, + { + "epoch": 0.890140879212337, + "grad_norm": 235.91358947753906, + "learning_rate": 4.2309121132002695e-07, + "loss": 12.3268, + "step": 440650 + }, + { + "epoch": 0.8901610798450208, + "grad_norm": 32.54255294799805, + "learning_rate": 4.2295069312410455e-07, + "loss": 14.3396, + "step": 440660 + }, + { + "epoch": 0.8901812804777045, + "grad_norm": 18.373422622680664, + "learning_rate": 4.228101972361648e-07, + "loss": 6.3706, + "step": 440670 + }, + { + "epoch": 0.8902014811103883, + "grad_norm": 241.1278533935547, + "learning_rate": 4.226697236568933e-07, + "loss": 16.4846, + "step": 440680 + }, + { + "epoch": 0.8902216817430721, + "grad_norm": 343.79498291015625, + "learning_rate": 4.225292723869762e-07, + "loss": 19.9639, + "step": 440690 + }, + { + "epoch": 0.890241882375756, + "grad_norm": 473.7337951660156, + "learning_rate": 4.2238884342709397e-07, + "loss": 24.4123, + "step": 440700 + }, + { + "epoch": 0.8902620830084398, + "grad_norm": 259.67449951171875, + "learning_rate": 4.222484367779334e-07, + "loss": 35.9583, + "step": 440710 + }, + { + "epoch": 0.8902822836411236, + "grad_norm": 284.6575927734375, + "learning_rate": 4.2210805244017993e-07, + "loss": 19.8381, + "step": 440720 + }, + { + "epoch": 0.8903024842738074, + "grad_norm": 339.5075988769531, + "learning_rate": 4.219676904145165e-07, + "loss": 31.5925, + "step": 440730 + }, + { + "epoch": 0.8903226849064912, + "grad_norm": 111.9180908203125, + "learning_rate": 4.218273507016263e-07, + "loss": 33.4121, + "step": 440740 + }, + { + "epoch": 0.8903428855391751, + "grad_norm": 363.7783203125, + "learning_rate": 4.2168703330219494e-07, + "loss": 18.3535, + "step": 440750 + }, + { + "epoch": 0.8903630861718589, + "grad_norm": 288.3460998535156, + "learning_rate": 4.2154673821690585e-07, + "loss": 22.4761, + "step": 440760 + }, + { + "epoch": 0.8903832868045427, + "grad_norm": 241.97601318359375, + "learning_rate": 4.2140646544644227e-07, + "loss": 16.7243, + "step": 440770 + }, + { + "epoch": 0.8904034874372265, + "grad_norm": 361.3790588378906, + "learning_rate": 4.212662149914887e-07, + "loss": 13.1733, + "step": 440780 + }, + { + "epoch": 0.8904236880699103, + "grad_norm": 166.67642211914062, + "learning_rate": 4.211259868527273e-07, + "loss": 28.0201, + "step": 440790 + }, + { + "epoch": 0.8904438887025942, + "grad_norm": 495.0302429199219, + "learning_rate": 4.2098578103084376e-07, + "loss": 21.8701, + "step": 440800 + }, + { + "epoch": 0.890464089335278, + "grad_norm": 33.01707458496094, + "learning_rate": 4.208455975265191e-07, + "loss": 18.4406, + "step": 440810 + }, + { + "epoch": 0.8904842899679618, + "grad_norm": 126.27222442626953, + "learning_rate": 4.2070543634043834e-07, + "loss": 9.5508, + "step": 440820 + }, + { + "epoch": 0.8905044906006456, + "grad_norm": 429.9417419433594, + "learning_rate": 4.205652974732838e-07, + "loss": 18.9119, + "step": 440830 + }, + { + "epoch": 0.8905246912333294, + "grad_norm": 257.1548767089844, + "learning_rate": 4.2042518092573814e-07, + "loss": 25.3358, + "step": 440840 + }, + { + "epoch": 0.8905448918660133, + "grad_norm": 124.42353820800781, + "learning_rate": 4.202850866984853e-07, + "loss": 20.7962, + "step": 440850 + }, + { + "epoch": 0.8905650924986971, + "grad_norm": 228.07293701171875, + "learning_rate": 4.201450147922065e-07, + "loss": 14.201, + "step": 440860 + }, + { + "epoch": 0.8905852931313809, + "grad_norm": 159.50985717773438, + "learning_rate": 4.200049652075866e-07, + "loss": 15.3572, + "step": 440870 + }, + { + "epoch": 0.8906054937640647, + "grad_norm": 300.2088317871094, + "learning_rate": 4.198649379453068e-07, + "loss": 12.1017, + "step": 440880 + }, + { + "epoch": 0.8906256943967485, + "grad_norm": 347.2508239746094, + "learning_rate": 4.1972493300604877e-07, + "loss": 20.4435, + "step": 440890 + }, + { + "epoch": 0.8906458950294324, + "grad_norm": 177.9487762451172, + "learning_rate": 4.195849503904975e-07, + "loss": 12.7254, + "step": 440900 + }, + { + "epoch": 0.8906660956621162, + "grad_norm": 133.84117126464844, + "learning_rate": 4.1944499009933303e-07, + "loss": 8.6773, + "step": 440910 + }, + { + "epoch": 0.8906862962948, + "grad_norm": 56.217750549316406, + "learning_rate": 4.19305052133237e-07, + "loss": 8.5564, + "step": 440920 + }, + { + "epoch": 0.8907064969274837, + "grad_norm": 324.2454528808594, + "learning_rate": 4.1916513649289334e-07, + "loss": 17.785, + "step": 440930 + }, + { + "epoch": 0.8907266975601675, + "grad_norm": 248.8067169189453, + "learning_rate": 4.1902524317898427e-07, + "loss": 17.8755, + "step": 440940 + }, + { + "epoch": 0.8907468981928514, + "grad_norm": 194.91070556640625, + "learning_rate": 4.188853721921893e-07, + "loss": 18.099, + "step": 440950 + }, + { + "epoch": 0.8907670988255352, + "grad_norm": 371.50653076171875, + "learning_rate": 4.1874552353319107e-07, + "loss": 21.8642, + "step": 440960 + }, + { + "epoch": 0.890787299458219, + "grad_norm": 440.4020690917969, + "learning_rate": 4.186056972026725e-07, + "loss": 38.5145, + "step": 440970 + }, + { + "epoch": 0.8908075000909028, + "grad_norm": 276.4840087890625, + "learning_rate": 4.1846589320131415e-07, + "loss": 15.2821, + "step": 440980 + }, + { + "epoch": 0.8908277007235866, + "grad_norm": 531.48828125, + "learning_rate": 4.1832611152979655e-07, + "loss": 20.814, + "step": 440990 + }, + { + "epoch": 0.8908479013562705, + "grad_norm": 295.4017639160156, + "learning_rate": 4.1818635218880186e-07, + "loss": 18.2705, + "step": 441000 + }, + { + "epoch": 0.8908681019889543, + "grad_norm": 51.23957061767578, + "learning_rate": 4.1804661517901244e-07, + "loss": 41.4554, + "step": 441010 + }, + { + "epoch": 0.8908883026216381, + "grad_norm": 0.0, + "learning_rate": 4.179069005011066e-07, + "loss": 11.2908, + "step": 441020 + }, + { + "epoch": 0.8909085032543219, + "grad_norm": 697.160400390625, + "learning_rate": 4.177672081557671e-07, + "loss": 12.2288, + "step": 441030 + }, + { + "epoch": 0.8909287038870057, + "grad_norm": 356.4187316894531, + "learning_rate": 4.176275381436751e-07, + "loss": 10.0519, + "step": 441040 + }, + { + "epoch": 0.8909489045196896, + "grad_norm": 22.14446449279785, + "learning_rate": 4.1748789046551055e-07, + "loss": 21.3258, + "step": 441050 + }, + { + "epoch": 0.8909691051523734, + "grad_norm": 172.09939575195312, + "learning_rate": 4.173482651219535e-07, + "loss": 13.7005, + "step": 441060 + }, + { + "epoch": 0.8909893057850572, + "grad_norm": 384.9477233886719, + "learning_rate": 4.1720866211368615e-07, + "loss": 21.885, + "step": 441070 + }, + { + "epoch": 0.891009506417741, + "grad_norm": 295.076171875, + "learning_rate": 4.1706908144138804e-07, + "loss": 12.893, + "step": 441080 + }, + { + "epoch": 0.8910297070504248, + "grad_norm": 280.5708923339844, + "learning_rate": 4.1692952310573854e-07, + "loss": 21.7029, + "step": 441090 + }, + { + "epoch": 0.8910499076831087, + "grad_norm": 366.3118591308594, + "learning_rate": 4.1678998710741936e-07, + "loss": 19.4737, + "step": 441100 + }, + { + "epoch": 0.8910701083157925, + "grad_norm": 669.0502319335938, + "learning_rate": 4.1665047344710887e-07, + "loss": 26.9611, + "step": 441110 + }, + { + "epoch": 0.8910903089484763, + "grad_norm": 194.59312438964844, + "learning_rate": 4.1651098212548923e-07, + "loss": 11.9774, + "step": 441120 + }, + { + "epoch": 0.8911105095811601, + "grad_norm": 146.01962280273438, + "learning_rate": 4.163715131432383e-07, + "loss": 10.5464, + "step": 441130 + }, + { + "epoch": 0.8911307102138439, + "grad_norm": 301.42669677734375, + "learning_rate": 4.162320665010372e-07, + "loss": 20.7333, + "step": 441140 + }, + { + "epoch": 0.8911509108465278, + "grad_norm": 5.441578388214111, + "learning_rate": 4.160926421995648e-07, + "loss": 17.515, + "step": 441150 + }, + { + "epoch": 0.8911711114792116, + "grad_norm": 526.412109375, + "learning_rate": 4.159532402395011e-07, + "loss": 27.2499, + "step": 441160 + }, + { + "epoch": 0.8911913121118954, + "grad_norm": 1.964666485786438, + "learning_rate": 4.158138606215256e-07, + "loss": 8.5801, + "step": 441170 + }, + { + "epoch": 0.8912115127445792, + "grad_norm": 0.0, + "learning_rate": 4.1567450334631667e-07, + "loss": 21.3488, + "step": 441180 + }, + { + "epoch": 0.8912317133772629, + "grad_norm": 707.3838500976562, + "learning_rate": 4.155351684145548e-07, + "loss": 16.148, + "step": 441190 + }, + { + "epoch": 0.8912519140099467, + "grad_norm": 286.784912109375, + "learning_rate": 4.153958558269189e-07, + "loss": 14.0193, + "step": 441200 + }, + { + "epoch": 0.8912721146426306, + "grad_norm": 188.17913818359375, + "learning_rate": 4.1525656558408624e-07, + "loss": 11.3886, + "step": 441210 + }, + { + "epoch": 0.8912923152753144, + "grad_norm": 395.32012939453125, + "learning_rate": 4.151172976867374e-07, + "loss": 14.509, + "step": 441220 + }, + { + "epoch": 0.8913125159079982, + "grad_norm": 268.0252380371094, + "learning_rate": 4.149780521355523e-07, + "loss": 13.7314, + "step": 441230 + }, + { + "epoch": 0.891332716540682, + "grad_norm": 167.34649658203125, + "learning_rate": 4.1483882893120606e-07, + "loss": 11.0642, + "step": 441240 + }, + { + "epoch": 0.8913529171733658, + "grad_norm": 282.3281555175781, + "learning_rate": 4.146996280743798e-07, + "loss": 24.9861, + "step": 441250 + }, + { + "epoch": 0.8913731178060497, + "grad_norm": 104.86113739013672, + "learning_rate": 4.145604495657518e-07, + "loss": 4.6084, + "step": 441260 + }, + { + "epoch": 0.8913933184387335, + "grad_norm": 35.9450569152832, + "learning_rate": 4.144212934060005e-07, + "loss": 17.0768, + "step": 441270 + }, + { + "epoch": 0.8914135190714173, + "grad_norm": 32.53382873535156, + "learning_rate": 4.142821595958024e-07, + "loss": 20.1245, + "step": 441280 + }, + { + "epoch": 0.8914337197041011, + "grad_norm": 225.77322387695312, + "learning_rate": 4.1414304813583663e-07, + "loss": 25.8087, + "step": 441290 + }, + { + "epoch": 0.8914539203367849, + "grad_norm": 74.63481903076172, + "learning_rate": 4.140039590267836e-07, + "loss": 21.3402, + "step": 441300 + }, + { + "epoch": 0.8914741209694688, + "grad_norm": 31.45956039428711, + "learning_rate": 4.1386489226931723e-07, + "loss": 19.7698, + "step": 441310 + }, + { + "epoch": 0.8914943216021526, + "grad_norm": 160.2866668701172, + "learning_rate": 4.137258478641176e-07, + "loss": 19.4852, + "step": 441320 + }, + { + "epoch": 0.8915145222348364, + "grad_norm": 261.996826171875, + "learning_rate": 4.135868258118625e-07, + "loss": 12.7521, + "step": 441330 + }, + { + "epoch": 0.8915347228675202, + "grad_norm": 41.85097885131836, + "learning_rate": 4.1344782611322855e-07, + "loss": 9.3961, + "step": 441340 + }, + { + "epoch": 0.891554923500204, + "grad_norm": 211.323974609375, + "learning_rate": 4.13308848768893e-07, + "loss": 19.9661, + "step": 441350 + }, + { + "epoch": 0.8915751241328879, + "grad_norm": 190.90200805664062, + "learning_rate": 4.1316989377953477e-07, + "loss": 8.9747, + "step": 441360 + }, + { + "epoch": 0.8915953247655717, + "grad_norm": 70.90047454833984, + "learning_rate": 4.1303096114583e-07, + "loss": 15.8222, + "step": 441370 + }, + { + "epoch": 0.8916155253982555, + "grad_norm": 345.0062561035156, + "learning_rate": 4.128920508684553e-07, + "loss": 16.628, + "step": 441380 + }, + { + "epoch": 0.8916357260309393, + "grad_norm": 280.53509521484375, + "learning_rate": 4.127531629480891e-07, + "loss": 16.726, + "step": 441390 + }, + { + "epoch": 0.8916559266636231, + "grad_norm": 578.4691772460938, + "learning_rate": 4.1261429738540694e-07, + "loss": 30.5171, + "step": 441400 + }, + { + "epoch": 0.891676127296307, + "grad_norm": 389.1383056640625, + "learning_rate": 4.1247545418108715e-07, + "loss": 20.0731, + "step": 441410 + }, + { + "epoch": 0.8916963279289908, + "grad_norm": 173.65341186523438, + "learning_rate": 4.1233663333580474e-07, + "loss": 12.1688, + "step": 441420 + }, + { + "epoch": 0.8917165285616746, + "grad_norm": 350.42218017578125, + "learning_rate": 4.121978348502381e-07, + "loss": 27.4812, + "step": 441430 + }, + { + "epoch": 0.8917367291943583, + "grad_norm": 235.80104064941406, + "learning_rate": 4.1205905872506224e-07, + "loss": 18.2688, + "step": 441440 + }, + { + "epoch": 0.8917569298270421, + "grad_norm": 396.60870361328125, + "learning_rate": 4.119203049609538e-07, + "loss": 17.07, + "step": 441450 + }, + { + "epoch": 0.891777130459726, + "grad_norm": 4.170270919799805, + "learning_rate": 4.1178157355859005e-07, + "loss": 13.0384, + "step": 441460 + }, + { + "epoch": 0.8917973310924098, + "grad_norm": 277.4400329589844, + "learning_rate": 4.1164286451864543e-07, + "loss": 15.3368, + "step": 441470 + }, + { + "epoch": 0.8918175317250936, + "grad_norm": 169.8954620361328, + "learning_rate": 4.1150417784179776e-07, + "loss": 20.122, + "step": 441480 + }, + { + "epoch": 0.8918377323577774, + "grad_norm": 595.0029296875, + "learning_rate": 4.1136551352872256e-07, + "loss": 17.5802, + "step": 441490 + }, + { + "epoch": 0.8918579329904612, + "grad_norm": 519.4209594726562, + "learning_rate": 4.112268715800943e-07, + "loss": 23.3711, + "step": 441500 + }, + { + "epoch": 0.891878133623145, + "grad_norm": 114.85365295410156, + "learning_rate": 4.1108825199659087e-07, + "loss": 16.6366, + "step": 441510 + }, + { + "epoch": 0.8918983342558289, + "grad_norm": 384.9375915527344, + "learning_rate": 4.1094965477888605e-07, + "loss": 19.2176, + "step": 441520 + }, + { + "epoch": 0.8919185348885127, + "grad_norm": 127.04666900634766, + "learning_rate": 4.1081107992765546e-07, + "loss": 16.5187, + "step": 441530 + }, + { + "epoch": 0.8919387355211965, + "grad_norm": 259.2323303222656, + "learning_rate": 4.1067252744357524e-07, + "loss": 16.7474, + "step": 441540 + }, + { + "epoch": 0.8919589361538803, + "grad_norm": 260.97119140625, + "learning_rate": 4.10533997327322e-07, + "loss": 9.9041, + "step": 441550 + }, + { + "epoch": 0.8919791367865642, + "grad_norm": 232.7197265625, + "learning_rate": 4.1039548957956807e-07, + "loss": 15.5778, + "step": 441560 + }, + { + "epoch": 0.891999337419248, + "grad_norm": 57.10182571411133, + "learning_rate": 4.102570042009896e-07, + "loss": 18.1468, + "step": 441570 + }, + { + "epoch": 0.8920195380519318, + "grad_norm": 248.76600646972656, + "learning_rate": 4.101185411922626e-07, + "loss": 14.0383, + "step": 441580 + }, + { + "epoch": 0.8920397386846156, + "grad_norm": 70.76747131347656, + "learning_rate": 4.099801005540616e-07, + "loss": 15.9473, + "step": 441590 + }, + { + "epoch": 0.8920599393172994, + "grad_norm": 300.2578430175781, + "learning_rate": 4.0984168228705934e-07, + "loss": 14.8233, + "step": 441600 + }, + { + "epoch": 0.8920801399499833, + "grad_norm": 288.84088134765625, + "learning_rate": 4.0970328639193255e-07, + "loss": 17.224, + "step": 441610 + }, + { + "epoch": 0.8921003405826671, + "grad_norm": 341.37408447265625, + "learning_rate": 4.0956491286935687e-07, + "loss": 42.7241, + "step": 441620 + }, + { + "epoch": 0.8921205412153509, + "grad_norm": 716.47509765625, + "learning_rate": 4.0942656172000273e-07, + "loss": 18.3528, + "step": 441630 + }, + { + "epoch": 0.8921407418480347, + "grad_norm": 211.32870483398438, + "learning_rate": 4.0928823294454743e-07, + "loss": 23.5649, + "step": 441640 + }, + { + "epoch": 0.8921609424807185, + "grad_norm": 467.16729736328125, + "learning_rate": 4.091499265436649e-07, + "loss": 15.5355, + "step": 441650 + }, + { + "epoch": 0.8921811431134024, + "grad_norm": 570.491455078125, + "learning_rate": 4.0901164251802905e-07, + "loss": 17.3162, + "step": 441660 + }, + { + "epoch": 0.8922013437460862, + "grad_norm": 166.6530303955078, + "learning_rate": 4.088733808683132e-07, + "loss": 19.6374, + "step": 441670 + }, + { + "epoch": 0.89222154437877, + "grad_norm": 326.330322265625, + "learning_rate": 4.087351415951918e-07, + "loss": 26.1327, + "step": 441680 + }, + { + "epoch": 0.8922417450114538, + "grad_norm": 527.7655029296875, + "learning_rate": 4.085969246993388e-07, + "loss": 20.6709, + "step": 441690 + }, + { + "epoch": 0.8922619456441375, + "grad_norm": 493.05120849609375, + "learning_rate": 4.084587301814269e-07, + "loss": 17.5031, + "step": 441700 + }, + { + "epoch": 0.8922821462768213, + "grad_norm": 280.71697998046875, + "learning_rate": 4.0832055804212957e-07, + "loss": 12.9486, + "step": 441710 + }, + { + "epoch": 0.8923023469095052, + "grad_norm": 18.699125289916992, + "learning_rate": 4.081824082821223e-07, + "loss": 11.9697, + "step": 441720 + }, + { + "epoch": 0.892322547542189, + "grad_norm": 311.6136169433594, + "learning_rate": 4.080442809020774e-07, + "loss": 14.3622, + "step": 441730 + }, + { + "epoch": 0.8923427481748728, + "grad_norm": 170.95773315429688, + "learning_rate": 4.079061759026659e-07, + "loss": 18.422, + "step": 441740 + }, + { + "epoch": 0.8923629488075566, + "grad_norm": 278.82080078125, + "learning_rate": 4.0776809328456455e-07, + "loss": 12.0484, + "step": 441750 + }, + { + "epoch": 0.8923831494402404, + "grad_norm": 412.0937805175781, + "learning_rate": 4.0763003304844395e-07, + "loss": 12.5635, + "step": 441760 + }, + { + "epoch": 0.8924033500729243, + "grad_norm": 340.64300537109375, + "learning_rate": 4.0749199519497686e-07, + "loss": 13.7271, + "step": 441770 + }, + { + "epoch": 0.8924235507056081, + "grad_norm": 208.55242919921875, + "learning_rate": 4.073539797248377e-07, + "loss": 21.6009, + "step": 441780 + }, + { + "epoch": 0.8924437513382919, + "grad_norm": 89.96759033203125, + "learning_rate": 4.0721598663869764e-07, + "loss": 18.0797, + "step": 441790 + }, + { + "epoch": 0.8924639519709757, + "grad_norm": 251.16778564453125, + "learning_rate": 4.0707801593723006e-07, + "loss": 32.1788, + "step": 441800 + }, + { + "epoch": 0.8924841526036595, + "grad_norm": 251.69451904296875, + "learning_rate": 4.069400676211077e-07, + "loss": 20.4469, + "step": 441810 + }, + { + "epoch": 0.8925043532363434, + "grad_norm": 183.2567138671875, + "learning_rate": 4.0680214169100117e-07, + "loss": 15.515, + "step": 441820 + }, + { + "epoch": 0.8925245538690272, + "grad_norm": 547.2921142578125, + "learning_rate": 4.0666423814758436e-07, + "loss": 30.1629, + "step": 441830 + }, + { + "epoch": 0.892544754501711, + "grad_norm": 125.34368896484375, + "learning_rate": 4.065263569915301e-07, + "loss": 16.8468, + "step": 441840 + }, + { + "epoch": 0.8925649551343948, + "grad_norm": 188.44284057617188, + "learning_rate": 4.063884982235078e-07, + "loss": 21.6664, + "step": 441850 + }, + { + "epoch": 0.8925851557670786, + "grad_norm": 150.7265167236328, + "learning_rate": 4.062506618441908e-07, + "loss": 14.6142, + "step": 441860 + }, + { + "epoch": 0.8926053563997625, + "grad_norm": 239.54351806640625, + "learning_rate": 4.06112847854252e-07, + "loss": 30.2596, + "step": 441870 + }, + { + "epoch": 0.8926255570324463, + "grad_norm": 310.3310852050781, + "learning_rate": 4.059750562543618e-07, + "loss": 41.8212, + "step": 441880 + }, + { + "epoch": 0.8926457576651301, + "grad_norm": 230.776611328125, + "learning_rate": 4.05837287045191e-07, + "loss": 12.8379, + "step": 441890 + }, + { + "epoch": 0.8926659582978139, + "grad_norm": 278.3397521972656, + "learning_rate": 4.056995402274122e-07, + "loss": 21.7668, + "step": 441900 + }, + { + "epoch": 0.8926861589304977, + "grad_norm": 269.46240234375, + "learning_rate": 4.0556181580169885e-07, + "loss": 15.8528, + "step": 441910 + }, + { + "epoch": 0.8927063595631816, + "grad_norm": 320.7786865234375, + "learning_rate": 4.054241137687176e-07, + "loss": 22.1815, + "step": 441920 + }, + { + "epoch": 0.8927265601958654, + "grad_norm": 91.73960876464844, + "learning_rate": 4.052864341291418e-07, + "loss": 10.3726, + "step": 441930 + }, + { + "epoch": 0.8927467608285492, + "grad_norm": 195.0965118408203, + "learning_rate": 4.051487768836443e-07, + "loss": 23.2232, + "step": 441940 + }, + { + "epoch": 0.8927669614612329, + "grad_norm": 161.50064086914062, + "learning_rate": 4.0501114203289395e-07, + "loss": 15.2331, + "step": 441950 + }, + { + "epoch": 0.8927871620939167, + "grad_norm": 282.3250427246094, + "learning_rate": 4.048735295775608e-07, + "loss": 17.1713, + "step": 441960 + }, + { + "epoch": 0.8928073627266006, + "grad_norm": 280.8865661621094, + "learning_rate": 4.0473593951831814e-07, + "loss": 11.15, + "step": 441970 + }, + { + "epoch": 0.8928275633592844, + "grad_norm": 420.7433776855469, + "learning_rate": 4.0459837185583497e-07, + "loss": 10.429, + "step": 441980 + }, + { + "epoch": 0.8928477639919682, + "grad_norm": 452.8355712890625, + "learning_rate": 4.044608265907807e-07, + "loss": 26.0573, + "step": 441990 + }, + { + "epoch": 0.892867964624652, + "grad_norm": 143.9297332763672, + "learning_rate": 4.043233037238281e-07, + "loss": 18.3739, + "step": 442000 + }, + { + "epoch": 0.8928881652573358, + "grad_norm": 476.40283203125, + "learning_rate": 4.041858032556456e-07, + "loss": 18.2946, + "step": 442010 + }, + { + "epoch": 0.8929083658900197, + "grad_norm": 242.421630859375, + "learning_rate": 4.040483251869054e-07, + "loss": 13.0868, + "step": 442020 + }, + { + "epoch": 0.8929285665227035, + "grad_norm": 52.56117630004883, + "learning_rate": 4.0391086951827474e-07, + "loss": 31.286, + "step": 442030 + }, + { + "epoch": 0.8929487671553873, + "grad_norm": 228.5247344970703, + "learning_rate": 4.0377343625042587e-07, + "loss": 13.4995, + "step": 442040 + }, + { + "epoch": 0.8929689677880711, + "grad_norm": 114.69009399414062, + "learning_rate": 4.0363602538402823e-07, + "loss": 9.915, + "step": 442050 + }, + { + "epoch": 0.8929891684207549, + "grad_norm": 537.7235107421875, + "learning_rate": 4.034986369197502e-07, + "loss": 26.735, + "step": 442060 + }, + { + "epoch": 0.8930093690534388, + "grad_norm": 307.6634826660156, + "learning_rate": 4.0336127085826294e-07, + "loss": 23.8693, + "step": 442070 + }, + { + "epoch": 0.8930295696861226, + "grad_norm": 304.4604187011719, + "learning_rate": 4.032239272002347e-07, + "loss": 13.2142, + "step": 442080 + }, + { + "epoch": 0.8930497703188064, + "grad_norm": 394.4139099121094, + "learning_rate": 4.030866059463362e-07, + "loss": 25.4436, + "step": 442090 + }, + { + "epoch": 0.8930699709514902, + "grad_norm": 234.47674560546875, + "learning_rate": 4.029493070972362e-07, + "loss": 12.9756, + "step": 442100 + }, + { + "epoch": 0.893090171584174, + "grad_norm": 197.06240844726562, + "learning_rate": 4.0281203065360265e-07, + "loss": 16.0591, + "step": 442110 + }, + { + "epoch": 0.8931103722168579, + "grad_norm": 34.24604415893555, + "learning_rate": 4.026747766161071e-07, + "loss": 16.2573, + "step": 442120 + }, + { + "epoch": 0.8931305728495417, + "grad_norm": 9.292876243591309, + "learning_rate": 4.025375449854163e-07, + "loss": 6.0698, + "step": 442130 + }, + { + "epoch": 0.8931507734822255, + "grad_norm": 363.0008544921875, + "learning_rate": 4.0240033576219974e-07, + "loss": 14.518, + "step": 442140 + }, + { + "epoch": 0.8931709741149093, + "grad_norm": 394.5640869140625, + "learning_rate": 4.022631489471257e-07, + "loss": 20.8416, + "step": 442150 + }, + { + "epoch": 0.8931911747475931, + "grad_norm": 129.547607421875, + "learning_rate": 4.0212598454086596e-07, + "loss": 17.2825, + "step": 442160 + }, + { + "epoch": 0.893211375380277, + "grad_norm": 305.1719970703125, + "learning_rate": 4.019888425440838e-07, + "loss": 12.5452, + "step": 442170 + }, + { + "epoch": 0.8932315760129608, + "grad_norm": 335.4129943847656, + "learning_rate": 4.018517229574509e-07, + "loss": 11.5626, + "step": 442180 + }, + { + "epoch": 0.8932517766456446, + "grad_norm": 311.81048583984375, + "learning_rate": 4.0171462578163624e-07, + "loss": 17.4888, + "step": 442190 + }, + { + "epoch": 0.8932719772783284, + "grad_norm": 236.4404296875, + "learning_rate": 4.0157755101730645e-07, + "loss": 14.4155, + "step": 442200 + }, + { + "epoch": 0.8932921779110121, + "grad_norm": 326.996337890625, + "learning_rate": 4.014404986651288e-07, + "loss": 18.2013, + "step": 442210 + }, + { + "epoch": 0.8933123785436959, + "grad_norm": 352.307373046875, + "learning_rate": 4.013034687257727e-07, + "loss": 22.2774, + "step": 442220 + }, + { + "epoch": 0.8933325791763798, + "grad_norm": 277.7940673828125, + "learning_rate": 4.011664611999072e-07, + "loss": 30.8169, + "step": 442230 + }, + { + "epoch": 0.8933527798090636, + "grad_norm": 538.7009887695312, + "learning_rate": 4.010294760881972e-07, + "loss": 24.6857, + "step": 442240 + }, + { + "epoch": 0.8933729804417474, + "grad_norm": 176.78065490722656, + "learning_rate": 4.0089251339131164e-07, + "loss": 20.1488, + "step": 442250 + }, + { + "epoch": 0.8933931810744312, + "grad_norm": 3.870227336883545, + "learning_rate": 4.0075557310991886e-07, + "loss": 17.3812, + "step": 442260 + }, + { + "epoch": 0.893413381707115, + "grad_norm": 439.2266845703125, + "learning_rate": 4.006186552446861e-07, + "loss": 16.0918, + "step": 442270 + }, + { + "epoch": 0.8934335823397989, + "grad_norm": 258.6006774902344, + "learning_rate": 4.00481759796279e-07, + "loss": 21.6943, + "step": 442280 + }, + { + "epoch": 0.8934537829724827, + "grad_norm": 341.0641174316406, + "learning_rate": 4.003448867653664e-07, + "loss": 18.1336, + "step": 442290 + }, + { + "epoch": 0.8934739836051665, + "grad_norm": 437.7100830078125, + "learning_rate": 4.002080361526156e-07, + "loss": 15.1572, + "step": 442300 + }, + { + "epoch": 0.8934941842378503, + "grad_norm": 347.60882568359375, + "learning_rate": 4.000712079586916e-07, + "loss": 16.401, + "step": 442310 + }, + { + "epoch": 0.8935143848705341, + "grad_norm": 480.7490539550781, + "learning_rate": 3.999344021842627e-07, + "loss": 24.8384, + "step": 442320 + }, + { + "epoch": 0.893534585503218, + "grad_norm": 192.32810974121094, + "learning_rate": 3.997976188299968e-07, + "loss": 18.4758, + "step": 442330 + }, + { + "epoch": 0.8935547861359018, + "grad_norm": 847.3984375, + "learning_rate": 3.996608578965594e-07, + "loss": 22.622, + "step": 442340 + }, + { + "epoch": 0.8935749867685856, + "grad_norm": 223.96299743652344, + "learning_rate": 3.9952411938461557e-07, + "loss": 18.4748, + "step": 442350 + }, + { + "epoch": 0.8935951874012694, + "grad_norm": 212.14646911621094, + "learning_rate": 3.9938740329483473e-07, + "loss": 18.2735, + "step": 442360 + }, + { + "epoch": 0.8936153880339532, + "grad_norm": 359.57940673828125, + "learning_rate": 3.992507096278814e-07, + "loss": 25.5224, + "step": 442370 + }, + { + "epoch": 0.8936355886666371, + "grad_norm": 271.06353759765625, + "learning_rate": 3.991140383844211e-07, + "loss": 9.497, + "step": 442380 + }, + { + "epoch": 0.8936557892993209, + "grad_norm": 422.99774169921875, + "learning_rate": 3.989773895651222e-07, + "loss": 16.8614, + "step": 442390 + }, + { + "epoch": 0.8936759899320047, + "grad_norm": 337.3374938964844, + "learning_rate": 3.9884076317064813e-07, + "loss": 20.38, + "step": 442400 + }, + { + "epoch": 0.8936961905646885, + "grad_norm": 299.5712890625, + "learning_rate": 3.9870415920166715e-07, + "loss": 16.9914, + "step": 442410 + }, + { + "epoch": 0.8937163911973723, + "grad_norm": 232.5764617919922, + "learning_rate": 3.9856757765884436e-07, + "loss": 10.033, + "step": 442420 + }, + { + "epoch": 0.8937365918300562, + "grad_norm": 7.127708435058594, + "learning_rate": 3.984310185428442e-07, + "loss": 31.2197, + "step": 442430 + }, + { + "epoch": 0.89375679246274, + "grad_norm": 354.21075439453125, + "learning_rate": 3.9829448185433385e-07, + "loss": 26.9723, + "step": 442440 + }, + { + "epoch": 0.8937769930954238, + "grad_norm": 299.904541015625, + "learning_rate": 3.9815796759397783e-07, + "loss": 8.4131, + "step": 442450 + }, + { + "epoch": 0.8937971937281076, + "grad_norm": 0.0, + "learning_rate": 3.980214757624412e-07, + "loss": 18.6585, + "step": 442460 + }, + { + "epoch": 0.8938173943607913, + "grad_norm": 162.68905639648438, + "learning_rate": 3.978850063603895e-07, + "loss": 16.8996, + "step": 442470 + }, + { + "epoch": 0.8938375949934751, + "grad_norm": 384.43865966796875, + "learning_rate": 3.977485593884889e-07, + "loss": 26.5161, + "step": 442480 + }, + { + "epoch": 0.893857795626159, + "grad_norm": 272.01507568359375, + "learning_rate": 3.9761213484740435e-07, + "loss": 19.0648, + "step": 442490 + }, + { + "epoch": 0.8938779962588428, + "grad_norm": 419.2401123046875, + "learning_rate": 3.9747573273779816e-07, + "loss": 14.2595, + "step": 442500 + }, + { + "epoch": 0.8938981968915266, + "grad_norm": 171.304443359375, + "learning_rate": 3.9733935306033756e-07, + "loss": 16.8611, + "step": 442510 + }, + { + "epoch": 0.8939183975242104, + "grad_norm": 401.70294189453125, + "learning_rate": 3.9720299581568865e-07, + "loss": 23.067, + "step": 442520 + }, + { + "epoch": 0.8939385981568942, + "grad_norm": 301.2441711425781, + "learning_rate": 3.970666610045121e-07, + "loss": 15.8445, + "step": 442530 + }, + { + "epoch": 0.8939587987895781, + "grad_norm": 430.2862548828125, + "learning_rate": 3.969303486274745e-07, + "loss": 14.9746, + "step": 442540 + }, + { + "epoch": 0.8939789994222619, + "grad_norm": 252.0267791748047, + "learning_rate": 3.967940586852409e-07, + "loss": 11.8626, + "step": 442550 + }, + { + "epoch": 0.8939992000549457, + "grad_norm": 84.86177062988281, + "learning_rate": 3.966577911784747e-07, + "loss": 19.9149, + "step": 442560 + }, + { + "epoch": 0.8940194006876295, + "grad_norm": 500.92437744140625, + "learning_rate": 3.965215461078392e-07, + "loss": 15.9517, + "step": 442570 + }, + { + "epoch": 0.8940396013203133, + "grad_norm": 490.483642578125, + "learning_rate": 3.963853234740006e-07, + "loss": 20.9653, + "step": 442580 + }, + { + "epoch": 0.8940598019529972, + "grad_norm": 330.5580749511719, + "learning_rate": 3.962491232776211e-07, + "loss": 18.0036, + "step": 442590 + }, + { + "epoch": 0.894080002585681, + "grad_norm": 343.3367919921875, + "learning_rate": 3.961129455193641e-07, + "loss": 32.6233, + "step": 442600 + }, + { + "epoch": 0.8941002032183648, + "grad_norm": 118.35514068603516, + "learning_rate": 3.959767901998957e-07, + "loss": 12.4153, + "step": 442610 + }, + { + "epoch": 0.8941204038510486, + "grad_norm": 51.02582931518555, + "learning_rate": 3.958406573198764e-07, + "loss": 16.2508, + "step": 442620 + }, + { + "epoch": 0.8941406044837324, + "grad_norm": 259.5372314453125, + "learning_rate": 3.957045468799725e-07, + "loss": 20.5187, + "step": 442630 + }, + { + "epoch": 0.8941608051164163, + "grad_norm": 215.82168579101562, + "learning_rate": 3.955684588808456e-07, + "loss": 31.4842, + "step": 442640 + }, + { + "epoch": 0.8941810057491001, + "grad_norm": 117.93922424316406, + "learning_rate": 3.954323933231602e-07, + "loss": 22.4197, + "step": 442650 + }, + { + "epoch": 0.8942012063817839, + "grad_norm": 405.8056945800781, + "learning_rate": 3.952963502075791e-07, + "loss": 22.5982, + "step": 442660 + }, + { + "epoch": 0.8942214070144677, + "grad_norm": 454.1044616699219, + "learning_rate": 3.951603295347639e-07, + "loss": 18.579, + "step": 442670 + }, + { + "epoch": 0.8942416076471515, + "grad_norm": 3.49399995803833, + "learning_rate": 3.9502433130537977e-07, + "loss": 11.4103, + "step": 442680 + }, + { + "epoch": 0.8942618082798354, + "grad_norm": 365.5223693847656, + "learning_rate": 3.9488835552008773e-07, + "loss": 18.178, + "step": 442690 + }, + { + "epoch": 0.8942820089125192, + "grad_norm": 42.808685302734375, + "learning_rate": 3.947524021795518e-07, + "loss": 17.3722, + "step": 442700 + }, + { + "epoch": 0.894302209545203, + "grad_norm": 162.291259765625, + "learning_rate": 3.946164712844347e-07, + "loss": 11.6861, + "step": 442710 + }, + { + "epoch": 0.8943224101778867, + "grad_norm": 207.251220703125, + "learning_rate": 3.9448056283539704e-07, + "loss": 18.4137, + "step": 442720 + }, + { + "epoch": 0.8943426108105705, + "grad_norm": 285.8092346191406, + "learning_rate": 3.9434467683310327e-07, + "loss": 12.0819, + "step": 442730 + }, + { + "epoch": 0.8943628114432544, + "grad_norm": 236.7385711669922, + "learning_rate": 3.942088132782157e-07, + "loss": 17.1582, + "step": 442740 + }, + { + "epoch": 0.8943830120759382, + "grad_norm": 283.231689453125, + "learning_rate": 3.9407297217139427e-07, + "loss": 14.0678, + "step": 442750 + }, + { + "epoch": 0.894403212708622, + "grad_norm": 245.2840576171875, + "learning_rate": 3.9393715351330243e-07, + "loss": 14.1923, + "step": 442760 + }, + { + "epoch": 0.8944234133413058, + "grad_norm": 167.3682861328125, + "learning_rate": 3.9380135730460347e-07, + "loss": 11.5076, + "step": 442770 + }, + { + "epoch": 0.8944436139739896, + "grad_norm": 377.32257080078125, + "learning_rate": 3.9366558354595797e-07, + "loss": 24.7849, + "step": 442780 + }, + { + "epoch": 0.8944638146066735, + "grad_norm": 106.98843383789062, + "learning_rate": 3.935298322380271e-07, + "loss": 9.0127, + "step": 442790 + }, + { + "epoch": 0.8944840152393573, + "grad_norm": 496.2410583496094, + "learning_rate": 3.9339410338147363e-07, + "loss": 14.5842, + "step": 442800 + }, + { + "epoch": 0.8945042158720411, + "grad_norm": 261.22564697265625, + "learning_rate": 3.9325839697695877e-07, + "loss": 11.0385, + "step": 442810 + }, + { + "epoch": 0.8945244165047249, + "grad_norm": 474.54119873046875, + "learning_rate": 3.931227130251425e-07, + "loss": 16.6529, + "step": 442820 + }, + { + "epoch": 0.8945446171374087, + "grad_norm": 47.422218322753906, + "learning_rate": 3.929870515266876e-07, + "loss": 20.1841, + "step": 442830 + }, + { + "epoch": 0.8945648177700926, + "grad_norm": 98.60272216796875, + "learning_rate": 3.928514124822569e-07, + "loss": 16.4046, + "step": 442840 + }, + { + "epoch": 0.8945850184027764, + "grad_norm": 25.00154685974121, + "learning_rate": 3.9271579589250817e-07, + "loss": 26.1975, + "step": 442850 + }, + { + "epoch": 0.8946052190354602, + "grad_norm": 351.44476318359375, + "learning_rate": 3.925802017581032e-07, + "loss": 14.8405, + "step": 442860 + }, + { + "epoch": 0.894625419668144, + "grad_norm": 233.4315643310547, + "learning_rate": 3.924446300797052e-07, + "loss": 11.589, + "step": 442870 + }, + { + "epoch": 0.8946456203008278, + "grad_norm": 166.7718048095703, + "learning_rate": 3.923090808579727e-07, + "loss": 12.9134, + "step": 442880 + }, + { + "epoch": 0.8946658209335117, + "grad_norm": 341.9676818847656, + "learning_rate": 3.9217355409356614e-07, + "loss": 26.3224, + "step": 442890 + }, + { + "epoch": 0.8946860215661955, + "grad_norm": 256.1241455078125, + "learning_rate": 3.920380497871473e-07, + "loss": 14.5901, + "step": 442900 + }, + { + "epoch": 0.8947062221988793, + "grad_norm": 338.17987060546875, + "learning_rate": 3.9190256793937675e-07, + "loss": 20.7812, + "step": 442910 + }, + { + "epoch": 0.8947264228315631, + "grad_norm": 214.90966796875, + "learning_rate": 3.9176710855091283e-07, + "loss": 10.1591, + "step": 442920 + }, + { + "epoch": 0.8947466234642469, + "grad_norm": 208.99169921875, + "learning_rate": 3.916316716224172e-07, + "loss": 17.3368, + "step": 442930 + }, + { + "epoch": 0.8947668240969308, + "grad_norm": 226.1060791015625, + "learning_rate": 3.9149625715455107e-07, + "loss": 20.4121, + "step": 442940 + }, + { + "epoch": 0.8947870247296146, + "grad_norm": 94.91813659667969, + "learning_rate": 3.913608651479733e-07, + "loss": 25.107, + "step": 442950 + }, + { + "epoch": 0.8948072253622984, + "grad_norm": 754.3956909179688, + "learning_rate": 3.912254956033423e-07, + "loss": 20.8499, + "step": 442960 + }, + { + "epoch": 0.8948274259949822, + "grad_norm": 310.0697021484375, + "learning_rate": 3.9109014852132035e-07, + "loss": 11.3727, + "step": 442970 + }, + { + "epoch": 0.8948476266276659, + "grad_norm": 413.1166687011719, + "learning_rate": 3.9095482390256624e-07, + "loss": 16.3572, + "step": 442980 + }, + { + "epoch": 0.8948678272603497, + "grad_norm": 380.8415832519531, + "learning_rate": 3.908195217477384e-07, + "loss": 18.5484, + "step": 442990 + }, + { + "epoch": 0.8948880278930336, + "grad_norm": 485.34539794921875, + "learning_rate": 3.90684242057498e-07, + "loss": 32.9813, + "step": 443000 + }, + { + "epoch": 0.8949082285257174, + "grad_norm": 279.1822814941406, + "learning_rate": 3.9054898483250224e-07, + "loss": 14.1602, + "step": 443010 + }, + { + "epoch": 0.8949284291584012, + "grad_norm": 396.0739440917969, + "learning_rate": 3.904137500734129e-07, + "loss": 22.0797, + "step": 443020 + }, + { + "epoch": 0.894948629791085, + "grad_norm": 409.1693115234375, + "learning_rate": 3.902785377808882e-07, + "loss": 26.4177, + "step": 443030 + }, + { + "epoch": 0.8949688304237688, + "grad_norm": 165.26966857910156, + "learning_rate": 3.901433479555855e-07, + "loss": 19.7893, + "step": 443040 + }, + { + "epoch": 0.8949890310564527, + "grad_norm": 278.30572509765625, + "learning_rate": 3.9000818059816593e-07, + "loss": 24.8908, + "step": 443050 + }, + { + "epoch": 0.8950092316891365, + "grad_norm": 140.10414123535156, + "learning_rate": 3.898730357092878e-07, + "loss": 20.9508, + "step": 443060 + }, + { + "epoch": 0.8950294323218203, + "grad_norm": 583.90380859375, + "learning_rate": 3.8973791328960786e-07, + "loss": 17.115, + "step": 443070 + }, + { + "epoch": 0.8950496329545041, + "grad_norm": 178.4078369140625, + "learning_rate": 3.8960281333978667e-07, + "loss": 18.1718, + "step": 443080 + }, + { + "epoch": 0.895069833587188, + "grad_norm": 96.53193664550781, + "learning_rate": 3.894677358604826e-07, + "loss": 13.6481, + "step": 443090 + }, + { + "epoch": 0.8950900342198718, + "grad_norm": 390.45806884765625, + "learning_rate": 3.89332680852354e-07, + "loss": 17.0324, + "step": 443100 + }, + { + "epoch": 0.8951102348525556, + "grad_norm": 155.85145568847656, + "learning_rate": 3.8919764831605754e-07, + "loss": 9.2945, + "step": 443110 + }, + { + "epoch": 0.8951304354852394, + "grad_norm": 243.15550231933594, + "learning_rate": 3.890626382522539e-07, + "loss": 21.3716, + "step": 443120 + }, + { + "epoch": 0.8951506361179232, + "grad_norm": 902.5233764648438, + "learning_rate": 3.889276506615991e-07, + "loss": 14.31, + "step": 443130 + }, + { + "epoch": 0.895170836750607, + "grad_norm": 323.9816589355469, + "learning_rate": 3.88792685544751e-07, + "loss": 23.5845, + "step": 443140 + }, + { + "epoch": 0.8951910373832909, + "grad_norm": 144.73451232910156, + "learning_rate": 3.88657742902368e-07, + "loss": 12.5715, + "step": 443150 + }, + { + "epoch": 0.8952112380159747, + "grad_norm": 317.0386657714844, + "learning_rate": 3.88522822735109e-07, + "loss": 28.2326, + "step": 443160 + }, + { + "epoch": 0.8952314386486585, + "grad_norm": 189.03042602539062, + "learning_rate": 3.8838792504363066e-07, + "loss": 5.949, + "step": 443170 + }, + { + "epoch": 0.8952516392813423, + "grad_norm": 712.6046142578125, + "learning_rate": 3.882530498285886e-07, + "loss": 15.2152, + "step": 443180 + }, + { + "epoch": 0.8952718399140261, + "grad_norm": 190.4026641845703, + "learning_rate": 3.8811819709064336e-07, + "loss": 12.2439, + "step": 443190 + }, + { + "epoch": 0.89529204054671, + "grad_norm": 193.0666046142578, + "learning_rate": 3.879833668304506e-07, + "loss": 27.4535, + "step": 443200 + }, + { + "epoch": 0.8953122411793938, + "grad_norm": 254.59495544433594, + "learning_rate": 3.8784855904866637e-07, + "loss": 14.8993, + "step": 443210 + }, + { + "epoch": 0.8953324418120776, + "grad_norm": 100.77217864990234, + "learning_rate": 3.877137737459502e-07, + "loss": 11.718, + "step": 443220 + }, + { + "epoch": 0.8953526424447613, + "grad_norm": 303.43316650390625, + "learning_rate": 3.875790109229566e-07, + "loss": 26.1313, + "step": 443230 + }, + { + "epoch": 0.8953728430774451, + "grad_norm": 54.86558532714844, + "learning_rate": 3.8744427058034384e-07, + "loss": 13.5601, + "step": 443240 + }, + { + "epoch": 0.895393043710129, + "grad_norm": 377.93890380859375, + "learning_rate": 3.8730955271876813e-07, + "loss": 9.8107, + "step": 443250 + }, + { + "epoch": 0.8954132443428128, + "grad_norm": 8.372718811035156, + "learning_rate": 3.871748573388867e-07, + "loss": 20.0752, + "step": 443260 + }, + { + "epoch": 0.8954334449754966, + "grad_norm": 473.22430419921875, + "learning_rate": 3.870401844413557e-07, + "loss": 19.557, + "step": 443270 + }, + { + "epoch": 0.8954536456081804, + "grad_norm": 374.2972106933594, + "learning_rate": 3.8690553402683015e-07, + "loss": 20.0601, + "step": 443280 + }, + { + "epoch": 0.8954738462408642, + "grad_norm": 256.11004638671875, + "learning_rate": 3.86770906095969e-07, + "loss": 20.1961, + "step": 443290 + }, + { + "epoch": 0.895494046873548, + "grad_norm": 355.7103576660156, + "learning_rate": 3.866363006494256e-07, + "loss": 17.2464, + "step": 443300 + }, + { + "epoch": 0.8955142475062319, + "grad_norm": 207.41500854492188, + "learning_rate": 3.8650171768785826e-07, + "loss": 28.9698, + "step": 443310 + }, + { + "epoch": 0.8955344481389157, + "grad_norm": 378.89422607421875, + "learning_rate": 3.863671572119221e-07, + "loss": 11.5894, + "step": 443320 + }, + { + "epoch": 0.8955546487715995, + "grad_norm": 364.7547912597656, + "learning_rate": 3.8623261922227204e-07, + "loss": 26.4094, + "step": 443330 + }, + { + "epoch": 0.8955748494042833, + "grad_norm": 231.82345581054688, + "learning_rate": 3.8609810371956544e-07, + "loss": 9.4566, + "step": 443340 + }, + { + "epoch": 0.8955950500369672, + "grad_norm": 21.13140106201172, + "learning_rate": 3.859636107044573e-07, + "loss": 23.3379, + "step": 443350 + }, + { + "epoch": 0.895615250669651, + "grad_norm": 290.96771240234375, + "learning_rate": 3.8582914017760154e-07, + "loss": 20.6907, + "step": 443360 + }, + { + "epoch": 0.8956354513023348, + "grad_norm": 441.9970703125, + "learning_rate": 3.856946921396554e-07, + "loss": 14.0148, + "step": 443370 + }, + { + "epoch": 0.8956556519350186, + "grad_norm": 223.53692626953125, + "learning_rate": 3.8556026659127445e-07, + "loss": 8.2351, + "step": 443380 + }, + { + "epoch": 0.8956758525677024, + "grad_norm": 229.99073791503906, + "learning_rate": 3.8542586353311264e-07, + "loss": 9.8784, + "step": 443390 + }, + { + "epoch": 0.8956960532003863, + "grad_norm": 234.25892639160156, + "learning_rate": 3.85291482965825e-07, + "loss": 15.4107, + "step": 443400 + }, + { + "epoch": 0.8957162538330701, + "grad_norm": 431.2823486328125, + "learning_rate": 3.851571248900676e-07, + "loss": 15.1119, + "step": 443410 + }, + { + "epoch": 0.8957364544657539, + "grad_norm": 353.539794921875, + "learning_rate": 3.8502278930649506e-07, + "loss": 19.6273, + "step": 443420 + }, + { + "epoch": 0.8957566550984377, + "grad_norm": 141.57339477539062, + "learning_rate": 3.8488847621576066e-07, + "loss": 8.7348, + "step": 443430 + }, + { + "epoch": 0.8957768557311215, + "grad_norm": 66.6593017578125, + "learning_rate": 3.8475418561851996e-07, + "loss": 45.1004, + "step": 443440 + }, + { + "epoch": 0.8957970563638054, + "grad_norm": 424.2586975097656, + "learning_rate": 3.846199175154297e-07, + "loss": 16.1736, + "step": 443450 + }, + { + "epoch": 0.8958172569964892, + "grad_norm": 98.96004486083984, + "learning_rate": 3.8448567190713993e-07, + "loss": 20.4798, + "step": 443460 + }, + { + "epoch": 0.895837457629173, + "grad_norm": 177.68402099609375, + "learning_rate": 3.843514487943079e-07, + "loss": 18.4954, + "step": 443470 + }, + { + "epoch": 0.8958576582618568, + "grad_norm": 93.97421264648438, + "learning_rate": 3.8421724817758745e-07, + "loss": 19.7879, + "step": 443480 + }, + { + "epoch": 0.8958778588945405, + "grad_norm": 300.3408203125, + "learning_rate": 3.84083070057632e-07, + "loss": 23.0815, + "step": 443490 + }, + { + "epoch": 0.8958980595272243, + "grad_norm": 266.1869201660156, + "learning_rate": 3.8394891443509554e-07, + "loss": 6.7015, + "step": 443500 + }, + { + "epoch": 0.8959182601599082, + "grad_norm": 319.9082946777344, + "learning_rate": 3.83814781310633e-07, + "loss": 17.6728, + "step": 443510 + }, + { + "epoch": 0.895938460792592, + "grad_norm": 24.413856506347656, + "learning_rate": 3.8368067068489724e-07, + "loss": 17.9294, + "step": 443520 + }, + { + "epoch": 0.8959586614252758, + "grad_norm": 230.49240112304688, + "learning_rate": 3.8354658255854105e-07, + "loss": 18.6986, + "step": 443530 + }, + { + "epoch": 0.8959788620579596, + "grad_norm": 369.7834777832031, + "learning_rate": 3.8341251693221893e-07, + "loss": 16.3494, + "step": 443540 + }, + { + "epoch": 0.8959990626906434, + "grad_norm": 257.3564758300781, + "learning_rate": 3.832784738065853e-07, + "loss": 19.3482, + "step": 443550 + }, + { + "epoch": 0.8960192633233273, + "grad_norm": 280.5361633300781, + "learning_rate": 3.83144453182292e-07, + "loss": 8.8472, + "step": 443560 + }, + { + "epoch": 0.8960394639560111, + "grad_norm": 549.4288940429688, + "learning_rate": 3.830104550599922e-07, + "loss": 16.389, + "step": 443570 + }, + { + "epoch": 0.8960596645886949, + "grad_norm": 44.95626449584961, + "learning_rate": 3.8287647944034054e-07, + "loss": 11.7476, + "step": 443580 + }, + { + "epoch": 0.8960798652213787, + "grad_norm": 93.65108489990234, + "learning_rate": 3.827425263239887e-07, + "loss": 14.6472, + "step": 443590 + }, + { + "epoch": 0.8961000658540625, + "grad_norm": 436.2303466796875, + "learning_rate": 3.8260859571158883e-07, + "loss": 22.2924, + "step": 443600 + }, + { + "epoch": 0.8961202664867464, + "grad_norm": 212.3765411376953, + "learning_rate": 3.824746876037955e-07, + "loss": 14.0277, + "step": 443610 + }, + { + "epoch": 0.8961404671194302, + "grad_norm": 432.84234619140625, + "learning_rate": 3.8234080200125977e-07, + "loss": 17.49, + "step": 443620 + }, + { + "epoch": 0.896160667752114, + "grad_norm": 166.82823181152344, + "learning_rate": 3.822069389046357e-07, + "loss": 20.0879, + "step": 443630 + }, + { + "epoch": 0.8961808683847978, + "grad_norm": 292.7633056640625, + "learning_rate": 3.8207309831457485e-07, + "loss": 12.7529, + "step": 443640 + }, + { + "epoch": 0.8962010690174816, + "grad_norm": 156.07223510742188, + "learning_rate": 3.8193928023172897e-07, + "loss": 28.1126, + "step": 443650 + }, + { + "epoch": 0.8962212696501655, + "grad_norm": 218.61766052246094, + "learning_rate": 3.818054846567515e-07, + "loss": 16.0304, + "step": 443660 + }, + { + "epoch": 0.8962414702828493, + "grad_norm": 225.5504150390625, + "learning_rate": 3.8167171159029405e-07, + "loss": 16.0139, + "step": 443670 + }, + { + "epoch": 0.8962616709155331, + "grad_norm": 214.22952270507812, + "learning_rate": 3.815379610330078e-07, + "loss": 14.4313, + "step": 443680 + }, + { + "epoch": 0.8962818715482169, + "grad_norm": 270.0863037109375, + "learning_rate": 3.814042329855455e-07, + "loss": 17.5856, + "step": 443690 + }, + { + "epoch": 0.8963020721809007, + "grad_norm": 436.82843017578125, + "learning_rate": 3.812705274485595e-07, + "loss": 19.8155, + "step": 443700 + }, + { + "epoch": 0.8963222728135846, + "grad_norm": 393.1731262207031, + "learning_rate": 3.811368444227009e-07, + "loss": 17.6372, + "step": 443710 + }, + { + "epoch": 0.8963424734462684, + "grad_norm": 292.5810241699219, + "learning_rate": 3.8100318390862033e-07, + "loss": 11.1767, + "step": 443720 + }, + { + "epoch": 0.8963626740789522, + "grad_norm": 188.14743041992188, + "learning_rate": 3.8086954590697057e-07, + "loss": 15.2354, + "step": 443730 + }, + { + "epoch": 0.8963828747116359, + "grad_norm": 292.0819396972656, + "learning_rate": 3.8073593041840274e-07, + "loss": 40.1812, + "step": 443740 + }, + { + "epoch": 0.8964030753443197, + "grad_norm": 382.9385070800781, + "learning_rate": 3.8060233744356634e-07, + "loss": 16.6645, + "step": 443750 + }, + { + "epoch": 0.8964232759770036, + "grad_norm": 17.047842025756836, + "learning_rate": 3.804687669831142e-07, + "loss": 12.4839, + "step": 443760 + }, + { + "epoch": 0.8964434766096874, + "grad_norm": 199.1194610595703, + "learning_rate": 3.80335219037698e-07, + "loss": 15.6125, + "step": 443770 + }, + { + "epoch": 0.8964636772423712, + "grad_norm": 389.44268798828125, + "learning_rate": 3.802016936079678e-07, + "loss": 22.794, + "step": 443780 + }, + { + "epoch": 0.896483877875055, + "grad_norm": 181.8019561767578, + "learning_rate": 3.8006819069457304e-07, + "loss": 20.7453, + "step": 443790 + }, + { + "epoch": 0.8965040785077388, + "grad_norm": 171.97613525390625, + "learning_rate": 3.7993471029816653e-07, + "loss": 10.4415, + "step": 443800 + }, + { + "epoch": 0.8965242791404227, + "grad_norm": 272.59613037109375, + "learning_rate": 3.798012524193978e-07, + "loss": 30.8633, + "step": 443810 + }, + { + "epoch": 0.8965444797731065, + "grad_norm": 126.36026000976562, + "learning_rate": 3.7966781705891684e-07, + "loss": 22.1035, + "step": 443820 + }, + { + "epoch": 0.8965646804057903, + "grad_norm": 564.8611450195312, + "learning_rate": 3.7953440421737433e-07, + "loss": 18.0288, + "step": 443830 + }, + { + "epoch": 0.8965848810384741, + "grad_norm": 155.26641845703125, + "learning_rate": 3.794010138954213e-07, + "loss": 19.7711, + "step": 443840 + }, + { + "epoch": 0.8966050816711579, + "grad_norm": 631.1908569335938, + "learning_rate": 3.792676460937078e-07, + "loss": 23.0908, + "step": 443850 + }, + { + "epoch": 0.8966252823038418, + "grad_norm": 11.287824630737305, + "learning_rate": 3.791343008128823e-07, + "loss": 11.334, + "step": 443860 + }, + { + "epoch": 0.8966454829365256, + "grad_norm": 18.347753524780273, + "learning_rate": 3.790009780535969e-07, + "loss": 18.2939, + "step": 443870 + }, + { + "epoch": 0.8966656835692094, + "grad_norm": 135.71646118164062, + "learning_rate": 3.7886767781650016e-07, + "loss": 23.7491, + "step": 443880 + }, + { + "epoch": 0.8966858842018932, + "grad_norm": 137.08241271972656, + "learning_rate": 3.787344001022408e-07, + "loss": 11.8424, + "step": 443890 + }, + { + "epoch": 0.896706084834577, + "grad_norm": 235.2169647216797, + "learning_rate": 3.7860114491147017e-07, + "loss": 15.3706, + "step": 443900 + }, + { + "epoch": 0.8967262854672609, + "grad_norm": 170.87989807128906, + "learning_rate": 3.784679122448365e-07, + "loss": 28.9176, + "step": 443910 + }, + { + "epoch": 0.8967464860999447, + "grad_norm": 7.418533802032471, + "learning_rate": 3.783347021029904e-07, + "loss": 10.0521, + "step": 443920 + }, + { + "epoch": 0.8967666867326285, + "grad_norm": 32.72330856323242, + "learning_rate": 3.782015144865808e-07, + "loss": 17.3912, + "step": 443930 + }, + { + "epoch": 0.8967868873653123, + "grad_norm": 132.7852325439453, + "learning_rate": 3.780683493962556e-07, + "loss": 12.4034, + "step": 443940 + }, + { + "epoch": 0.8968070879979961, + "grad_norm": 128.07769775390625, + "learning_rate": 3.779352068326653e-07, + "loss": 9.9278, + "step": 443950 + }, + { + "epoch": 0.89682728863068, + "grad_norm": 809.1480102539062, + "learning_rate": 3.7780208679645826e-07, + "loss": 24.2736, + "step": 443960 + }, + { + "epoch": 0.8968474892633638, + "grad_norm": 35.31715774536133, + "learning_rate": 3.776689892882823e-07, + "loss": 20.8586, + "step": 443970 + }, + { + "epoch": 0.8968676898960476, + "grad_norm": 446.0987854003906, + "learning_rate": 3.77535914308787e-07, + "loss": 16.3148, + "step": 443980 + }, + { + "epoch": 0.8968878905287314, + "grad_norm": 262.0650939941406, + "learning_rate": 3.774028618586217e-07, + "loss": 27.6167, + "step": 443990 + }, + { + "epoch": 0.8969080911614151, + "grad_norm": 382.62713623046875, + "learning_rate": 3.772698319384349e-07, + "loss": 12.7364, + "step": 444000 + }, + { + "epoch": 0.8969282917940989, + "grad_norm": 45.584442138671875, + "learning_rate": 3.7713682454887266e-07, + "loss": 10.9779, + "step": 444010 + }, + { + "epoch": 0.8969484924267828, + "grad_norm": 226.5741424560547, + "learning_rate": 3.770038396905862e-07, + "loss": 13.4693, + "step": 444020 + }, + { + "epoch": 0.8969686930594666, + "grad_norm": 198.7794952392578, + "learning_rate": 3.768708773642221e-07, + "loss": 16.6698, + "step": 444030 + }, + { + "epoch": 0.8969888936921504, + "grad_norm": 230.5293731689453, + "learning_rate": 3.767379375704278e-07, + "loss": 15.721, + "step": 444040 + }, + { + "epoch": 0.8970090943248342, + "grad_norm": 85.8768081665039, + "learning_rate": 3.7660502030985203e-07, + "loss": 13.5907, + "step": 444050 + }, + { + "epoch": 0.897029294957518, + "grad_norm": 278.4578552246094, + "learning_rate": 3.7647212558314493e-07, + "loss": 10.7476, + "step": 444060 + }, + { + "epoch": 0.8970494955902019, + "grad_norm": 248.03053283691406, + "learning_rate": 3.7633925339094936e-07, + "loss": 4.6229, + "step": 444070 + }, + { + "epoch": 0.8970696962228857, + "grad_norm": 298.5616455078125, + "learning_rate": 3.762064037339158e-07, + "loss": 26.9045, + "step": 444080 + }, + { + "epoch": 0.8970898968555695, + "grad_norm": 288.4778747558594, + "learning_rate": 3.760735766126927e-07, + "loss": 17.8781, + "step": 444090 + }, + { + "epoch": 0.8971100974882533, + "grad_norm": 234.10653686523438, + "learning_rate": 3.759407720279257e-07, + "loss": 15.303, + "step": 444100 + }, + { + "epoch": 0.8971302981209371, + "grad_norm": 507.3984680175781, + "learning_rate": 3.758079899802619e-07, + "loss": 18.7645, + "step": 444110 + }, + { + "epoch": 0.897150498753621, + "grad_norm": 251.6602020263672, + "learning_rate": 3.756752304703498e-07, + "loss": 18.8491, + "step": 444120 + }, + { + "epoch": 0.8971706993863048, + "grad_norm": 217.68797302246094, + "learning_rate": 3.755424934988355e-07, + "loss": 9.1092, + "step": 444130 + }, + { + "epoch": 0.8971909000189886, + "grad_norm": 35.48928451538086, + "learning_rate": 3.7540977906636576e-07, + "loss": 9.8827, + "step": 444140 + }, + { + "epoch": 0.8972111006516724, + "grad_norm": 197.29443359375, + "learning_rate": 3.752770871735878e-07, + "loss": 10.2756, + "step": 444150 + }, + { + "epoch": 0.8972313012843562, + "grad_norm": 134.35777282714844, + "learning_rate": 3.751444178211494e-07, + "loss": 8.3943, + "step": 444160 + }, + { + "epoch": 0.8972515019170401, + "grad_norm": 148.85157775878906, + "learning_rate": 3.7501177100969566e-07, + "loss": 8.8188, + "step": 444170 + }, + { + "epoch": 0.8972717025497239, + "grad_norm": 383.755615234375, + "learning_rate": 3.748791467398732e-07, + "loss": 12.4341, + "step": 444180 + }, + { + "epoch": 0.8972919031824077, + "grad_norm": 566.0632934570312, + "learning_rate": 3.747465450123294e-07, + "loss": 23.8997, + "step": 444190 + }, + { + "epoch": 0.8973121038150915, + "grad_norm": 174.40255737304688, + "learning_rate": 3.7461396582771035e-07, + "loss": 19.4514, + "step": 444200 + }, + { + "epoch": 0.8973323044477753, + "grad_norm": 217.28640747070312, + "learning_rate": 3.744814091866605e-07, + "loss": 22.4372, + "step": 444210 + }, + { + "epoch": 0.8973525050804592, + "grad_norm": 301.05712890625, + "learning_rate": 3.7434887508982886e-07, + "loss": 16.4227, + "step": 444220 + }, + { + "epoch": 0.897372705713143, + "grad_norm": 95.06523132324219, + "learning_rate": 3.7421636353785815e-07, + "loss": 16.4544, + "step": 444230 + }, + { + "epoch": 0.8973929063458268, + "grad_norm": 204.04043579101562, + "learning_rate": 3.740838745313974e-07, + "loss": 12.0667, + "step": 444240 + }, + { + "epoch": 0.8974131069785106, + "grad_norm": 166.67311096191406, + "learning_rate": 3.739514080710899e-07, + "loss": 11.7908, + "step": 444250 + }, + { + "epoch": 0.8974333076111943, + "grad_norm": 0.0, + "learning_rate": 3.738189641575818e-07, + "loss": 19.6684, + "step": 444260 + }, + { + "epoch": 0.8974535082438782, + "grad_norm": 305.39263916015625, + "learning_rate": 3.7368654279151985e-07, + "loss": 45.9074, + "step": 444270 + }, + { + "epoch": 0.897473708876562, + "grad_norm": 145.71751403808594, + "learning_rate": 3.7355414397354796e-07, + "loss": 10.9525, + "step": 444280 + }, + { + "epoch": 0.8974939095092458, + "grad_norm": 221.46197509765625, + "learning_rate": 3.7342176770431284e-07, + "loss": 15.7742, + "step": 444290 + }, + { + "epoch": 0.8975141101419296, + "grad_norm": 247.08724975585938, + "learning_rate": 3.732894139844578e-07, + "loss": 36.7726, + "step": 444300 + }, + { + "epoch": 0.8975343107746134, + "grad_norm": 141.1046905517578, + "learning_rate": 3.731570828146297e-07, + "loss": 17.5182, + "step": 444310 + }, + { + "epoch": 0.8975545114072973, + "grad_norm": 26.958621978759766, + "learning_rate": 3.730247741954729e-07, + "loss": 15.3155, + "step": 444320 + }, + { + "epoch": 0.8975747120399811, + "grad_norm": 357.8409423828125, + "learning_rate": 3.7289248812763137e-07, + "loss": 9.8675, + "step": 444330 + }, + { + "epoch": 0.8975949126726649, + "grad_norm": 534.7803344726562, + "learning_rate": 3.727602246117518e-07, + "loss": 20.979, + "step": 444340 + }, + { + "epoch": 0.8976151133053487, + "grad_norm": 622.3795776367188, + "learning_rate": 3.7262798364847753e-07, + "loss": 26.6488, + "step": 444350 + }, + { + "epoch": 0.8976353139380325, + "grad_norm": 172.6232147216797, + "learning_rate": 3.72495765238452e-07, + "loss": 15.5163, + "step": 444360 + }, + { + "epoch": 0.8976555145707164, + "grad_norm": 271.0767517089844, + "learning_rate": 3.723635693823213e-07, + "loss": 19.8816, + "step": 444370 + }, + { + "epoch": 0.8976757152034002, + "grad_norm": 241.2416229248047, + "learning_rate": 3.7223139608073e-07, + "loss": 18.3012, + "step": 444380 + }, + { + "epoch": 0.897695915836084, + "grad_norm": 317.7682189941406, + "learning_rate": 3.720992453343214e-07, + "loss": 20.9782, + "step": 444390 + }, + { + "epoch": 0.8977161164687678, + "grad_norm": 77.09332275390625, + "learning_rate": 3.7196711714373947e-07, + "loss": 11.7378, + "step": 444400 + }, + { + "epoch": 0.8977363171014516, + "grad_norm": 137.31317138671875, + "learning_rate": 3.7183501150962863e-07, + "loss": 13.5225, + "step": 444410 + }, + { + "epoch": 0.8977565177341355, + "grad_norm": 300.7694091796875, + "learning_rate": 3.7170292843263347e-07, + "loss": 22.7928, + "step": 444420 + }, + { + "epoch": 0.8977767183668193, + "grad_norm": 207.14952087402344, + "learning_rate": 3.715708679133956e-07, + "loss": 9.4938, + "step": 444430 + }, + { + "epoch": 0.8977969189995031, + "grad_norm": 202.46482849121094, + "learning_rate": 3.714388299525595e-07, + "loss": 11.2566, + "step": 444440 + }, + { + "epoch": 0.8978171196321869, + "grad_norm": 234.36447143554688, + "learning_rate": 3.713068145507709e-07, + "loss": 11.378, + "step": 444450 + }, + { + "epoch": 0.8978373202648707, + "grad_norm": 28.78504753112793, + "learning_rate": 3.7117482170867083e-07, + "loss": 12.4225, + "step": 444460 + }, + { + "epoch": 0.8978575208975546, + "grad_norm": 402.2554016113281, + "learning_rate": 3.710428514269027e-07, + "loss": 12.8743, + "step": 444470 + }, + { + "epoch": 0.8978777215302384, + "grad_norm": 594.9292602539062, + "learning_rate": 3.7091090370611093e-07, + "loss": 21.2218, + "step": 444480 + }, + { + "epoch": 0.8978979221629222, + "grad_norm": 208.42092895507812, + "learning_rate": 3.707789785469379e-07, + "loss": 16.3208, + "step": 444490 + }, + { + "epoch": 0.897918122795606, + "grad_norm": 215.97412109375, + "learning_rate": 3.7064707595002636e-07, + "loss": 19.2725, + "step": 444500 + }, + { + "epoch": 0.8979383234282897, + "grad_norm": 59.36453628540039, + "learning_rate": 3.705151959160197e-07, + "loss": 18.0953, + "step": 444510 + }, + { + "epoch": 0.8979585240609735, + "grad_norm": 330.7110900878906, + "learning_rate": 3.703833384455602e-07, + "loss": 19.0474, + "step": 444520 + }, + { + "epoch": 0.8979787246936574, + "grad_norm": 249.13511657714844, + "learning_rate": 3.702515035392912e-07, + "loss": 16.3551, + "step": 444530 + }, + { + "epoch": 0.8979989253263412, + "grad_norm": 0.0, + "learning_rate": 3.7011969119785496e-07, + "loss": 4.1604, + "step": 444540 + }, + { + "epoch": 0.898019125959025, + "grad_norm": 363.7060241699219, + "learning_rate": 3.6998790142189324e-07, + "loss": 12.8501, + "step": 444550 + }, + { + "epoch": 0.8980393265917088, + "grad_norm": 498.28857421875, + "learning_rate": 3.698561342120499e-07, + "loss": 18.1249, + "step": 444560 + }, + { + "epoch": 0.8980595272243926, + "grad_norm": 136.7294921875, + "learning_rate": 3.6972438956896563e-07, + "loss": 10.7324, + "step": 444570 + }, + { + "epoch": 0.8980797278570765, + "grad_norm": 400.9918518066406, + "learning_rate": 3.695926674932826e-07, + "loss": 34.5535, + "step": 444580 + }, + { + "epoch": 0.8980999284897603, + "grad_norm": 366.4157409667969, + "learning_rate": 3.694609679856431e-07, + "loss": 27.4047, + "step": 444590 + }, + { + "epoch": 0.8981201291224441, + "grad_norm": 477.45355224609375, + "learning_rate": 3.693292910466906e-07, + "loss": 21.5866, + "step": 444600 + }, + { + "epoch": 0.8981403297551279, + "grad_norm": 244.50428771972656, + "learning_rate": 3.69197636677065e-07, + "loss": 13.6325, + "step": 444610 + }, + { + "epoch": 0.8981605303878117, + "grad_norm": 117.06970977783203, + "learning_rate": 3.690660048774075e-07, + "loss": 12.3494, + "step": 444620 + }, + { + "epoch": 0.8981807310204956, + "grad_norm": 240.0150909423828, + "learning_rate": 3.6893439564836155e-07, + "loss": 8.5246, + "step": 444630 + }, + { + "epoch": 0.8982009316531794, + "grad_norm": 43.846248626708984, + "learning_rate": 3.688028089905682e-07, + "loss": 16.1283, + "step": 444640 + }, + { + "epoch": 0.8982211322858632, + "grad_norm": 75.04704284667969, + "learning_rate": 3.6867124490466697e-07, + "loss": 15.8183, + "step": 444650 + }, + { + "epoch": 0.898241332918547, + "grad_norm": 403.3542785644531, + "learning_rate": 3.685397033913002e-07, + "loss": 15.3458, + "step": 444660 + }, + { + "epoch": 0.8982615335512308, + "grad_norm": 298.10205078125, + "learning_rate": 3.6840818445111114e-07, + "loss": 17.4278, + "step": 444670 + }, + { + "epoch": 0.8982817341839147, + "grad_norm": 418.43408203125, + "learning_rate": 3.6827668808473714e-07, + "loss": 10.1699, + "step": 444680 + }, + { + "epoch": 0.8983019348165985, + "grad_norm": 406.7113037109375, + "learning_rate": 3.68145214292821e-07, + "loss": 15.6826, + "step": 444690 + }, + { + "epoch": 0.8983221354492823, + "grad_norm": 773.9559936523438, + "learning_rate": 3.680137630760039e-07, + "loss": 26.0593, + "step": 444700 + }, + { + "epoch": 0.8983423360819661, + "grad_norm": 348.9058837890625, + "learning_rate": 3.6788233443492583e-07, + "loss": 21.1771, + "step": 444710 + }, + { + "epoch": 0.8983625367146499, + "grad_norm": 135.27320861816406, + "learning_rate": 3.6775092837022685e-07, + "loss": 17.6119, + "step": 444720 + }, + { + "epoch": 0.8983827373473338, + "grad_norm": 143.0601806640625, + "learning_rate": 3.676195448825487e-07, + "loss": 14.7294, + "step": 444730 + }, + { + "epoch": 0.8984029379800176, + "grad_norm": 418.4273681640625, + "learning_rate": 3.674881839725314e-07, + "loss": 25.7207, + "step": 444740 + }, + { + "epoch": 0.8984231386127014, + "grad_norm": 343.03704833984375, + "learning_rate": 3.6735684564081385e-07, + "loss": 18.8208, + "step": 444750 + }, + { + "epoch": 0.8984433392453852, + "grad_norm": 83.89474487304688, + "learning_rate": 3.672255298880367e-07, + "loss": 32.372, + "step": 444760 + }, + { + "epoch": 0.8984635398780689, + "grad_norm": 209.2916259765625, + "learning_rate": 3.670942367148417e-07, + "loss": 21.1825, + "step": 444770 + }, + { + "epoch": 0.8984837405107527, + "grad_norm": 431.716796875, + "learning_rate": 3.669629661218671e-07, + "loss": 25.6026, + "step": 444780 + }, + { + "epoch": 0.8985039411434366, + "grad_norm": 30.953857421875, + "learning_rate": 3.66831718109753e-07, + "loss": 14.6427, + "step": 444790 + }, + { + "epoch": 0.8985241417761204, + "grad_norm": 259.8677673339844, + "learning_rate": 3.6670049267913954e-07, + "loss": 15.1308, + "step": 444800 + }, + { + "epoch": 0.8985443424088042, + "grad_norm": 243.4873809814453, + "learning_rate": 3.665692898306655e-07, + "loss": 18.857, + "step": 444810 + }, + { + "epoch": 0.898564543041488, + "grad_norm": 164.88819885253906, + "learning_rate": 3.664381095649705e-07, + "loss": 12.5417, + "step": 444820 + }, + { + "epoch": 0.8985847436741718, + "grad_norm": 18.60142707824707, + "learning_rate": 3.6630695188269505e-07, + "loss": 10.9017, + "step": 444830 + }, + { + "epoch": 0.8986049443068557, + "grad_norm": 168.12863159179688, + "learning_rate": 3.6617581678447647e-07, + "loss": 15.9924, + "step": 444840 + }, + { + "epoch": 0.8986251449395395, + "grad_norm": 36.90435791015625, + "learning_rate": 3.6604470427095587e-07, + "loss": 8.4565, + "step": 444850 + }, + { + "epoch": 0.8986453455722233, + "grad_norm": 238.035888671875, + "learning_rate": 3.6591361434277105e-07, + "loss": 16.2636, + "step": 444860 + }, + { + "epoch": 0.8986655462049071, + "grad_norm": 193.6130828857422, + "learning_rate": 3.6578254700056107e-07, + "loss": 8.3253, + "step": 444870 + }, + { + "epoch": 0.898685746837591, + "grad_norm": 323.912353515625, + "learning_rate": 3.6565150224496525e-07, + "loss": 7.1689, + "step": 444880 + }, + { + "epoch": 0.8987059474702748, + "grad_norm": 150.40097045898438, + "learning_rate": 3.65520480076621e-07, + "loss": 12.8619, + "step": 444890 + }, + { + "epoch": 0.8987261481029586, + "grad_norm": 278.6393127441406, + "learning_rate": 3.6538948049616886e-07, + "loss": 12.2635, + "step": 444900 + }, + { + "epoch": 0.8987463487356424, + "grad_norm": 319.26910400390625, + "learning_rate": 3.6525850350424554e-07, + "loss": 8.0543, + "step": 444910 + }, + { + "epoch": 0.8987665493683262, + "grad_norm": 612.7775268554688, + "learning_rate": 3.651275491014905e-07, + "loss": 27.059, + "step": 444920 + }, + { + "epoch": 0.89878675000101, + "grad_norm": 155.68157958984375, + "learning_rate": 3.649966172885422e-07, + "loss": 18.8907, + "step": 444930 + }, + { + "epoch": 0.8988069506336939, + "grad_norm": 174.67977905273438, + "learning_rate": 3.648657080660373e-07, + "loss": 15.0618, + "step": 444940 + }, + { + "epoch": 0.8988271512663777, + "grad_norm": 232.21981811523438, + "learning_rate": 3.6473482143461523e-07, + "loss": 13.615, + "step": 444950 + }, + { + "epoch": 0.8988473518990615, + "grad_norm": 184.6064453125, + "learning_rate": 3.6460395739491337e-07, + "loss": 7.788, + "step": 444960 + }, + { + "epoch": 0.8988675525317453, + "grad_norm": 239.39715576171875, + "learning_rate": 3.644731159475695e-07, + "loss": 13.8403, + "step": 444970 + }, + { + "epoch": 0.8988877531644291, + "grad_norm": 0.0, + "learning_rate": 3.643422970932209e-07, + "loss": 20.4715, + "step": 444980 + }, + { + "epoch": 0.898907953797113, + "grad_norm": 177.03160095214844, + "learning_rate": 3.6421150083250754e-07, + "loss": 11.254, + "step": 444990 + }, + { + "epoch": 0.8989281544297968, + "grad_norm": 0.0, + "learning_rate": 3.6408072716606346e-07, + "loss": 21.7134, + "step": 445000 + }, + { + "epoch": 0.8989483550624806, + "grad_norm": 640.3748779296875, + "learning_rate": 3.6394997609452755e-07, + "loss": 16.0379, + "step": 445010 + }, + { + "epoch": 0.8989685556951643, + "grad_norm": 279.4459228515625, + "learning_rate": 3.6381924761853814e-07, + "loss": 12.6227, + "step": 445020 + }, + { + "epoch": 0.8989887563278481, + "grad_norm": 451.8597412109375, + "learning_rate": 3.6368854173873094e-07, + "loss": 20.9938, + "step": 445030 + }, + { + "epoch": 0.899008956960532, + "grad_norm": 209.34207153320312, + "learning_rate": 3.635578584557431e-07, + "loss": 10.6094, + "step": 445040 + }, + { + "epoch": 0.8990291575932158, + "grad_norm": 352.37432861328125, + "learning_rate": 3.6342719777021194e-07, + "loss": 23.8625, + "step": 445050 + }, + { + "epoch": 0.8990493582258996, + "grad_norm": 145.17018127441406, + "learning_rate": 3.6329655968277477e-07, + "loss": 17.6126, + "step": 445060 + }, + { + "epoch": 0.8990695588585834, + "grad_norm": 27.934141159057617, + "learning_rate": 3.6316594419406826e-07, + "loss": 13.5671, + "step": 445070 + }, + { + "epoch": 0.8990897594912672, + "grad_norm": 271.7455139160156, + "learning_rate": 3.6303535130472743e-07, + "loss": 22.3715, + "step": 445080 + }, + { + "epoch": 0.8991099601239511, + "grad_norm": 201.5966033935547, + "learning_rate": 3.6290478101539073e-07, + "loss": 11.2014, + "step": 445090 + }, + { + "epoch": 0.8991301607566349, + "grad_norm": 255.765869140625, + "learning_rate": 3.627742333266937e-07, + "loss": 12.2225, + "step": 445100 + }, + { + "epoch": 0.8991503613893187, + "grad_norm": 55.76103591918945, + "learning_rate": 3.6264370823927196e-07, + "loss": 17.4078, + "step": 445110 + }, + { + "epoch": 0.8991705620220025, + "grad_norm": 205.4469451904297, + "learning_rate": 3.6251320575376336e-07, + "loss": 23.2812, + "step": 445120 + }, + { + "epoch": 0.8991907626546863, + "grad_norm": 197.58169555664062, + "learning_rate": 3.6238272587080183e-07, + "loss": 16.2706, + "step": 445130 + }, + { + "epoch": 0.8992109632873702, + "grad_norm": 220.38482666015625, + "learning_rate": 3.6225226859102515e-07, + "loss": 9.8516, + "step": 445140 + }, + { + "epoch": 0.899231163920054, + "grad_norm": 474.5306091308594, + "learning_rate": 3.621218339150684e-07, + "loss": 14.372, + "step": 445150 + }, + { + "epoch": 0.8992513645527378, + "grad_norm": 487.86395263671875, + "learning_rate": 3.619914218435666e-07, + "loss": 17.0953, + "step": 445160 + }, + { + "epoch": 0.8992715651854216, + "grad_norm": 337.3041687011719, + "learning_rate": 3.6186103237715706e-07, + "loss": 12.5712, + "step": 445170 + }, + { + "epoch": 0.8992917658181054, + "grad_norm": 514.4605102539062, + "learning_rate": 3.617306655164743e-07, + "loss": 36.5964, + "step": 445180 + }, + { + "epoch": 0.8993119664507893, + "grad_norm": 373.0463562011719, + "learning_rate": 3.6160032126215274e-07, + "loss": 17.169, + "step": 445190 + }, + { + "epoch": 0.8993321670834731, + "grad_norm": 625.6041259765625, + "learning_rate": 3.614699996148285e-07, + "loss": 24.1392, + "step": 445200 + }, + { + "epoch": 0.8993523677161569, + "grad_norm": 210.83209228515625, + "learning_rate": 3.613397005751379e-07, + "loss": 24.6616, + "step": 445210 + }, + { + "epoch": 0.8993725683488407, + "grad_norm": 311.2628479003906, + "learning_rate": 3.612094241437153e-07, + "loss": 19.9175, + "step": 445220 + }, + { + "epoch": 0.8993927689815245, + "grad_norm": 179.17237854003906, + "learning_rate": 3.610791703211941e-07, + "loss": 14.6733, + "step": 445230 + }, + { + "epoch": 0.8994129696142084, + "grad_norm": 308.4068298339844, + "learning_rate": 3.6094893910821103e-07, + "loss": 20.6028, + "step": 445240 + }, + { + "epoch": 0.8994331702468922, + "grad_norm": 263.1321105957031, + "learning_rate": 3.608187305054006e-07, + "loss": 12.3121, + "step": 445250 + }, + { + "epoch": 0.899453370879576, + "grad_norm": 400.6637878417969, + "learning_rate": 3.606885445133962e-07, + "loss": 11.6952, + "step": 445260 + }, + { + "epoch": 0.8994735715122598, + "grad_norm": 203.16830444335938, + "learning_rate": 3.605583811328328e-07, + "loss": 21.5273, + "step": 445270 + }, + { + "epoch": 0.8994937721449435, + "grad_norm": 341.5491638183594, + "learning_rate": 3.604282403643472e-07, + "loss": 21.8069, + "step": 445280 + }, + { + "epoch": 0.8995139727776273, + "grad_norm": 515.5481567382812, + "learning_rate": 3.6029812220857e-07, + "loss": 18.6255, + "step": 445290 + }, + { + "epoch": 0.8995341734103112, + "grad_norm": 54.038150787353516, + "learning_rate": 3.601680266661367e-07, + "loss": 16.0552, + "step": 445300 + }, + { + "epoch": 0.899554374042995, + "grad_norm": 574.565185546875, + "learning_rate": 3.6003795373768303e-07, + "loss": 21.3307, + "step": 445310 + }, + { + "epoch": 0.8995745746756788, + "grad_norm": 7.462080955505371, + "learning_rate": 3.5990790342384117e-07, + "loss": 18.7261, + "step": 445320 + }, + { + "epoch": 0.8995947753083626, + "grad_norm": 373.57940673828125, + "learning_rate": 3.5977787572524457e-07, + "loss": 15.4283, + "step": 445330 + }, + { + "epoch": 0.8996149759410464, + "grad_norm": 27.012331008911133, + "learning_rate": 3.596478706425277e-07, + "loss": 16.4097, + "step": 445340 + }, + { + "epoch": 0.8996351765737303, + "grad_norm": 399.6962585449219, + "learning_rate": 3.5951788817632615e-07, + "loss": 14.6317, + "step": 445350 + }, + { + "epoch": 0.8996553772064141, + "grad_norm": 307.5194396972656, + "learning_rate": 3.5938792832726996e-07, + "loss": 24.4639, + "step": 445360 + }, + { + "epoch": 0.8996755778390979, + "grad_norm": 182.70657348632812, + "learning_rate": 3.5925799109599426e-07, + "loss": 16.0825, + "step": 445370 + }, + { + "epoch": 0.8996957784717817, + "grad_norm": 243.04278564453125, + "learning_rate": 3.5912807648313285e-07, + "loss": 14.5699, + "step": 445380 + }, + { + "epoch": 0.8997159791044655, + "grad_norm": 233.26922607421875, + "learning_rate": 3.5899818448931865e-07, + "loss": 20.6728, + "step": 445390 + }, + { + "epoch": 0.8997361797371494, + "grad_norm": 230.99720764160156, + "learning_rate": 3.5886831511518336e-07, + "loss": 19.7656, + "step": 445400 + }, + { + "epoch": 0.8997563803698332, + "grad_norm": 158.81932067871094, + "learning_rate": 3.5873846836136204e-07, + "loss": 22.3781, + "step": 445410 + }, + { + "epoch": 0.899776581002517, + "grad_norm": 0.0, + "learning_rate": 3.586086442284864e-07, + "loss": 21.998, + "step": 445420 + }, + { + "epoch": 0.8997967816352008, + "grad_norm": 236.00514221191406, + "learning_rate": 3.5847884271718814e-07, + "loss": 20.4317, + "step": 445430 + }, + { + "epoch": 0.8998169822678846, + "grad_norm": 201.31605529785156, + "learning_rate": 3.583490638281023e-07, + "loss": 25.3305, + "step": 445440 + }, + { + "epoch": 0.8998371829005685, + "grad_norm": 209.36764526367188, + "learning_rate": 3.5821930756185894e-07, + "loss": 16.4358, + "step": 445450 + }, + { + "epoch": 0.8998573835332523, + "grad_norm": 239.61834716796875, + "learning_rate": 3.5808957391909315e-07, + "loss": 13.0421, + "step": 445460 + }, + { + "epoch": 0.8998775841659361, + "grad_norm": 550.1686401367188, + "learning_rate": 3.579598629004355e-07, + "loss": 15.5555, + "step": 445470 + }, + { + "epoch": 0.8998977847986199, + "grad_norm": 149.8254852294922, + "learning_rate": 3.5783017450651714e-07, + "loss": 11.5235, + "step": 445480 + }, + { + "epoch": 0.8999179854313037, + "grad_norm": 457.91363525390625, + "learning_rate": 3.5770050873797314e-07, + "loss": 18.5262, + "step": 445490 + }, + { + "epoch": 0.8999381860639876, + "grad_norm": 114.06350708007812, + "learning_rate": 3.575708655954324e-07, + "loss": 9.1562, + "step": 445500 + }, + { + "epoch": 0.8999583866966714, + "grad_norm": 207.97767639160156, + "learning_rate": 3.5744124507952895e-07, + "loss": 15.5924, + "step": 445510 + }, + { + "epoch": 0.8999785873293552, + "grad_norm": 83.53045654296875, + "learning_rate": 3.573116471908933e-07, + "loss": 24.5435, + "step": 445520 + }, + { + "epoch": 0.899998787962039, + "grad_norm": 364.34765625, + "learning_rate": 3.571820719301583e-07, + "loss": 16.694, + "step": 445530 + }, + { + "epoch": 0.9000189885947227, + "grad_norm": 270.1733093261719, + "learning_rate": 3.570525192979546e-07, + "loss": 11.4549, + "step": 445540 + }, + { + "epoch": 0.9000391892274066, + "grad_norm": 417.6627197265625, + "learning_rate": 3.569229892949133e-07, + "loss": 21.2883, + "step": 445550 + }, + { + "epoch": 0.9000593898600904, + "grad_norm": 310.5437316894531, + "learning_rate": 3.5679348192166675e-07, + "loss": 8.723, + "step": 445560 + }, + { + "epoch": 0.9000795904927742, + "grad_norm": 0.0, + "learning_rate": 3.5666399717884604e-07, + "loss": 13.0665, + "step": 445570 + }, + { + "epoch": 0.900099791125458, + "grad_norm": 282.0537414550781, + "learning_rate": 3.565345350670807e-07, + "loss": 12.7627, + "step": 445580 + }, + { + "epoch": 0.9001199917581418, + "grad_norm": 365.40948486328125, + "learning_rate": 3.56405095587003e-07, + "loss": 9.9332, + "step": 445590 + }, + { + "epoch": 0.9001401923908257, + "grad_norm": 101.62939453125, + "learning_rate": 3.562756787392452e-07, + "loss": 21.3777, + "step": 445600 + }, + { + "epoch": 0.9001603930235095, + "grad_norm": 287.9040832519531, + "learning_rate": 3.561462845244351e-07, + "loss": 14.5309, + "step": 445610 + }, + { + "epoch": 0.9001805936561933, + "grad_norm": 279.9335021972656, + "learning_rate": 3.560169129432045e-07, + "loss": 24.3167, + "step": 445620 + }, + { + "epoch": 0.9002007942888771, + "grad_norm": 410.9738464355469, + "learning_rate": 3.5588756399618507e-07, + "loss": 10.6661, + "step": 445630 + }, + { + "epoch": 0.9002209949215609, + "grad_norm": 157.7344970703125, + "learning_rate": 3.557582376840063e-07, + "loss": 12.3605, + "step": 445640 + }, + { + "epoch": 0.9002411955542448, + "grad_norm": 285.1471252441406, + "learning_rate": 3.556289340072977e-07, + "loss": 9.8739, + "step": 445650 + }, + { + "epoch": 0.9002613961869286, + "grad_norm": 268.43438720703125, + "learning_rate": 3.55499652966691e-07, + "loss": 16.0661, + "step": 445660 + }, + { + "epoch": 0.9002815968196124, + "grad_norm": 237.16184997558594, + "learning_rate": 3.5537039456281674e-07, + "loss": 13.6825, + "step": 445670 + }, + { + "epoch": 0.9003017974522962, + "grad_norm": 111.08230590820312, + "learning_rate": 3.5524115879630225e-07, + "loss": 7.6335, + "step": 445680 + }, + { + "epoch": 0.90032199808498, + "grad_norm": 381.768798828125, + "learning_rate": 3.551119456677793e-07, + "loss": 14.7367, + "step": 445690 + }, + { + "epoch": 0.9003421987176639, + "grad_norm": 246.447265625, + "learning_rate": 3.5498275517787783e-07, + "loss": 31.8115, + "step": 445700 + }, + { + "epoch": 0.9003623993503477, + "grad_norm": 296.7474060058594, + "learning_rate": 3.5485358732722743e-07, + "loss": 17.3367, + "step": 445710 + }, + { + "epoch": 0.9003825999830315, + "grad_norm": 454.2543029785156, + "learning_rate": 3.547244421164564e-07, + "loss": 20.8422, + "step": 445720 + }, + { + "epoch": 0.9004028006157153, + "grad_norm": 333.75164794921875, + "learning_rate": 3.545953195461954e-07, + "loss": 21.1452, + "step": 445730 + }, + { + "epoch": 0.9004230012483991, + "grad_norm": 204.9512939453125, + "learning_rate": 3.5446621961707284e-07, + "loss": 14.492, + "step": 445740 + }, + { + "epoch": 0.900443201881083, + "grad_norm": 378.46734619140625, + "learning_rate": 3.5433714232971927e-07, + "loss": 24.8063, + "step": 445750 + }, + { + "epoch": 0.9004634025137668, + "grad_norm": 273.31439208984375, + "learning_rate": 3.5420808768476313e-07, + "loss": 10.6374, + "step": 445760 + }, + { + "epoch": 0.9004836031464506, + "grad_norm": 560.32763671875, + "learning_rate": 3.540790556828327e-07, + "loss": 15.3381, + "step": 445770 + }, + { + "epoch": 0.9005038037791344, + "grad_norm": 168.69284057617188, + "learning_rate": 3.539500463245582e-07, + "loss": 44.2349, + "step": 445780 + }, + { + "epoch": 0.9005240044118181, + "grad_norm": 246.83082580566406, + "learning_rate": 3.5382105961056735e-07, + "loss": 25.3657, + "step": 445790 + }, + { + "epoch": 0.9005442050445019, + "grad_norm": 165.73048400878906, + "learning_rate": 3.5369209554148854e-07, + "loss": 11.9341, + "step": 445800 + }, + { + "epoch": 0.9005644056771858, + "grad_norm": 169.6850128173828, + "learning_rate": 3.535631541179507e-07, + "loss": 13.8381, + "step": 445810 + }, + { + "epoch": 0.9005846063098696, + "grad_norm": 7.769381523132324, + "learning_rate": 3.534342353405834e-07, + "loss": 12.1496, + "step": 445820 + }, + { + "epoch": 0.9006048069425534, + "grad_norm": 248.3420867919922, + "learning_rate": 3.533053392100144e-07, + "loss": 24.7979, + "step": 445830 + }, + { + "epoch": 0.9006250075752372, + "grad_norm": 357.61468505859375, + "learning_rate": 3.531764657268705e-07, + "loss": 13.5486, + "step": 445840 + }, + { + "epoch": 0.900645208207921, + "grad_norm": 0.0, + "learning_rate": 3.530476148917816e-07, + "loss": 17.1647, + "step": 445850 + }, + { + "epoch": 0.9006654088406049, + "grad_norm": 455.8564147949219, + "learning_rate": 3.5291878670537516e-07, + "loss": 21.4658, + "step": 445860 + }, + { + "epoch": 0.9006856094732887, + "grad_norm": 261.73760986328125, + "learning_rate": 3.5278998116827835e-07, + "loss": 23.2449, + "step": 445870 + }, + { + "epoch": 0.9007058101059725, + "grad_norm": 208.8277130126953, + "learning_rate": 3.5266119828111953e-07, + "loss": 20.0035, + "step": 445880 + }, + { + "epoch": 0.9007260107386563, + "grad_norm": 399.3625183105469, + "learning_rate": 3.525324380445277e-07, + "loss": 14.0625, + "step": 445890 + }, + { + "epoch": 0.9007462113713401, + "grad_norm": 318.3999328613281, + "learning_rate": 3.524037004591274e-07, + "loss": 16.3384, + "step": 445900 + }, + { + "epoch": 0.900766412004024, + "grad_norm": 90.35254669189453, + "learning_rate": 3.5227498552554805e-07, + "loss": 18.7766, + "step": 445910 + }, + { + "epoch": 0.9007866126367078, + "grad_norm": 308.0154113769531, + "learning_rate": 3.5214629324441754e-07, + "loss": 32.7715, + "step": 445920 + }, + { + "epoch": 0.9008068132693916, + "grad_norm": 546.2141723632812, + "learning_rate": 3.5201762361636195e-07, + "loss": 12.3502, + "step": 445930 + }, + { + "epoch": 0.9008270139020754, + "grad_norm": 356.170654296875, + "learning_rate": 3.5188897664200804e-07, + "loss": 17.1992, + "step": 445940 + }, + { + "epoch": 0.9008472145347592, + "grad_norm": 272.68035888671875, + "learning_rate": 3.5176035232198367e-07, + "loss": 18.716, + "step": 445950 + }, + { + "epoch": 0.9008674151674431, + "grad_norm": 448.6239013671875, + "learning_rate": 3.516317506569172e-07, + "loss": 23.2998, + "step": 445960 + }, + { + "epoch": 0.9008876158001269, + "grad_norm": 247.167724609375, + "learning_rate": 3.515031716474321e-07, + "loss": 17.9454, + "step": 445970 + }, + { + "epoch": 0.9009078164328107, + "grad_norm": 407.5674743652344, + "learning_rate": 3.513746152941572e-07, + "loss": 14.5001, + "step": 445980 + }, + { + "epoch": 0.9009280170654945, + "grad_norm": 327.32696533203125, + "learning_rate": 3.5124608159771864e-07, + "loss": 15.3075, + "step": 445990 + }, + { + "epoch": 0.9009482176981783, + "grad_norm": 184.2942352294922, + "learning_rate": 3.511175705587433e-07, + "loss": 20.2835, + "step": 446000 + }, + { + "epoch": 0.9009684183308622, + "grad_norm": 489.49395751953125, + "learning_rate": 3.509890821778561e-07, + "loss": 17.2991, + "step": 446010 + }, + { + "epoch": 0.900988618963546, + "grad_norm": 1080.24169921875, + "learning_rate": 3.508606164556855e-07, + "loss": 21.9367, + "step": 446020 + }, + { + "epoch": 0.9010088195962298, + "grad_norm": 280.5447082519531, + "learning_rate": 3.507321733928559e-07, + "loss": 12.6716, + "step": 446030 + }, + { + "epoch": 0.9010290202289136, + "grad_norm": 233.6311798095703, + "learning_rate": 3.5060375298999303e-07, + "loss": 24.0079, + "step": 446040 + }, + { + "epoch": 0.9010492208615973, + "grad_norm": 321.3100891113281, + "learning_rate": 3.5047535524772467e-07, + "loss": 25.8466, + "step": 446050 + }, + { + "epoch": 0.9010694214942812, + "grad_norm": 210.2325897216797, + "learning_rate": 3.5034698016667423e-07, + "loss": 12.6793, + "step": 446060 + }, + { + "epoch": 0.901089622126965, + "grad_norm": 234.207275390625, + "learning_rate": 3.5021862774747007e-07, + "loss": 9.8604, + "step": 446070 + }, + { + "epoch": 0.9011098227596488, + "grad_norm": 80.25869750976562, + "learning_rate": 3.500902979907356e-07, + "loss": 15.5287, + "step": 446080 + }, + { + "epoch": 0.9011300233923326, + "grad_norm": 631.1233520507812, + "learning_rate": 3.4996199089709695e-07, + "loss": 14.0402, + "step": 446090 + }, + { + "epoch": 0.9011502240250164, + "grad_norm": 324.9217529296875, + "learning_rate": 3.498337064671803e-07, + "loss": 20.8748, + "step": 446100 + }, + { + "epoch": 0.9011704246577003, + "grad_norm": 180.3143310546875, + "learning_rate": 3.4970544470160905e-07, + "loss": 12.7918, + "step": 446110 + }, + { + "epoch": 0.9011906252903841, + "grad_norm": 171.03756713867188, + "learning_rate": 3.495772056010105e-07, + "loss": 21.1228, + "step": 446120 + }, + { + "epoch": 0.9012108259230679, + "grad_norm": 276.5911865234375, + "learning_rate": 3.4944898916600743e-07, + "loss": 25.7992, + "step": 446130 + }, + { + "epoch": 0.9012310265557517, + "grad_norm": 402.3453369140625, + "learning_rate": 3.493207953972272e-07, + "loss": 13.0384, + "step": 446140 + }, + { + "epoch": 0.9012512271884355, + "grad_norm": 578.5548706054688, + "learning_rate": 3.491926242952931e-07, + "loss": 20.0322, + "step": 446150 + }, + { + "epoch": 0.9012714278211194, + "grad_norm": 396.3736877441406, + "learning_rate": 3.4906447586082917e-07, + "loss": 27.0589, + "step": 446160 + }, + { + "epoch": 0.9012916284538032, + "grad_norm": 145.3114013671875, + "learning_rate": 3.48936350094462e-07, + "loss": 9.8804, + "step": 446170 + }, + { + "epoch": 0.901311829086487, + "grad_norm": 717.904296875, + "learning_rate": 3.488082469968146e-07, + "loss": 25.9545, + "step": 446180 + }, + { + "epoch": 0.9013320297191708, + "grad_norm": 750.8253784179688, + "learning_rate": 3.4868016656851135e-07, + "loss": 20.0636, + "step": 446190 + }, + { + "epoch": 0.9013522303518546, + "grad_norm": 168.28651428222656, + "learning_rate": 3.4855210881017675e-07, + "loss": 17.5162, + "step": 446200 + }, + { + "epoch": 0.9013724309845385, + "grad_norm": 342.64117431640625, + "learning_rate": 3.4842407372243646e-07, + "loss": 31.7302, + "step": 446210 + }, + { + "epoch": 0.9013926316172223, + "grad_norm": 90.33460235595703, + "learning_rate": 3.482960613059111e-07, + "loss": 16.0992, + "step": 446220 + }, + { + "epoch": 0.9014128322499061, + "grad_norm": 227.67636108398438, + "learning_rate": 3.481680715612273e-07, + "loss": 10.1211, + "step": 446230 + }, + { + "epoch": 0.9014330328825899, + "grad_norm": 468.148681640625, + "learning_rate": 3.480401044890086e-07, + "loss": 11.4297, + "step": 446240 + }, + { + "epoch": 0.9014532335152737, + "grad_norm": 254.1317901611328, + "learning_rate": 3.479121600898777e-07, + "loss": 24.1494, + "step": 446250 + }, + { + "epoch": 0.9014734341479576, + "grad_norm": 399.9097900390625, + "learning_rate": 3.477842383644586e-07, + "loss": 14.8352, + "step": 446260 + }, + { + "epoch": 0.9014936347806414, + "grad_norm": 293.8617248535156, + "learning_rate": 3.476563393133747e-07, + "loss": 18.3029, + "step": 446270 + }, + { + "epoch": 0.9015138354133252, + "grad_norm": 208.68783569335938, + "learning_rate": 3.475284629372511e-07, + "loss": 23.6214, + "step": 446280 + }, + { + "epoch": 0.901534036046009, + "grad_norm": 374.38604736328125, + "learning_rate": 3.474006092367077e-07, + "loss": 12.5321, + "step": 446290 + }, + { + "epoch": 0.9015542366786927, + "grad_norm": 14.303327560424805, + "learning_rate": 3.472727782123697e-07, + "loss": 5.7181, + "step": 446300 + }, + { + "epoch": 0.9015744373113765, + "grad_norm": 281.0022277832031, + "learning_rate": 3.4714496986486045e-07, + "loss": 15.1799, + "step": 446310 + }, + { + "epoch": 0.9015946379440604, + "grad_norm": 320.8405456542969, + "learning_rate": 3.470171841948022e-07, + "loss": 16.6386, + "step": 446320 + }, + { + "epoch": 0.9016148385767442, + "grad_norm": 221.64418029785156, + "learning_rate": 3.468894212028173e-07, + "loss": 25.7132, + "step": 446330 + }, + { + "epoch": 0.901635039209428, + "grad_norm": 297.1494140625, + "learning_rate": 3.467616808895302e-07, + "loss": 15.8963, + "step": 446340 + }, + { + "epoch": 0.9016552398421118, + "grad_norm": 207.5110626220703, + "learning_rate": 3.4663396325556154e-07, + "loss": 7.7352, + "step": 446350 + }, + { + "epoch": 0.9016754404747956, + "grad_norm": 148.60226440429688, + "learning_rate": 3.465062683015341e-07, + "loss": 14.0815, + "step": 446360 + }, + { + "epoch": 0.9016956411074795, + "grad_norm": 234.91012573242188, + "learning_rate": 3.463785960280719e-07, + "loss": 17.0193, + "step": 446370 + }, + { + "epoch": 0.9017158417401633, + "grad_norm": 219.4169464111328, + "learning_rate": 3.462509464357944e-07, + "loss": 28.2836, + "step": 446380 + }, + { + "epoch": 0.9017360423728471, + "grad_norm": 194.2111053466797, + "learning_rate": 3.461233195253266e-07, + "loss": 16.8256, + "step": 446390 + }, + { + "epoch": 0.9017562430055309, + "grad_norm": 653.9534912109375, + "learning_rate": 3.459957152972887e-07, + "loss": 27.3666, + "step": 446400 + }, + { + "epoch": 0.9017764436382147, + "grad_norm": 0.0, + "learning_rate": 3.45868133752304e-07, + "loss": 18.6734, + "step": 446410 + }, + { + "epoch": 0.9017966442708986, + "grad_norm": 357.0810546875, + "learning_rate": 3.45740574890992e-07, + "loss": 17.6775, + "step": 446420 + }, + { + "epoch": 0.9018168449035824, + "grad_norm": 52.81655502319336, + "learning_rate": 3.456130387139778e-07, + "loss": 41.3172, + "step": 446430 + }, + { + "epoch": 0.9018370455362662, + "grad_norm": 195.17762756347656, + "learning_rate": 3.454855252218803e-07, + "loss": 21.7486, + "step": 446440 + }, + { + "epoch": 0.90185724616895, + "grad_norm": 160.00689697265625, + "learning_rate": 3.4535803441532125e-07, + "loss": 14.2743, + "step": 446450 + }, + { + "epoch": 0.9018774468016338, + "grad_norm": 197.13502502441406, + "learning_rate": 3.4523056629492344e-07, + "loss": 23.7805, + "step": 446460 + }, + { + "epoch": 0.9018976474343177, + "grad_norm": 206.31163024902344, + "learning_rate": 3.451031208613076e-07, + "loss": 11.4748, + "step": 446470 + }, + { + "epoch": 0.9019178480670015, + "grad_norm": 243.82015991210938, + "learning_rate": 3.449756981150931e-07, + "loss": 13.8016, + "step": 446480 + }, + { + "epoch": 0.9019380486996853, + "grad_norm": 332.4928894042969, + "learning_rate": 3.448482980569029e-07, + "loss": 29.6933, + "step": 446490 + }, + { + "epoch": 0.9019582493323691, + "grad_norm": 554.052978515625, + "learning_rate": 3.4472092068735917e-07, + "loss": 27.1281, + "step": 446500 + }, + { + "epoch": 0.9019784499650529, + "grad_norm": 424.04071044921875, + "learning_rate": 3.4459356600707925e-07, + "loss": 9.0941, + "step": 446510 + }, + { + "epoch": 0.9019986505977368, + "grad_norm": 439.9280090332031, + "learning_rate": 3.44466234016686e-07, + "loss": 14.4513, + "step": 446520 + }, + { + "epoch": 0.9020188512304206, + "grad_norm": 211.07901000976562, + "learning_rate": 3.443389247168e-07, + "loss": 15.9431, + "step": 446530 + }, + { + "epoch": 0.9020390518631044, + "grad_norm": 146.1761932373047, + "learning_rate": 3.442116381080418e-07, + "loss": 13.5847, + "step": 446540 + }, + { + "epoch": 0.9020592524957882, + "grad_norm": 368.0498046875, + "learning_rate": 3.4408437419103047e-07, + "loss": 10.3202, + "step": 446550 + }, + { + "epoch": 0.9020794531284719, + "grad_norm": 143.20919799804688, + "learning_rate": 3.4395713296638713e-07, + "loss": 15.2309, + "step": 446560 + }, + { + "epoch": 0.9020996537611558, + "grad_norm": 154.44309997558594, + "learning_rate": 3.4382991443473403e-07, + "loss": 21.3466, + "step": 446570 + }, + { + "epoch": 0.9021198543938396, + "grad_norm": 545.963134765625, + "learning_rate": 3.437027185966868e-07, + "loss": 13.0537, + "step": 446580 + }, + { + "epoch": 0.9021400550265234, + "grad_norm": 160.8242645263672, + "learning_rate": 3.4357554545286833e-07, + "loss": 21.0006, + "step": 446590 + }, + { + "epoch": 0.9021602556592072, + "grad_norm": 8.755069732666016, + "learning_rate": 3.434483950038986e-07, + "loss": 10.28, + "step": 446600 + }, + { + "epoch": 0.902180456291891, + "grad_norm": 439.3963623046875, + "learning_rate": 3.433212672503966e-07, + "loss": 18.5469, + "step": 446610 + }, + { + "epoch": 0.9022006569245749, + "grad_norm": 256.8570861816406, + "learning_rate": 3.431941621929813e-07, + "loss": 21.0754, + "step": 446620 + }, + { + "epoch": 0.9022208575572587, + "grad_norm": 30.363508224487305, + "learning_rate": 3.430670798322733e-07, + "loss": 13.3923, + "step": 446630 + }, + { + "epoch": 0.9022410581899425, + "grad_norm": 162.2301025390625, + "learning_rate": 3.4294002016889206e-07, + "loss": 15.3003, + "step": 446640 + }, + { + "epoch": 0.9022612588226263, + "grad_norm": 167.9653778076172, + "learning_rate": 3.428129832034549e-07, + "loss": 19.8767, + "step": 446650 + }, + { + "epoch": 0.9022814594553101, + "grad_norm": 346.23358154296875, + "learning_rate": 3.426859689365836e-07, + "loss": 13.0976, + "step": 446660 + }, + { + "epoch": 0.902301660087994, + "grad_norm": 137.21295166015625, + "learning_rate": 3.425589773688953e-07, + "loss": 19.9106, + "step": 446670 + }, + { + "epoch": 0.9023218607206778, + "grad_norm": 206.3722381591797, + "learning_rate": 3.424320085010102e-07, + "loss": 18.7182, + "step": 446680 + }, + { + "epoch": 0.9023420613533616, + "grad_norm": 68.49281311035156, + "learning_rate": 3.423050623335467e-07, + "loss": 7.6911, + "step": 446690 + }, + { + "epoch": 0.9023622619860454, + "grad_norm": 320.5769958496094, + "learning_rate": 3.421781388671225e-07, + "loss": 13.3725, + "step": 446700 + }, + { + "epoch": 0.9023824626187292, + "grad_norm": 17.555816650390625, + "learning_rate": 3.420512381023583e-07, + "loss": 16.0823, + "step": 446710 + }, + { + "epoch": 0.902402663251413, + "grad_norm": 48.743309020996094, + "learning_rate": 3.419243600398703e-07, + "loss": 11.5678, + "step": 446720 + }, + { + "epoch": 0.9024228638840969, + "grad_norm": 184.26426696777344, + "learning_rate": 3.4179750468027906e-07, + "loss": 15.7388, + "step": 446730 + }, + { + "epoch": 0.9024430645167807, + "grad_norm": 304.2300109863281, + "learning_rate": 3.416706720242008e-07, + "loss": 15.5561, + "step": 446740 + }, + { + "epoch": 0.9024632651494645, + "grad_norm": 230.85507202148438, + "learning_rate": 3.415438620722555e-07, + "loss": 17.5529, + "step": 446750 + }, + { + "epoch": 0.9024834657821483, + "grad_norm": 226.01370239257812, + "learning_rate": 3.4141707482506056e-07, + "loss": 18.1902, + "step": 446760 + }, + { + "epoch": 0.9025036664148322, + "grad_norm": 993.4498291015625, + "learning_rate": 3.412903102832327e-07, + "loss": 11.4418, + "step": 446770 + }, + { + "epoch": 0.902523867047516, + "grad_norm": 888.44091796875, + "learning_rate": 3.4116356844739184e-07, + "loss": 29.1596, + "step": 446780 + }, + { + "epoch": 0.9025440676801998, + "grad_norm": 48.29187774658203, + "learning_rate": 3.4103684931815483e-07, + "loss": 16.8089, + "step": 446790 + }, + { + "epoch": 0.9025642683128836, + "grad_norm": 186.04946899414062, + "learning_rate": 3.409101528961378e-07, + "loss": 4.0013, + "step": 446800 + }, + { + "epoch": 0.9025844689455673, + "grad_norm": 142.0152130126953, + "learning_rate": 3.407834791819603e-07, + "loss": 16.3202, + "step": 446810 + }, + { + "epoch": 0.9026046695782511, + "grad_norm": 200.44882202148438, + "learning_rate": 3.4065682817624015e-07, + "loss": 19.398, + "step": 446820 + }, + { + "epoch": 0.902624870210935, + "grad_norm": 411.5492858886719, + "learning_rate": 3.4053019987959234e-07, + "loss": 27.2015, + "step": 446830 + }, + { + "epoch": 0.9026450708436188, + "grad_norm": 496.2413024902344, + "learning_rate": 3.404035942926348e-07, + "loss": 15.3034, + "step": 446840 + }, + { + "epoch": 0.9026652714763026, + "grad_norm": 363.2920227050781, + "learning_rate": 3.402770114159859e-07, + "loss": 24.9961, + "step": 446850 + }, + { + "epoch": 0.9026854721089864, + "grad_norm": 233.4441375732422, + "learning_rate": 3.401504512502618e-07, + "loss": 18.1636, + "step": 446860 + }, + { + "epoch": 0.9027056727416702, + "grad_norm": 291.9984130859375, + "learning_rate": 3.4002391379607815e-07, + "loss": 20.9636, + "step": 446870 + }, + { + "epoch": 0.9027258733743541, + "grad_norm": 19.619997024536133, + "learning_rate": 3.3989739905405326e-07, + "loss": 11.5068, + "step": 446880 + }, + { + "epoch": 0.9027460740070379, + "grad_norm": 302.156982421875, + "learning_rate": 3.3977090702480455e-07, + "loss": 21.4687, + "step": 446890 + }, + { + "epoch": 0.9027662746397217, + "grad_norm": 475.95806884765625, + "learning_rate": 3.396444377089453e-07, + "loss": 18.9968, + "step": 446900 + }, + { + "epoch": 0.9027864752724055, + "grad_norm": 319.4463195800781, + "learning_rate": 3.395179911070945e-07, + "loss": 19.7993, + "step": 446910 + }, + { + "epoch": 0.9028066759050893, + "grad_norm": 132.29652404785156, + "learning_rate": 3.3939156721986777e-07, + "loss": 13.7618, + "step": 446920 + }, + { + "epoch": 0.9028268765377732, + "grad_norm": 109.25587463378906, + "learning_rate": 3.3926516604788185e-07, + "loss": 22.8294, + "step": 446930 + }, + { + "epoch": 0.902847077170457, + "grad_norm": 314.4452819824219, + "learning_rate": 3.3913878759175124e-07, + "loss": 25.3541, + "step": 446940 + }, + { + "epoch": 0.9028672778031408, + "grad_norm": 455.28790283203125, + "learning_rate": 3.3901243185209375e-07, + "loss": 17.1267, + "step": 446950 + }, + { + "epoch": 0.9028874784358246, + "grad_norm": 548.4476318359375, + "learning_rate": 3.388860988295245e-07, + "loss": 18.3909, + "step": 446960 + }, + { + "epoch": 0.9029076790685084, + "grad_norm": 219.3155517578125, + "learning_rate": 3.3875978852465795e-07, + "loss": 14.5548, + "step": 446970 + }, + { + "epoch": 0.9029278797011923, + "grad_norm": 82.13236999511719, + "learning_rate": 3.3863350093811196e-07, + "loss": 12.6254, + "step": 446980 + }, + { + "epoch": 0.9029480803338761, + "grad_norm": 96.57003784179688, + "learning_rate": 3.3850723607049994e-07, + "loss": 13.3052, + "step": 446990 + }, + { + "epoch": 0.9029682809665599, + "grad_norm": 132.06687927246094, + "learning_rate": 3.3838099392243915e-07, + "loss": 18.9448, + "step": 447000 + }, + { + "epoch": 0.9029884815992437, + "grad_norm": 52.39521789550781, + "learning_rate": 3.382547744945436e-07, + "loss": 11.7903, + "step": 447010 + }, + { + "epoch": 0.9030086822319275, + "grad_norm": 265.75286865234375, + "learning_rate": 3.3812857778742935e-07, + "loss": 11.8829, + "step": 447020 + }, + { + "epoch": 0.9030288828646114, + "grad_norm": 353.17645263671875, + "learning_rate": 3.3800240380171046e-07, + "loss": 14.2112, + "step": 447030 + }, + { + "epoch": 0.9030490834972952, + "grad_norm": 367.2689208984375, + "learning_rate": 3.3787625253800247e-07, + "loss": 13.5671, + "step": 447040 + }, + { + "epoch": 0.903069284129979, + "grad_norm": 253.53717041015625, + "learning_rate": 3.3775012399692055e-07, + "loss": 23.6926, + "step": 447050 + }, + { + "epoch": 0.9030894847626628, + "grad_norm": 591.800048828125, + "learning_rate": 3.3762401817907795e-07, + "loss": 19.7851, + "step": 447060 + }, + { + "epoch": 0.9031096853953465, + "grad_norm": 185.76414489746094, + "learning_rate": 3.374979350850921e-07, + "loss": 23.7267, + "step": 447070 + }, + { + "epoch": 0.9031298860280303, + "grad_norm": 752.8250732421875, + "learning_rate": 3.373718747155752e-07, + "loss": 24.4539, + "step": 447080 + }, + { + "epoch": 0.9031500866607142, + "grad_norm": 116.03507232666016, + "learning_rate": 3.372458370711412e-07, + "loss": 11.2266, + "step": 447090 + }, + { + "epoch": 0.903170287293398, + "grad_norm": 170.8887939453125, + "learning_rate": 3.371198221524069e-07, + "loss": 12.8361, + "step": 447100 + }, + { + "epoch": 0.9031904879260818, + "grad_norm": 300.7256774902344, + "learning_rate": 3.3699382995998455e-07, + "loss": 13.9144, + "step": 447110 + }, + { + "epoch": 0.9032106885587656, + "grad_norm": 189.86111450195312, + "learning_rate": 3.368678604944886e-07, + "loss": 13.0529, + "step": 447120 + }, + { + "epoch": 0.9032308891914494, + "grad_norm": 287.09002685546875, + "learning_rate": 3.3674191375653255e-07, + "loss": 9.8071, + "step": 447130 + }, + { + "epoch": 0.9032510898241333, + "grad_norm": 208.15184020996094, + "learning_rate": 3.366159897467314e-07, + "loss": 13.5541, + "step": 447140 + }, + { + "epoch": 0.9032712904568171, + "grad_norm": 254.23193359375, + "learning_rate": 3.364900884656991e-07, + "loss": 16.9599, + "step": 447150 + }, + { + "epoch": 0.9032914910895009, + "grad_norm": 297.40203857421875, + "learning_rate": 3.3636420991404686e-07, + "loss": 10.7206, + "step": 447160 + }, + { + "epoch": 0.9033116917221847, + "grad_norm": 330.4181213378906, + "learning_rate": 3.3623835409239023e-07, + "loss": 21.283, + "step": 447170 + }, + { + "epoch": 0.9033318923548685, + "grad_norm": 313.4491271972656, + "learning_rate": 3.361125210013438e-07, + "loss": 16.4498, + "step": 447180 + }, + { + "epoch": 0.9033520929875524, + "grad_norm": 0.0, + "learning_rate": 3.3598671064151767e-07, + "loss": 17.463, + "step": 447190 + }, + { + "epoch": 0.9033722936202362, + "grad_norm": 227.22650146484375, + "learning_rate": 3.358609230135268e-07, + "loss": 14.3232, + "step": 447200 + }, + { + "epoch": 0.90339249425292, + "grad_norm": 128.48728942871094, + "learning_rate": 3.357351581179846e-07, + "loss": 23.0015, + "step": 447210 + }, + { + "epoch": 0.9034126948856038, + "grad_norm": 221.73208618164062, + "learning_rate": 3.35609415955504e-07, + "loss": 15.211, + "step": 447220 + }, + { + "epoch": 0.9034328955182876, + "grad_norm": 590.2010498046875, + "learning_rate": 3.354836965266961e-07, + "loss": 47.4309, + "step": 447230 + }, + { + "epoch": 0.9034530961509715, + "grad_norm": 288.79150390625, + "learning_rate": 3.35357999832176e-07, + "loss": 14.823, + "step": 447240 + }, + { + "epoch": 0.9034732967836553, + "grad_norm": 9.984078407287598, + "learning_rate": 3.352323258725554e-07, + "loss": 12.5962, + "step": 447250 + }, + { + "epoch": 0.9034934974163391, + "grad_norm": 352.9945983886719, + "learning_rate": 3.351066746484455e-07, + "loss": 16.0944, + "step": 447260 + }, + { + "epoch": 0.9035136980490229, + "grad_norm": 356.8675842285156, + "learning_rate": 3.349810461604608e-07, + "loss": 19.7702, + "step": 447270 + }, + { + "epoch": 0.9035338986817067, + "grad_norm": 59.46029281616211, + "learning_rate": 3.3485544040921194e-07, + "loss": 30.8776, + "step": 447280 + }, + { + "epoch": 0.9035540993143906, + "grad_norm": 437.4639892578125, + "learning_rate": 3.347298573953128e-07, + "loss": 13.3344, + "step": 447290 + }, + { + "epoch": 0.9035742999470744, + "grad_norm": 21.0152587890625, + "learning_rate": 3.3460429711937417e-07, + "loss": 19.9867, + "step": 447300 + }, + { + "epoch": 0.9035945005797582, + "grad_norm": 320.994140625, + "learning_rate": 3.344787595820076e-07, + "loss": 11.8049, + "step": 447310 + }, + { + "epoch": 0.903614701212442, + "grad_norm": 364.42034912109375, + "learning_rate": 3.343532447838266e-07, + "loss": 17.2327, + "step": 447320 + }, + { + "epoch": 0.9036349018451257, + "grad_norm": 274.0588684082031, + "learning_rate": 3.3422775272544115e-07, + "loss": 17.06, + "step": 447330 + }, + { + "epoch": 0.9036551024778096, + "grad_norm": 255.96929931640625, + "learning_rate": 3.3410228340746475e-07, + "loss": 15.7417, + "step": 447340 + }, + { + "epoch": 0.9036753031104934, + "grad_norm": 5.572038173675537, + "learning_rate": 3.3397683683050685e-07, + "loss": 26.1625, + "step": 447350 + }, + { + "epoch": 0.9036955037431772, + "grad_norm": 327.1401062011719, + "learning_rate": 3.338514129951809e-07, + "loss": 6.1452, + "step": 447360 + }, + { + "epoch": 0.903715704375861, + "grad_norm": 120.31031036376953, + "learning_rate": 3.337260119020974e-07, + "loss": 18.9752, + "step": 447370 + }, + { + "epoch": 0.9037359050085448, + "grad_norm": 281.9804382324219, + "learning_rate": 3.33600633551866e-07, + "loss": 24.0222, + "step": 447380 + }, + { + "epoch": 0.9037561056412287, + "grad_norm": 111.14518737792969, + "learning_rate": 3.334752779451006e-07, + "loss": 13.0984, + "step": 447390 + }, + { + "epoch": 0.9037763062739125, + "grad_norm": 203.70059204101562, + "learning_rate": 3.3334994508241013e-07, + "loss": 18.8232, + "step": 447400 + }, + { + "epoch": 0.9037965069065963, + "grad_norm": 634.7371826171875, + "learning_rate": 3.332246349644058e-07, + "loss": 31.133, + "step": 447410 + }, + { + "epoch": 0.9038167075392801, + "grad_norm": 325.8391418457031, + "learning_rate": 3.3309934759169825e-07, + "loss": 19.8529, + "step": 447420 + }, + { + "epoch": 0.9038369081719639, + "grad_norm": 235.7917938232422, + "learning_rate": 3.3297408296489973e-07, + "loss": 13.4451, + "step": 447430 + }, + { + "epoch": 0.9038571088046478, + "grad_norm": 275.82525634765625, + "learning_rate": 3.328488410846187e-07, + "loss": 19.9762, + "step": 447440 + }, + { + "epoch": 0.9038773094373316, + "grad_norm": 541.6697387695312, + "learning_rate": 3.327236219514657e-07, + "loss": 16.1105, + "step": 447450 + }, + { + "epoch": 0.9038975100700154, + "grad_norm": 143.609130859375, + "learning_rate": 3.325984255660525e-07, + "loss": 16.8708, + "step": 447460 + }, + { + "epoch": 0.9039177107026992, + "grad_norm": 310.2473449707031, + "learning_rate": 3.324732519289886e-07, + "loss": 14.6144, + "step": 447470 + }, + { + "epoch": 0.903937911335383, + "grad_norm": 139.44161987304688, + "learning_rate": 3.3234810104088356e-07, + "loss": 16.5766, + "step": 447480 + }, + { + "epoch": 0.9039581119680669, + "grad_norm": 401.2772521972656, + "learning_rate": 3.322229729023474e-07, + "loss": 11.4328, + "step": 447490 + }, + { + "epoch": 0.9039783126007507, + "grad_norm": 358.89385986328125, + "learning_rate": 3.320978675139919e-07, + "loss": 20.5475, + "step": 447500 + }, + { + "epoch": 0.9039985132334345, + "grad_norm": 10.338994979858398, + "learning_rate": 3.319727848764237e-07, + "loss": 18.9177, + "step": 447510 + }, + { + "epoch": 0.9040187138661183, + "grad_norm": 386.9339294433594, + "learning_rate": 3.318477249902541e-07, + "loss": 13.5949, + "step": 447520 + }, + { + "epoch": 0.9040389144988021, + "grad_norm": 29.801605224609375, + "learning_rate": 3.317226878560931e-07, + "loss": 15.1736, + "step": 447530 + }, + { + "epoch": 0.904059115131486, + "grad_norm": 397.3448181152344, + "learning_rate": 3.3159767347454963e-07, + "loss": 27.8067, + "step": 447540 + }, + { + "epoch": 0.9040793157641698, + "grad_norm": 227.94371032714844, + "learning_rate": 3.3147268184623216e-07, + "loss": 14.3855, + "step": 447550 + }, + { + "epoch": 0.9040995163968536, + "grad_norm": 287.36328125, + "learning_rate": 3.3134771297175127e-07, + "loss": 21.9384, + "step": 447560 + }, + { + "epoch": 0.9041197170295374, + "grad_norm": 217.54669189453125, + "learning_rate": 3.3122276685171593e-07, + "loss": 24.2679, + "step": 447570 + }, + { + "epoch": 0.9041399176622211, + "grad_norm": 520.4777221679688, + "learning_rate": 3.3109784348673293e-07, + "loss": 16.5656, + "step": 447580 + }, + { + "epoch": 0.904160118294905, + "grad_norm": 585.6632080078125, + "learning_rate": 3.309729428774144e-07, + "loss": 16.717, + "step": 447590 + }, + { + "epoch": 0.9041803189275888, + "grad_norm": 181.5473175048828, + "learning_rate": 3.3084806502436617e-07, + "loss": 15.9249, + "step": 447600 + }, + { + "epoch": 0.9042005195602726, + "grad_norm": 111.66215515136719, + "learning_rate": 3.3072320992819875e-07, + "loss": 20.6349, + "step": 447610 + }, + { + "epoch": 0.9042207201929564, + "grad_norm": 123.65542602539062, + "learning_rate": 3.3059837758951995e-07, + "loss": 10.5056, + "step": 447620 + }, + { + "epoch": 0.9042409208256402, + "grad_norm": 575.4657592773438, + "learning_rate": 3.3047356800893826e-07, + "loss": 23.5115, + "step": 447630 + }, + { + "epoch": 0.904261121458324, + "grad_norm": 204.24893188476562, + "learning_rate": 3.303487811870626e-07, + "loss": 11.435, + "step": 447640 + }, + { + "epoch": 0.9042813220910079, + "grad_norm": 167.03961181640625, + "learning_rate": 3.3022401712450025e-07, + "loss": 15.5288, + "step": 447650 + }, + { + "epoch": 0.9043015227236917, + "grad_norm": 1584.814697265625, + "learning_rate": 3.3009927582185965e-07, + "loss": 35.3933, + "step": 447660 + }, + { + "epoch": 0.9043217233563755, + "grad_norm": 43.52713394165039, + "learning_rate": 3.2997455727974856e-07, + "loss": 18.9435, + "step": 447670 + }, + { + "epoch": 0.9043419239890593, + "grad_norm": 194.8103485107422, + "learning_rate": 3.2984986149877554e-07, + "loss": 7.7875, + "step": 447680 + }, + { + "epoch": 0.9043621246217431, + "grad_norm": 1.3497366905212402, + "learning_rate": 3.297251884795477e-07, + "loss": 19.7871, + "step": 447690 + }, + { + "epoch": 0.904382325254427, + "grad_norm": 382.6680603027344, + "learning_rate": 3.2960053822267245e-07, + "loss": 18.3611, + "step": 447700 + }, + { + "epoch": 0.9044025258871108, + "grad_norm": 143.8529052734375, + "learning_rate": 3.294759107287582e-07, + "loss": 10.4991, + "step": 447710 + }, + { + "epoch": 0.9044227265197946, + "grad_norm": 199.284423828125, + "learning_rate": 3.293513059984121e-07, + "loss": 15.4406, + "step": 447720 + }, + { + "epoch": 0.9044429271524784, + "grad_norm": 339.30767822265625, + "learning_rate": 3.2922672403224053e-07, + "loss": 19.711, + "step": 447730 + }, + { + "epoch": 0.9044631277851622, + "grad_norm": 454.2643737792969, + "learning_rate": 3.2910216483085125e-07, + "loss": 14.7664, + "step": 447740 + }, + { + "epoch": 0.9044833284178461, + "grad_norm": 250.41156005859375, + "learning_rate": 3.289776283948526e-07, + "loss": 16.6035, + "step": 447750 + }, + { + "epoch": 0.9045035290505299, + "grad_norm": 363.5282287597656, + "learning_rate": 3.2885311472485025e-07, + "loss": 14.1688, + "step": 447760 + }, + { + "epoch": 0.9045237296832137, + "grad_norm": 178.89268493652344, + "learning_rate": 3.287286238214504e-07, + "loss": 11.6931, + "step": 447770 + }, + { + "epoch": 0.9045439303158975, + "grad_norm": 147.0160675048828, + "learning_rate": 3.286041556852615e-07, + "loss": 28.4786, + "step": 447780 + }, + { + "epoch": 0.9045641309485813, + "grad_norm": 131.66220092773438, + "learning_rate": 3.2847971031688963e-07, + "loss": 34.993, + "step": 447790 + }, + { + "epoch": 0.9045843315812652, + "grad_norm": 220.76544189453125, + "learning_rate": 3.283552877169399e-07, + "loss": 17.8001, + "step": 447800 + }, + { + "epoch": 0.904604532213949, + "grad_norm": 228.4512481689453, + "learning_rate": 3.282308878860202e-07, + "loss": 11.7146, + "step": 447810 + }, + { + "epoch": 0.9046247328466328, + "grad_norm": 314.55743408203125, + "learning_rate": 3.281065108247372e-07, + "loss": 45.2426, + "step": 447820 + }, + { + "epoch": 0.9046449334793166, + "grad_norm": 287.1359558105469, + "learning_rate": 3.279821565336966e-07, + "loss": 12.0542, + "step": 447830 + }, + { + "epoch": 0.9046651341120003, + "grad_norm": 121.59862518310547, + "learning_rate": 3.2785782501350284e-07, + "loss": 19.8585, + "step": 447840 + }, + { + "epoch": 0.9046853347446842, + "grad_norm": 222.90481567382812, + "learning_rate": 3.277335162647649e-07, + "loss": 32.5965, + "step": 447850 + }, + { + "epoch": 0.904705535377368, + "grad_norm": 374.2373962402344, + "learning_rate": 3.276092302880868e-07, + "loss": 15.8904, + "step": 447860 + }, + { + "epoch": 0.9047257360100518, + "grad_norm": 131.89588928222656, + "learning_rate": 3.274849670840741e-07, + "loss": 6.9801, + "step": 447870 + }, + { + "epoch": 0.9047459366427356, + "grad_norm": 299.618408203125, + "learning_rate": 3.2736072665333353e-07, + "loss": 16.9254, + "step": 447880 + }, + { + "epoch": 0.9047661372754194, + "grad_norm": 223.69972229003906, + "learning_rate": 3.272365089964691e-07, + "loss": 10.6168, + "step": 447890 + }, + { + "epoch": 0.9047863379081033, + "grad_norm": 187.61891174316406, + "learning_rate": 3.271123141140886e-07, + "loss": 16.898, + "step": 447900 + }, + { + "epoch": 0.9048065385407871, + "grad_norm": 530.3232421875, + "learning_rate": 3.269881420067944e-07, + "loss": 17.9211, + "step": 447910 + }, + { + "epoch": 0.9048267391734709, + "grad_norm": 215.07627868652344, + "learning_rate": 3.268639926751943e-07, + "loss": 11.479, + "step": 447920 + }, + { + "epoch": 0.9048469398061547, + "grad_norm": 485.24908447265625, + "learning_rate": 3.267398661198923e-07, + "loss": 25.1056, + "step": 447930 + }, + { + "epoch": 0.9048671404388385, + "grad_norm": 272.2235412597656, + "learning_rate": 3.2661576234149285e-07, + "loss": 15.0733, + "step": 447940 + }, + { + "epoch": 0.9048873410715224, + "grad_norm": 207.17344665527344, + "learning_rate": 3.264916813406022e-07, + "loss": 13.0652, + "step": 447950 + }, + { + "epoch": 0.9049075417042062, + "grad_norm": 270.88641357421875, + "learning_rate": 3.263676231178231e-07, + "loss": 9.3535, + "step": 447960 + }, + { + "epoch": 0.90492774233689, + "grad_norm": 4.275578498840332, + "learning_rate": 3.262435876737624e-07, + "loss": 17.6642, + "step": 447970 + }, + { + "epoch": 0.9049479429695738, + "grad_norm": 303.15155029296875, + "learning_rate": 3.2611957500902345e-07, + "loss": 28.7785, + "step": 447980 + }, + { + "epoch": 0.9049681436022576, + "grad_norm": 1.4317559003829956, + "learning_rate": 3.2599558512421024e-07, + "loss": 18.0041, + "step": 447990 + }, + { + "epoch": 0.9049883442349415, + "grad_norm": 183.83926391601562, + "learning_rate": 3.258716180199278e-07, + "loss": 19.8937, + "step": 448000 + }, + { + "epoch": 0.9050085448676253, + "grad_norm": 115.31336212158203, + "learning_rate": 3.2574767369678073e-07, + "loss": 15.0221, + "step": 448010 + }, + { + "epoch": 0.9050287455003091, + "grad_norm": 276.0068054199219, + "learning_rate": 3.2562375215537176e-07, + "loss": 12.0676, + "step": 448020 + }, + { + "epoch": 0.9050489461329929, + "grad_norm": 194.3147735595703, + "learning_rate": 3.2549985339630606e-07, + "loss": 24.008, + "step": 448030 + }, + { + "epoch": 0.9050691467656767, + "grad_norm": 158.41131591796875, + "learning_rate": 3.253759774201881e-07, + "loss": 17.3652, + "step": 448040 + }, + { + "epoch": 0.9050893473983606, + "grad_norm": 226.0045928955078, + "learning_rate": 3.252521242276191e-07, + "loss": 30.4765, + "step": 448050 + }, + { + "epoch": 0.9051095480310444, + "grad_norm": 272.4423828125, + "learning_rate": 3.2512829381920463e-07, + "loss": 22.5964, + "step": 448060 + }, + { + "epoch": 0.9051297486637282, + "grad_norm": 219.59657287597656, + "learning_rate": 3.250044861955487e-07, + "loss": 23.9859, + "step": 448070 + }, + { + "epoch": 0.905149949296412, + "grad_norm": 13.538033485412598, + "learning_rate": 3.248807013572536e-07, + "loss": 18.8713, + "step": 448080 + }, + { + "epoch": 0.9051701499290957, + "grad_norm": 245.32994079589844, + "learning_rate": 3.2475693930492214e-07, + "loss": 11.9465, + "step": 448090 + }, + { + "epoch": 0.9051903505617795, + "grad_norm": 374.3828430175781, + "learning_rate": 3.246332000391583e-07, + "loss": 15.8, + "step": 448100 + }, + { + "epoch": 0.9052105511944634, + "grad_norm": 555.6296997070312, + "learning_rate": 3.245094835605667e-07, + "loss": 15.4352, + "step": 448110 + }, + { + "epoch": 0.9052307518271472, + "grad_norm": 69.77349090576172, + "learning_rate": 3.2438578986974776e-07, + "loss": 18.0138, + "step": 448120 + }, + { + "epoch": 0.905250952459831, + "grad_norm": 689.2047119140625, + "learning_rate": 3.242621189673051e-07, + "loss": 35.6984, + "step": 448130 + }, + { + "epoch": 0.9052711530925148, + "grad_norm": 241.93197631835938, + "learning_rate": 3.2413847085384256e-07, + "loss": 12.6856, + "step": 448140 + }, + { + "epoch": 0.9052913537251986, + "grad_norm": 231.5634002685547, + "learning_rate": 3.240148455299619e-07, + "loss": 11.645, + "step": 448150 + }, + { + "epoch": 0.9053115543578825, + "grad_norm": 260.03094482421875, + "learning_rate": 3.2389124299626483e-07, + "loss": 24.7901, + "step": 448160 + }, + { + "epoch": 0.9053317549905663, + "grad_norm": 552.4435424804688, + "learning_rate": 3.237676632533554e-07, + "loss": 15.0893, + "step": 448170 + }, + { + "epoch": 0.9053519556232501, + "grad_norm": 472.7244567871094, + "learning_rate": 3.2364410630183587e-07, + "loss": 12.8835, + "step": 448180 + }, + { + "epoch": 0.9053721562559339, + "grad_norm": 173.71104431152344, + "learning_rate": 3.2352057214230623e-07, + "loss": 12.9068, + "step": 448190 + }, + { + "epoch": 0.9053923568886177, + "grad_norm": 19.331586837768555, + "learning_rate": 3.233970607753717e-07, + "loss": 18.2913, + "step": 448200 + }, + { + "epoch": 0.9054125575213016, + "grad_norm": 281.31011962890625, + "learning_rate": 3.2327357220163116e-07, + "loss": 14.0501, + "step": 448210 + }, + { + "epoch": 0.9054327581539854, + "grad_norm": 265.1382141113281, + "learning_rate": 3.231501064216891e-07, + "loss": 28.7558, + "step": 448220 + }, + { + "epoch": 0.9054529587866692, + "grad_norm": 129.23358154296875, + "learning_rate": 3.2302666343614565e-07, + "loss": 12.1181, + "step": 448230 + }, + { + "epoch": 0.905473159419353, + "grad_norm": 181.31980895996094, + "learning_rate": 3.2290324324560363e-07, + "loss": 17.8086, + "step": 448240 + }, + { + "epoch": 0.9054933600520368, + "grad_norm": 278.672119140625, + "learning_rate": 3.227798458506637e-07, + "loss": 23.1344, + "step": 448250 + }, + { + "epoch": 0.9055135606847207, + "grad_norm": 344.4086608886719, + "learning_rate": 3.22656471251927e-07, + "loss": 20.6322, + "step": 448260 + }, + { + "epoch": 0.9055337613174045, + "grad_norm": 235.47267150878906, + "learning_rate": 3.225331194499964e-07, + "loss": 10.4709, + "step": 448270 + }, + { + "epoch": 0.9055539619500883, + "grad_norm": 168.4288330078125, + "learning_rate": 3.2240979044547095e-07, + "loss": 14.7583, + "step": 448280 + }, + { + "epoch": 0.9055741625827721, + "grad_norm": 131.17369079589844, + "learning_rate": 3.2228648423895335e-07, + "loss": 18.4809, + "step": 448290 + }, + { + "epoch": 0.9055943632154559, + "grad_norm": 227.6136016845703, + "learning_rate": 3.2216320083104434e-07, + "loss": 17.8105, + "step": 448300 + }, + { + "epoch": 0.9056145638481398, + "grad_norm": 184.4230194091797, + "learning_rate": 3.2203994022234396e-07, + "loss": 18.6132, + "step": 448310 + }, + { + "epoch": 0.9056347644808236, + "grad_norm": 266.4573974609375, + "learning_rate": 3.2191670241345395e-07, + "loss": 21.2703, + "step": 448320 + }, + { + "epoch": 0.9056549651135074, + "grad_norm": 698.3164672851562, + "learning_rate": 3.2179348740497494e-07, + "loss": 15.9293, + "step": 448330 + }, + { + "epoch": 0.9056751657461912, + "grad_norm": 304.7605285644531, + "learning_rate": 3.216702951975059e-07, + "loss": 15.8953, + "step": 448340 + }, + { + "epoch": 0.9056953663788749, + "grad_norm": 113.5278549194336, + "learning_rate": 3.2154712579164913e-07, + "loss": 8.5069, + "step": 448350 + }, + { + "epoch": 0.9057155670115588, + "grad_norm": 274.9203796386719, + "learning_rate": 3.2142397918800416e-07, + "loss": 19.7816, + "step": 448360 + }, + { + "epoch": 0.9057357676442426, + "grad_norm": 158.28135681152344, + "learning_rate": 3.213008553871716e-07, + "loss": 12.7516, + "step": 448370 + }, + { + "epoch": 0.9057559682769264, + "grad_norm": 272.28643798828125, + "learning_rate": 3.2117775438975096e-07, + "loss": 19.5911, + "step": 448380 + }, + { + "epoch": 0.9057761689096102, + "grad_norm": 136.56214904785156, + "learning_rate": 3.2105467619634234e-07, + "loss": 14.3715, + "step": 448390 + }, + { + "epoch": 0.905796369542294, + "grad_norm": 198.1589813232422, + "learning_rate": 3.2093162080754634e-07, + "loss": 10.9491, + "step": 448400 + }, + { + "epoch": 0.9058165701749779, + "grad_norm": 187.0760040283203, + "learning_rate": 3.208085882239614e-07, + "loss": 11.7124, + "step": 448410 + }, + { + "epoch": 0.9058367708076617, + "grad_norm": 120.32931518554688, + "learning_rate": 3.206855784461876e-07, + "loss": 15.4974, + "step": 448420 + }, + { + "epoch": 0.9058569714403455, + "grad_norm": 175.50558471679688, + "learning_rate": 3.205625914748256e-07, + "loss": 22.6222, + "step": 448430 + }, + { + "epoch": 0.9058771720730293, + "grad_norm": 405.4112548828125, + "learning_rate": 3.2043962731047373e-07, + "loss": 11.259, + "step": 448440 + }, + { + "epoch": 0.9058973727057131, + "grad_norm": 1.81077241897583, + "learning_rate": 3.20316685953731e-07, + "loss": 27.5821, + "step": 448450 + }, + { + "epoch": 0.905917573338397, + "grad_norm": 257.16375732421875, + "learning_rate": 3.20193767405198e-07, + "loss": 12.0861, + "step": 448460 + }, + { + "epoch": 0.9059377739710808, + "grad_norm": 166.19174194335938, + "learning_rate": 3.2007087166547325e-07, + "loss": 10.5968, + "step": 448470 + }, + { + "epoch": 0.9059579746037646, + "grad_norm": 147.11138916015625, + "learning_rate": 3.199479987351545e-07, + "loss": 14.8641, + "step": 448480 + }, + { + "epoch": 0.9059781752364484, + "grad_norm": 346.37860107421875, + "learning_rate": 3.1982514861484184e-07, + "loss": 16.6575, + "step": 448490 + }, + { + "epoch": 0.9059983758691322, + "grad_norm": 0.0, + "learning_rate": 3.1970232130513365e-07, + "loss": 7.7384, + "step": 448500 + }, + { + "epoch": 0.906018576501816, + "grad_norm": 271.4297790527344, + "learning_rate": 3.19579516806629e-07, + "loss": 21.7959, + "step": 448510 + }, + { + "epoch": 0.9060387771344999, + "grad_norm": 263.17095947265625, + "learning_rate": 3.194567351199257e-07, + "loss": 23.3497, + "step": 448520 + }, + { + "epoch": 0.9060589777671837, + "grad_norm": 547.2625122070312, + "learning_rate": 3.193339762456232e-07, + "loss": 13.4204, + "step": 448530 + }, + { + "epoch": 0.9060791783998675, + "grad_norm": 485.02685546875, + "learning_rate": 3.1921124018431946e-07, + "loss": 18.5841, + "step": 448540 + }, + { + "epoch": 0.9060993790325513, + "grad_norm": 329.9028015136719, + "learning_rate": 3.1908852693661116e-07, + "loss": 26.0023, + "step": 448550 + }, + { + "epoch": 0.9061195796652352, + "grad_norm": 283.8506774902344, + "learning_rate": 3.1896583650309896e-07, + "loss": 19.4961, + "step": 448560 + }, + { + "epoch": 0.906139780297919, + "grad_norm": 449.8561706542969, + "learning_rate": 3.188431688843785e-07, + "loss": 20.3796, + "step": 448570 + }, + { + "epoch": 0.9061599809306028, + "grad_norm": 428.76458740234375, + "learning_rate": 3.187205240810493e-07, + "loss": 19.7878, + "step": 448580 + }, + { + "epoch": 0.9061801815632866, + "grad_norm": 226.718505859375, + "learning_rate": 3.1859790209370855e-07, + "loss": 18.1999, + "step": 448590 + }, + { + "epoch": 0.9062003821959704, + "grad_norm": 33.923519134521484, + "learning_rate": 3.1847530292295313e-07, + "loss": 9.0033, + "step": 448600 + }, + { + "epoch": 0.9062205828286541, + "grad_norm": 286.41119384765625, + "learning_rate": 3.18352726569382e-07, + "loss": 16.4444, + "step": 448610 + }, + { + "epoch": 0.906240783461338, + "grad_norm": 281.3250732421875, + "learning_rate": 3.1823017303359185e-07, + "loss": 17.605, + "step": 448620 + }, + { + "epoch": 0.9062609840940218, + "grad_norm": 464.6627197265625, + "learning_rate": 3.181076423161794e-07, + "loss": 16.2893, + "step": 448630 + }, + { + "epoch": 0.9062811847267056, + "grad_norm": 285.9560852050781, + "learning_rate": 3.179851344177426e-07, + "loss": 8.4293, + "step": 448640 + }, + { + "epoch": 0.9063013853593894, + "grad_norm": 287.4449157714844, + "learning_rate": 3.1786264933887977e-07, + "loss": 10.3244, + "step": 448650 + }, + { + "epoch": 0.9063215859920732, + "grad_norm": 191.82052612304688, + "learning_rate": 3.1774018708018493e-07, + "loss": 13.5765, + "step": 448660 + }, + { + "epoch": 0.9063417866247571, + "grad_norm": 270.4676208496094, + "learning_rate": 3.176177476422565e-07, + "loss": 11.8198, + "step": 448670 + }, + { + "epoch": 0.9063619872574409, + "grad_norm": 336.0176086425781, + "learning_rate": 3.1749533102569176e-07, + "loss": 8.6977, + "step": 448680 + }, + { + "epoch": 0.9063821878901247, + "grad_norm": 15.550987243652344, + "learning_rate": 3.173729372310874e-07, + "loss": 8.9225, + "step": 448690 + }, + { + "epoch": 0.9064023885228085, + "grad_norm": 239.37109375, + "learning_rate": 3.172505662590386e-07, + "loss": 12.3695, + "step": 448700 + }, + { + "epoch": 0.9064225891554923, + "grad_norm": 628.9953002929688, + "learning_rate": 3.1712821811014205e-07, + "loss": 17.098, + "step": 448710 + }, + { + "epoch": 0.9064427897881762, + "grad_norm": 324.8562316894531, + "learning_rate": 3.170058927849967e-07, + "loss": 9.2761, + "step": 448720 + }, + { + "epoch": 0.90646299042086, + "grad_norm": 219.28326416015625, + "learning_rate": 3.168835902841949e-07, + "loss": 10.7078, + "step": 448730 + }, + { + "epoch": 0.9064831910535438, + "grad_norm": 188.46009826660156, + "learning_rate": 3.167613106083345e-07, + "loss": 16.3328, + "step": 448740 + }, + { + "epoch": 0.9065033916862276, + "grad_norm": 245.0628204345703, + "learning_rate": 3.166390537580122e-07, + "loss": 16.5272, + "step": 448750 + }, + { + "epoch": 0.9065235923189114, + "grad_norm": 3.24403715133667, + "learning_rate": 3.165168197338231e-07, + "loss": 27.1772, + "step": 448760 + }, + { + "epoch": 0.9065437929515953, + "grad_norm": 415.12286376953125, + "learning_rate": 3.1639460853636226e-07, + "loss": 15.0703, + "step": 448770 + }, + { + "epoch": 0.9065639935842791, + "grad_norm": 23.8918514251709, + "learning_rate": 3.162724201662265e-07, + "loss": 11.7758, + "step": 448780 + }, + { + "epoch": 0.9065841942169629, + "grad_norm": 4.333435535430908, + "learning_rate": 3.161502546240114e-07, + "loss": 14.5456, + "step": 448790 + }, + { + "epoch": 0.9066043948496467, + "grad_norm": 589.1670532226562, + "learning_rate": 3.160281119103109e-07, + "loss": 29.9515, + "step": 448800 + }, + { + "epoch": 0.9066245954823305, + "grad_norm": 142.0504913330078, + "learning_rate": 3.159059920257218e-07, + "loss": 15.2165, + "step": 448810 + }, + { + "epoch": 0.9066447961150144, + "grad_norm": 28.602466583251953, + "learning_rate": 3.157838949708386e-07, + "loss": 16.4465, + "step": 448820 + }, + { + "epoch": 0.9066649967476982, + "grad_norm": 1572.9324951171875, + "learning_rate": 3.1566182074625693e-07, + "loss": 25.6184, + "step": 448830 + }, + { + "epoch": 0.906685197380382, + "grad_norm": 426.5455627441406, + "learning_rate": 3.155397693525708e-07, + "loss": 25.8037, + "step": 448840 + }, + { + "epoch": 0.9067053980130658, + "grad_norm": 320.0198669433594, + "learning_rate": 3.1541774079037635e-07, + "loss": 22.5102, + "step": 448850 + }, + { + "epoch": 0.9067255986457495, + "grad_norm": 527.325439453125, + "learning_rate": 3.1529573506026757e-07, + "loss": 27.059, + "step": 448860 + }, + { + "epoch": 0.9067457992784334, + "grad_norm": 8.045053482055664, + "learning_rate": 3.151737521628384e-07, + "loss": 11.6383, + "step": 448870 + }, + { + "epoch": 0.9067659999111172, + "grad_norm": 207.8684539794922, + "learning_rate": 3.150517920986851e-07, + "loss": 11.5602, + "step": 448880 + }, + { + "epoch": 0.906786200543801, + "grad_norm": 378.9325256347656, + "learning_rate": 3.1492985486840044e-07, + "loss": 11.034, + "step": 448890 + }, + { + "epoch": 0.9068064011764848, + "grad_norm": 297.4162902832031, + "learning_rate": 3.148079404725801e-07, + "loss": 14.1207, + "step": 448900 + }, + { + "epoch": 0.9068266018091686, + "grad_norm": 269.1322937011719, + "learning_rate": 3.1468604891181755e-07, + "loss": 9.0224, + "step": 448910 + }, + { + "epoch": 0.9068468024418525, + "grad_norm": 417.515625, + "learning_rate": 3.145641801867061e-07, + "loss": 17.4701, + "step": 448920 + }, + { + "epoch": 0.9068670030745363, + "grad_norm": 18.833566665649414, + "learning_rate": 3.1444233429784145e-07, + "loss": 16.3909, + "step": 448930 + }, + { + "epoch": 0.9068872037072201, + "grad_norm": 286.1451416015625, + "learning_rate": 3.14320511245817e-07, + "loss": 17.3131, + "step": 448940 + }, + { + "epoch": 0.9069074043399039, + "grad_norm": 208.767333984375, + "learning_rate": 3.1419871103122447e-07, + "loss": 15.315, + "step": 448950 + }, + { + "epoch": 0.9069276049725877, + "grad_norm": 116.77249145507812, + "learning_rate": 3.1407693365465954e-07, + "loss": 20.7787, + "step": 448960 + }, + { + "epoch": 0.9069478056052716, + "grad_norm": 431.48309326171875, + "learning_rate": 3.1395517911671613e-07, + "loss": 28.2554, + "step": 448970 + }, + { + "epoch": 0.9069680062379554, + "grad_norm": 38.44464111328125, + "learning_rate": 3.1383344741798716e-07, + "loss": 19.8453, + "step": 448980 + }, + { + "epoch": 0.9069882068706392, + "grad_norm": 291.7116394042969, + "learning_rate": 3.137117385590643e-07, + "loss": 22.2248, + "step": 448990 + }, + { + "epoch": 0.907008407503323, + "grad_norm": 80.84439849853516, + "learning_rate": 3.135900525405428e-07, + "loss": 13.2023, + "step": 449000 + }, + { + "epoch": 0.9070286081360068, + "grad_norm": 112.67292785644531, + "learning_rate": 3.134683893630153e-07, + "loss": 20.4081, + "step": 449010 + }, + { + "epoch": 0.9070488087686907, + "grad_norm": 105.46914672851562, + "learning_rate": 3.133467490270736e-07, + "loss": 10.5356, + "step": 449020 + }, + { + "epoch": 0.9070690094013745, + "grad_norm": 340.24774169921875, + "learning_rate": 3.1322513153331124e-07, + "loss": 19.299, + "step": 449030 + }, + { + "epoch": 0.9070892100340583, + "grad_norm": 1019.2459716796875, + "learning_rate": 3.1310353688232207e-07, + "loss": 19.9877, + "step": 449040 + }, + { + "epoch": 0.9071094106667421, + "grad_norm": 138.2771759033203, + "learning_rate": 3.1298196507469737e-07, + "loss": 25.8873, + "step": 449050 + }, + { + "epoch": 0.9071296112994259, + "grad_norm": 150.6022491455078, + "learning_rate": 3.128604161110299e-07, + "loss": 8.4736, + "step": 449060 + }, + { + "epoch": 0.9071498119321098, + "grad_norm": 222.2021942138672, + "learning_rate": 3.1273888999191314e-07, + "loss": 13.5926, + "step": 449070 + }, + { + "epoch": 0.9071700125647936, + "grad_norm": 743.35302734375, + "learning_rate": 3.126173867179383e-07, + "loss": 24.3137, + "step": 449080 + }, + { + "epoch": 0.9071902131974774, + "grad_norm": 131.61061096191406, + "learning_rate": 3.1249590628969707e-07, + "loss": 17.9359, + "step": 449090 + }, + { + "epoch": 0.9072104138301612, + "grad_norm": 333.20806884765625, + "learning_rate": 3.123744487077829e-07, + "loss": 26.8429, + "step": 449100 + }, + { + "epoch": 0.907230614462845, + "grad_norm": 159.0042266845703, + "learning_rate": 3.122530139727864e-07, + "loss": 20.8925, + "step": 449110 + }, + { + "epoch": 0.9072508150955287, + "grad_norm": 210.1376953125, + "learning_rate": 3.12131602085301e-07, + "loss": 12.3458, + "step": 449120 + }, + { + "epoch": 0.9072710157282126, + "grad_norm": 403.5615234375, + "learning_rate": 3.1201021304591684e-07, + "loss": 16.6434, + "step": 449130 + }, + { + "epoch": 0.9072912163608964, + "grad_norm": 312.8066101074219, + "learning_rate": 3.118888468552267e-07, + "loss": 6.4285, + "step": 449140 + }, + { + "epoch": 0.9073114169935802, + "grad_norm": 283.7262878417969, + "learning_rate": 3.1176750351382235e-07, + "loss": 15.0267, + "step": 449150 + }, + { + "epoch": 0.907331617626264, + "grad_norm": 106.77458190917969, + "learning_rate": 3.116461830222933e-07, + "loss": 12.1946, + "step": 449160 + }, + { + "epoch": 0.9073518182589478, + "grad_norm": 428.0980224609375, + "learning_rate": 3.11524885381233e-07, + "loss": 33.8796, + "step": 449170 + }, + { + "epoch": 0.9073720188916317, + "grad_norm": 181.52322387695312, + "learning_rate": 3.11403610591231e-07, + "loss": 15.0143, + "step": 449180 + }, + { + "epoch": 0.9073922195243155, + "grad_norm": 115.66758728027344, + "learning_rate": 3.1128235865288013e-07, + "loss": 16.0853, + "step": 449190 + }, + { + "epoch": 0.9074124201569993, + "grad_norm": 170.62115478515625, + "learning_rate": 3.1116112956677045e-07, + "loss": 9.9213, + "step": 449200 + }, + { + "epoch": 0.9074326207896831, + "grad_norm": 68.90569305419922, + "learning_rate": 3.1103992333349153e-07, + "loss": 14.7757, + "step": 449210 + }, + { + "epoch": 0.9074528214223669, + "grad_norm": 350.0859680175781, + "learning_rate": 3.1091873995363677e-07, + "loss": 16.3275, + "step": 449220 + }, + { + "epoch": 0.9074730220550508, + "grad_norm": 226.39288330078125, + "learning_rate": 3.1079757942779453e-07, + "loss": 17.1437, + "step": 449230 + }, + { + "epoch": 0.9074932226877346, + "grad_norm": 276.9502258300781, + "learning_rate": 3.106764417565561e-07, + "loss": 10.0963, + "step": 449240 + }, + { + "epoch": 0.9075134233204184, + "grad_norm": 319.51055908203125, + "learning_rate": 3.105553269405115e-07, + "loss": 18.2976, + "step": 449250 + }, + { + "epoch": 0.9075336239531022, + "grad_norm": 328.3490295410156, + "learning_rate": 3.1043423498025303e-07, + "loss": 21.3379, + "step": 449260 + }, + { + "epoch": 0.907553824585786, + "grad_norm": 432.4721374511719, + "learning_rate": 3.1031316587636805e-07, + "loss": 17.5255, + "step": 449270 + }, + { + "epoch": 0.9075740252184699, + "grad_norm": 0.0, + "learning_rate": 3.101921196294477e-07, + "loss": 24.2896, + "step": 449280 + }, + { + "epoch": 0.9075942258511537, + "grad_norm": 125.64295959472656, + "learning_rate": 3.1007109624008326e-07, + "loss": 28.2211, + "step": 449290 + }, + { + "epoch": 0.9076144264838375, + "grad_norm": 320.15679931640625, + "learning_rate": 3.0995009570886305e-07, + "loss": 23.9613, + "step": 449300 + }, + { + "epoch": 0.9076346271165213, + "grad_norm": 164.57333374023438, + "learning_rate": 3.098291180363766e-07, + "loss": 16.5072, + "step": 449310 + }, + { + "epoch": 0.9076548277492051, + "grad_norm": 219.70770263671875, + "learning_rate": 3.097081632232141e-07, + "loss": 12.2796, + "step": 449320 + }, + { + "epoch": 0.907675028381889, + "grad_norm": 225.4513397216797, + "learning_rate": 3.095872312699666e-07, + "loss": 10.1394, + "step": 449330 + }, + { + "epoch": 0.9076952290145728, + "grad_norm": 317.7618713378906, + "learning_rate": 3.094663221772209e-07, + "loss": 18.2566, + "step": 449340 + }, + { + "epoch": 0.9077154296472566, + "grad_norm": 214.27200317382812, + "learning_rate": 3.093454359455672e-07, + "loss": 18.4359, + "step": 449350 + }, + { + "epoch": 0.9077356302799404, + "grad_norm": 138.70181274414062, + "learning_rate": 3.09224572575596e-07, + "loss": 20.3603, + "step": 449360 + }, + { + "epoch": 0.9077558309126241, + "grad_norm": 485.7292785644531, + "learning_rate": 3.091037320678947e-07, + "loss": 20.2745, + "step": 449370 + }, + { + "epoch": 0.907776031545308, + "grad_norm": 255.82957458496094, + "learning_rate": 3.089829144230527e-07, + "loss": 15.1267, + "step": 449380 + }, + { + "epoch": 0.9077962321779918, + "grad_norm": 241.6278839111328, + "learning_rate": 3.088621196416597e-07, + "loss": 6.9126, + "step": 449390 + }, + { + "epoch": 0.9078164328106756, + "grad_norm": 254.8242645263672, + "learning_rate": 3.0874134772430344e-07, + "loss": 8.0878, + "step": 449400 + }, + { + "epoch": 0.9078366334433594, + "grad_norm": 928.5169067382812, + "learning_rate": 3.0862059867157237e-07, + "loss": 17.6812, + "step": 449410 + }, + { + "epoch": 0.9078568340760432, + "grad_norm": 315.62933349609375, + "learning_rate": 3.08499872484056e-07, + "loss": 9.0443, + "step": 449420 + }, + { + "epoch": 0.907877034708727, + "grad_norm": 249.79283142089844, + "learning_rate": 3.0837916916234166e-07, + "loss": 32.0876, + "step": 449430 + }, + { + "epoch": 0.9078972353414109, + "grad_norm": 296.26904296875, + "learning_rate": 3.0825848870701893e-07, + "loss": 19.0261, + "step": 449440 + }, + { + "epoch": 0.9079174359740947, + "grad_norm": 4.003561973571777, + "learning_rate": 3.08137831118675e-07, + "loss": 15.0675, + "step": 449450 + }, + { + "epoch": 0.9079376366067785, + "grad_norm": 197.02659606933594, + "learning_rate": 3.080171963978984e-07, + "loss": 21.0176, + "step": 449460 + }, + { + "epoch": 0.9079578372394623, + "grad_norm": 127.26113891601562, + "learning_rate": 3.078965845452769e-07, + "loss": 9.8642, + "step": 449470 + }, + { + "epoch": 0.9079780378721461, + "grad_norm": 345.94903564453125, + "learning_rate": 3.077759955613979e-07, + "loss": 13.7259, + "step": 449480 + }, + { + "epoch": 0.90799823850483, + "grad_norm": 188.60939025878906, + "learning_rate": 3.0765542944685036e-07, + "loss": 23.0865, + "step": 449490 + }, + { + "epoch": 0.9080184391375138, + "grad_norm": 357.7233581542969, + "learning_rate": 3.0753488620222037e-07, + "loss": 29.3263, + "step": 449500 + }, + { + "epoch": 0.9080386397701976, + "grad_norm": 13.091974258422852, + "learning_rate": 3.07414365828097e-07, + "loss": 14.683, + "step": 449510 + }, + { + "epoch": 0.9080588404028814, + "grad_norm": 337.0787048339844, + "learning_rate": 3.0729386832506647e-07, + "loss": 21.3541, + "step": 449520 + }, + { + "epoch": 0.9080790410355652, + "grad_norm": 631.6271362304688, + "learning_rate": 3.07173393693716e-07, + "loss": 23.8249, + "step": 449530 + }, + { + "epoch": 0.9080992416682491, + "grad_norm": 551.5026245117188, + "learning_rate": 3.0705294193463406e-07, + "loss": 14.6544, + "step": 449540 + }, + { + "epoch": 0.9081194423009329, + "grad_norm": 392.66705322265625, + "learning_rate": 3.069325130484069e-07, + "loss": 19.4949, + "step": 449550 + }, + { + "epoch": 0.9081396429336167, + "grad_norm": 56.55012130737305, + "learning_rate": 3.068121070356206e-07, + "loss": 19.1171, + "step": 449560 + }, + { + "epoch": 0.9081598435663005, + "grad_norm": 78.03938293457031, + "learning_rate": 3.066917238968631e-07, + "loss": 11.2309, + "step": 449570 + }, + { + "epoch": 0.9081800441989843, + "grad_norm": 86.67057037353516, + "learning_rate": 3.065713636327211e-07, + "loss": 20.7654, + "step": 449580 + }, + { + "epoch": 0.9082002448316682, + "grad_norm": 224.8217315673828, + "learning_rate": 3.0645102624378144e-07, + "loss": 16.2432, + "step": 449590 + }, + { + "epoch": 0.908220445464352, + "grad_norm": 423.26861572265625, + "learning_rate": 3.0633071173062966e-07, + "loss": 12.1546, + "step": 449600 + }, + { + "epoch": 0.9082406460970358, + "grad_norm": 68.96484375, + "learning_rate": 3.0621042009385313e-07, + "loss": 17.9391, + "step": 449610 + }, + { + "epoch": 0.9082608467297196, + "grad_norm": 189.0087890625, + "learning_rate": 3.0609015133403806e-07, + "loss": 19.6735, + "step": 449620 + }, + { + "epoch": 0.9082810473624033, + "grad_norm": 177.26031494140625, + "learning_rate": 3.0596990545176895e-07, + "loss": 15.3103, + "step": 449630 + }, + { + "epoch": 0.9083012479950872, + "grad_norm": 235.55921936035156, + "learning_rate": 3.058496824476337e-07, + "loss": 9.5063, + "step": 449640 + }, + { + "epoch": 0.908321448627771, + "grad_norm": 175.67483520507812, + "learning_rate": 3.057294823222184e-07, + "loss": 20.511, + "step": 449650 + }, + { + "epoch": 0.9083416492604548, + "grad_norm": 210.60415649414062, + "learning_rate": 3.056093050761083e-07, + "loss": 14.8836, + "step": 449660 + }, + { + "epoch": 0.9083618498931386, + "grad_norm": 255.8742218017578, + "learning_rate": 3.0548915070988837e-07, + "loss": 12.1927, + "step": 449670 + }, + { + "epoch": 0.9083820505258224, + "grad_norm": 211.16329956054688, + "learning_rate": 3.0536901922414543e-07, + "loss": 21.3886, + "step": 449680 + }, + { + "epoch": 0.9084022511585063, + "grad_norm": 361.5028991699219, + "learning_rate": 3.052489106194645e-07, + "loss": 29.5198, + "step": 449690 + }, + { + "epoch": 0.9084224517911901, + "grad_norm": 465.8304443359375, + "learning_rate": 3.051288248964307e-07, + "loss": 22.9383, + "step": 449700 + }, + { + "epoch": 0.9084426524238739, + "grad_norm": 68.58818817138672, + "learning_rate": 3.050087620556302e-07, + "loss": 7.1828, + "step": 449710 + }, + { + "epoch": 0.9084628530565577, + "grad_norm": 0.0, + "learning_rate": 3.0488872209764654e-07, + "loss": 20.7759, + "step": 449720 + }, + { + "epoch": 0.9084830536892415, + "grad_norm": 36.281620025634766, + "learning_rate": 3.047687050230663e-07, + "loss": 12.8341, + "step": 449730 + }, + { + "epoch": 0.9085032543219254, + "grad_norm": 411.8639831542969, + "learning_rate": 3.046487108324736e-07, + "loss": 16.3744, + "step": 449740 + }, + { + "epoch": 0.9085234549546092, + "grad_norm": 381.7320251464844, + "learning_rate": 3.0452873952645455e-07, + "loss": 12.6217, + "step": 449750 + }, + { + "epoch": 0.908543655587293, + "grad_norm": 478.99676513671875, + "learning_rate": 3.0440879110559263e-07, + "loss": 31.7627, + "step": 449760 + }, + { + "epoch": 0.9085638562199768, + "grad_norm": 40.081295013427734, + "learning_rate": 3.0428886557047176e-07, + "loss": 18.577, + "step": 449770 + }, + { + "epoch": 0.9085840568526606, + "grad_norm": 374.45465087890625, + "learning_rate": 3.0416896292167873e-07, + "loss": 23.2313, + "step": 449780 + }, + { + "epoch": 0.9086042574853445, + "grad_norm": 175.38536071777344, + "learning_rate": 3.0404908315979587e-07, + "loss": 20.0117, + "step": 449790 + }, + { + "epoch": 0.9086244581180283, + "grad_norm": 372.4679870605469, + "learning_rate": 3.0392922628540875e-07, + "loss": 22.1593, + "step": 449800 + }, + { + "epoch": 0.9086446587507121, + "grad_norm": 285.4126281738281, + "learning_rate": 3.0380939229910087e-07, + "loss": 22.7447, + "step": 449810 + }, + { + "epoch": 0.9086648593833959, + "grad_norm": 367.1722717285156, + "learning_rate": 3.036895812014556e-07, + "loss": 14.5338, + "step": 449820 + }, + { + "epoch": 0.9086850600160797, + "grad_norm": 357.52117919921875, + "learning_rate": 3.0356979299305867e-07, + "loss": 18.3037, + "step": 449830 + }, + { + "epoch": 0.9087052606487636, + "grad_norm": 200.9291534423828, + "learning_rate": 3.0345002767449337e-07, + "loss": 14.4045, + "step": 449840 + }, + { + "epoch": 0.9087254612814474, + "grad_norm": 659.1195068359375, + "learning_rate": 3.0333028524634156e-07, + "loss": 20.09, + "step": 449850 + }, + { + "epoch": 0.9087456619141312, + "grad_norm": 9.049476623535156, + "learning_rate": 3.0321056570918883e-07, + "loss": 14.2662, + "step": 449860 + }, + { + "epoch": 0.908765862546815, + "grad_norm": 711.8421630859375, + "learning_rate": 3.030908690636192e-07, + "loss": 25.4986, + "step": 449870 + }, + { + "epoch": 0.9087860631794987, + "grad_norm": 201.71435546875, + "learning_rate": 3.029711953102138e-07, + "loss": 23.3851, + "step": 449880 + }, + { + "epoch": 0.9088062638121825, + "grad_norm": 257.1253662109375, + "learning_rate": 3.028515444495572e-07, + "loss": 10.9446, + "step": 449890 + }, + { + "epoch": 0.9088264644448664, + "grad_norm": 275.5248718261719, + "learning_rate": 3.027319164822329e-07, + "loss": 16.2062, + "step": 449900 + }, + { + "epoch": 0.9088466650775502, + "grad_norm": 122.3408432006836, + "learning_rate": 3.0261231140882363e-07, + "loss": 30.1198, + "step": 449910 + }, + { + "epoch": 0.908866865710234, + "grad_norm": 161.4365692138672, + "learning_rate": 3.024927292299118e-07, + "loss": 21.3074, + "step": 449920 + }, + { + "epoch": 0.9088870663429178, + "grad_norm": 362.51983642578125, + "learning_rate": 3.0237316994608025e-07, + "loss": 15.4257, + "step": 449930 + }, + { + "epoch": 0.9089072669756016, + "grad_norm": 353.9023132324219, + "learning_rate": 3.02253633557914e-07, + "loss": 18.5174, + "step": 449940 + }, + { + "epoch": 0.9089274676082855, + "grad_norm": 203.5133056640625, + "learning_rate": 3.0213412006599216e-07, + "loss": 12.4842, + "step": 449950 + }, + { + "epoch": 0.9089476682409693, + "grad_norm": 0.0, + "learning_rate": 3.0201462947089865e-07, + "loss": 23.1439, + "step": 449960 + }, + { + "epoch": 0.9089678688736531, + "grad_norm": 518.57861328125, + "learning_rate": 3.018951617732169e-07, + "loss": 23.5071, + "step": 449970 + }, + { + "epoch": 0.9089880695063369, + "grad_norm": 513.31787109375, + "learning_rate": 3.01775716973528e-07, + "loss": 35.2477, + "step": 449980 + }, + { + "epoch": 0.9090082701390207, + "grad_norm": 264.6451721191406, + "learning_rate": 3.0165629507241446e-07, + "loss": 14.1119, + "step": 449990 + }, + { + "epoch": 0.9090284707717046, + "grad_norm": 197.44154357910156, + "learning_rate": 3.015368960704584e-07, + "loss": 14.4538, + "step": 450000 + }, + { + "epoch": 0.9090486714043884, + "grad_norm": 371.9814453125, + "learning_rate": 3.014175199682418e-07, + "loss": 15.6022, + "step": 450010 + }, + { + "epoch": 0.9090688720370722, + "grad_norm": 427.51153564453125, + "learning_rate": 3.012981667663456e-07, + "loss": 14.1698, + "step": 450020 + }, + { + "epoch": 0.909089072669756, + "grad_norm": 537.7033081054688, + "learning_rate": 3.011788364653523e-07, + "loss": 17.894, + "step": 450030 + }, + { + "epoch": 0.9091092733024398, + "grad_norm": 263.43603515625, + "learning_rate": 3.010595290658441e-07, + "loss": 7.9788, + "step": 450040 + }, + { + "epoch": 0.9091294739351237, + "grad_norm": 67.04159545898438, + "learning_rate": 3.0094024456840176e-07, + "loss": 12.2793, + "step": 450050 + }, + { + "epoch": 0.9091496745678075, + "grad_norm": 422.98028564453125, + "learning_rate": 3.008209829736064e-07, + "loss": 19.8516, + "step": 450060 + }, + { + "epoch": 0.9091698752004913, + "grad_norm": 129.8915557861328, + "learning_rate": 3.007017442820398e-07, + "loss": 21.2672, + "step": 450070 + }, + { + "epoch": 0.9091900758331751, + "grad_norm": 549.937744140625, + "learning_rate": 3.005825284942837e-07, + "loss": 24.0331, + "step": 450080 + }, + { + "epoch": 0.909210276465859, + "grad_norm": 8.076929092407227, + "learning_rate": 3.004633356109171e-07, + "loss": 24.6605, + "step": 450090 + }, + { + "epoch": 0.9092304770985428, + "grad_norm": 276.8363952636719, + "learning_rate": 3.003441656325229e-07, + "loss": 13.11, + "step": 450100 + }, + { + "epoch": 0.9092506777312266, + "grad_norm": 208.9092254638672, + "learning_rate": 3.002250185596806e-07, + "loss": 15.1485, + "step": 450110 + }, + { + "epoch": 0.9092708783639104, + "grad_norm": 66.46244812011719, + "learning_rate": 3.0010589439297245e-07, + "loss": 31.4674, + "step": 450120 + }, + { + "epoch": 0.9092910789965942, + "grad_norm": 286.9383850097656, + "learning_rate": 2.9998679313297807e-07, + "loss": 18.5123, + "step": 450130 + }, + { + "epoch": 0.9093112796292779, + "grad_norm": 250.16802978515625, + "learning_rate": 2.99867714780277e-07, + "loss": 17.2191, + "step": 450140 + }, + { + "epoch": 0.9093314802619618, + "grad_norm": 104.57228088378906, + "learning_rate": 2.9974865933545207e-07, + "loss": 15.2934, + "step": 450150 + }, + { + "epoch": 0.9093516808946456, + "grad_norm": 123.68235778808594, + "learning_rate": 2.996296267990817e-07, + "loss": 16.0567, + "step": 450160 + }, + { + "epoch": 0.9093718815273294, + "grad_norm": 224.68667602539062, + "learning_rate": 2.9951061717174543e-07, + "loss": 17.8656, + "step": 450170 + }, + { + "epoch": 0.9093920821600132, + "grad_norm": 275.7343444824219, + "learning_rate": 2.9939163045402456e-07, + "loss": 7.4893, + "step": 450180 + }, + { + "epoch": 0.909412282792697, + "grad_norm": 67.62259674072266, + "learning_rate": 2.992726666464996e-07, + "loss": 17.9176, + "step": 450190 + }, + { + "epoch": 0.9094324834253809, + "grad_norm": 259.7737121582031, + "learning_rate": 2.99153725749749e-07, + "loss": 17.5629, + "step": 450200 + }, + { + "epoch": 0.9094526840580647, + "grad_norm": 278.46795654296875, + "learning_rate": 2.990348077643529e-07, + "loss": 12.2591, + "step": 450210 + }, + { + "epoch": 0.9094728846907485, + "grad_norm": 118.11419677734375, + "learning_rate": 2.989159126908914e-07, + "loss": 16.5497, + "step": 450220 + }, + { + "epoch": 0.9094930853234323, + "grad_norm": 325.4405212402344, + "learning_rate": 2.9879704052994395e-07, + "loss": 11.928, + "step": 450230 + }, + { + "epoch": 0.9095132859561161, + "grad_norm": 278.14739990234375, + "learning_rate": 2.986781912820885e-07, + "loss": 9.2348, + "step": 450240 + }, + { + "epoch": 0.9095334865888, + "grad_norm": 533.7655639648438, + "learning_rate": 2.9855936494790516e-07, + "loss": 21.2803, + "step": 450250 + }, + { + "epoch": 0.9095536872214838, + "grad_norm": 41.38686752319336, + "learning_rate": 2.9844056152797505e-07, + "loss": 15.8319, + "step": 450260 + }, + { + "epoch": 0.9095738878541676, + "grad_norm": 178.795654296875, + "learning_rate": 2.983217810228739e-07, + "loss": 5.9369, + "step": 450270 + }, + { + "epoch": 0.9095940884868514, + "grad_norm": 249.68287658691406, + "learning_rate": 2.9820302343318177e-07, + "loss": 19.1524, + "step": 450280 + }, + { + "epoch": 0.9096142891195352, + "grad_norm": 249.4966583251953, + "learning_rate": 2.9808428875947925e-07, + "loss": 13.1276, + "step": 450290 + }, + { + "epoch": 0.909634489752219, + "grad_norm": 6.903567790985107, + "learning_rate": 2.9796557700234317e-07, + "loss": 24.3792, + "step": 450300 + }, + { + "epoch": 0.9096546903849029, + "grad_norm": 182.82485961914062, + "learning_rate": 2.9784688816235194e-07, + "loss": 19.4116, + "step": 450310 + }, + { + "epoch": 0.9096748910175867, + "grad_norm": 173.53346252441406, + "learning_rate": 2.9772822224008515e-07, + "loss": 14.2293, + "step": 450320 + }, + { + "epoch": 0.9096950916502705, + "grad_norm": 170.77554321289062, + "learning_rate": 2.976095792361211e-07, + "loss": 46.01, + "step": 450330 + }, + { + "epoch": 0.9097152922829543, + "grad_norm": 276.0481872558594, + "learning_rate": 2.9749095915103665e-07, + "loss": 25.1989, + "step": 450340 + }, + { + "epoch": 0.9097354929156382, + "grad_norm": 309.44805908203125, + "learning_rate": 2.9737236198541077e-07, + "loss": 28.9171, + "step": 450350 + }, + { + "epoch": 0.909755693548322, + "grad_norm": 99.80960083007812, + "learning_rate": 2.9725378773982295e-07, + "loss": 16.5631, + "step": 450360 + }, + { + "epoch": 0.9097758941810058, + "grad_norm": 277.0606384277344, + "learning_rate": 2.971352364148494e-07, + "loss": 15.4084, + "step": 450370 + }, + { + "epoch": 0.9097960948136896, + "grad_norm": 14.412562370300293, + "learning_rate": 2.970167080110675e-07, + "loss": 10.3078, + "step": 450380 + }, + { + "epoch": 0.9098162954463734, + "grad_norm": 117.36461639404297, + "learning_rate": 2.968982025290568e-07, + "loss": 20.2965, + "step": 450390 + }, + { + "epoch": 0.9098364960790571, + "grad_norm": 42.38365936279297, + "learning_rate": 2.967797199693928e-07, + "loss": 23.8395, + "step": 450400 + }, + { + "epoch": 0.909856696711741, + "grad_norm": 529.2208862304688, + "learning_rate": 2.9666126033265517e-07, + "loss": 18.0251, + "step": 450410 + }, + { + "epoch": 0.9098768973444248, + "grad_norm": 134.37818908691406, + "learning_rate": 2.9654282361941953e-07, + "loss": 20.5174, + "step": 450420 + }, + { + "epoch": 0.9098970979771086, + "grad_norm": 486.904052734375, + "learning_rate": 2.9642440983026324e-07, + "loss": 20.9586, + "step": 450430 + }, + { + "epoch": 0.9099172986097924, + "grad_norm": 410.2285461425781, + "learning_rate": 2.963060189657646e-07, + "loss": 15.4766, + "step": 450440 + }, + { + "epoch": 0.9099374992424762, + "grad_norm": 139.95008850097656, + "learning_rate": 2.961876510264999e-07, + "loss": 18.7565, + "step": 450450 + }, + { + "epoch": 0.9099576998751601, + "grad_norm": 183.5253448486328, + "learning_rate": 2.9606930601304595e-07, + "loss": 16.7339, + "step": 450460 + }, + { + "epoch": 0.9099779005078439, + "grad_norm": 28.533550262451172, + "learning_rate": 2.9595098392597887e-07, + "loss": 13.0484, + "step": 450470 + }, + { + "epoch": 0.9099981011405277, + "grad_norm": 442.8360290527344, + "learning_rate": 2.958326847658771e-07, + "loss": 15.7757, + "step": 450480 + }, + { + "epoch": 0.9100183017732115, + "grad_norm": 149.72361755371094, + "learning_rate": 2.9571440853331634e-07, + "loss": 20.018, + "step": 450490 + }, + { + "epoch": 0.9100385024058953, + "grad_norm": 338.7470397949219, + "learning_rate": 2.9559615522887275e-07, + "loss": 11.6823, + "step": 450500 + }, + { + "epoch": 0.9100587030385792, + "grad_norm": 156.58456420898438, + "learning_rate": 2.954779248531231e-07, + "loss": 13.7843, + "step": 450510 + }, + { + "epoch": 0.910078903671263, + "grad_norm": 204.5081024169922, + "learning_rate": 2.953597174066436e-07, + "loss": 17.9999, + "step": 450520 + }, + { + "epoch": 0.9100991043039468, + "grad_norm": 192.9317626953125, + "learning_rate": 2.952415328900093e-07, + "loss": 13.9295, + "step": 450530 + }, + { + "epoch": 0.9101193049366306, + "grad_norm": 129.8246612548828, + "learning_rate": 2.951233713037971e-07, + "loss": 10.9243, + "step": 450540 + }, + { + "epoch": 0.9101395055693144, + "grad_norm": 299.04266357421875, + "learning_rate": 2.9500523264858473e-07, + "loss": 15.7801, + "step": 450550 + }, + { + "epoch": 0.9101597062019983, + "grad_norm": 707.8751831054688, + "learning_rate": 2.948871169249451e-07, + "loss": 24.1985, + "step": 450560 + }, + { + "epoch": 0.9101799068346821, + "grad_norm": 310.42083740234375, + "learning_rate": 2.9476902413345443e-07, + "loss": 16.73, + "step": 450570 + }, + { + "epoch": 0.9102001074673659, + "grad_norm": 123.99519348144531, + "learning_rate": 2.946509542746895e-07, + "loss": 10.4897, + "step": 450580 + }, + { + "epoch": 0.9102203081000497, + "grad_norm": 291.90667724609375, + "learning_rate": 2.9453290734922537e-07, + "loss": 26.6431, + "step": 450590 + }, + { + "epoch": 0.9102405087327335, + "grad_norm": 292.6350402832031, + "learning_rate": 2.9441488335763656e-07, + "loss": 32.7174, + "step": 450600 + }, + { + "epoch": 0.9102607093654174, + "grad_norm": 7.0587897300720215, + "learning_rate": 2.9429688230049934e-07, + "loss": 13.6948, + "step": 450610 + }, + { + "epoch": 0.9102809099981012, + "grad_norm": 393.41009521484375, + "learning_rate": 2.941789041783888e-07, + "loss": 9.8604, + "step": 450620 + }, + { + "epoch": 0.910301110630785, + "grad_norm": 11.90262508392334, + "learning_rate": 2.940609489918783e-07, + "loss": 11.5078, + "step": 450630 + }, + { + "epoch": 0.9103213112634688, + "grad_norm": 336.51275634765625, + "learning_rate": 2.9394301674154413e-07, + "loss": 15.6038, + "step": 450640 + }, + { + "epoch": 0.9103415118961525, + "grad_norm": 206.1016845703125, + "learning_rate": 2.938251074279619e-07, + "loss": 16.0895, + "step": 450650 + }, + { + "epoch": 0.9103617125288364, + "grad_norm": 543.8297119140625, + "learning_rate": 2.9370722105170504e-07, + "loss": 16.5557, + "step": 450660 + }, + { + "epoch": 0.9103819131615202, + "grad_norm": 30.56165885925293, + "learning_rate": 2.935893576133475e-07, + "loss": 25.3035, + "step": 450670 + }, + { + "epoch": 0.910402113794204, + "grad_norm": 67.37085723876953, + "learning_rate": 2.9347151711346556e-07, + "loss": 12.4123, + "step": 450680 + }, + { + "epoch": 0.9104223144268878, + "grad_norm": 279.19879150390625, + "learning_rate": 2.933536995526326e-07, + "loss": 16.1284, + "step": 450690 + }, + { + "epoch": 0.9104425150595716, + "grad_norm": 54.01791000366211, + "learning_rate": 2.9323590493142206e-07, + "loss": 16.3258, + "step": 450700 + }, + { + "epoch": 0.9104627156922555, + "grad_norm": 204.2699737548828, + "learning_rate": 2.931181332504096e-07, + "loss": 15.5862, + "step": 450710 + }, + { + "epoch": 0.9104829163249393, + "grad_norm": 162.3144989013672, + "learning_rate": 2.930003845101681e-07, + "loss": 15.9014, + "step": 450720 + }, + { + "epoch": 0.9105031169576231, + "grad_norm": 227.80149841308594, + "learning_rate": 2.9288265871127206e-07, + "loss": 11.8717, + "step": 450730 + }, + { + "epoch": 0.9105233175903069, + "grad_norm": 142.5568084716797, + "learning_rate": 2.927649558542955e-07, + "loss": 14.2517, + "step": 450740 + }, + { + "epoch": 0.9105435182229907, + "grad_norm": 250.9642333984375, + "learning_rate": 2.9264727593981024e-07, + "loss": 15.1651, + "step": 450750 + }, + { + "epoch": 0.9105637188556746, + "grad_norm": 179.55856323242188, + "learning_rate": 2.9252961896839236e-07, + "loss": 15.7246, + "step": 450760 + }, + { + "epoch": 0.9105839194883584, + "grad_norm": 405.2208251953125, + "learning_rate": 2.9241198494061427e-07, + "loss": 20.4519, + "step": 450770 + }, + { + "epoch": 0.9106041201210422, + "grad_norm": 47.20588302612305, + "learning_rate": 2.922943738570483e-07, + "loss": 13.0551, + "step": 450780 + }, + { + "epoch": 0.910624320753726, + "grad_norm": 186.8336944580078, + "learning_rate": 2.921767857182689e-07, + "loss": 12.4455, + "step": 450790 + }, + { + "epoch": 0.9106445213864098, + "grad_norm": 224.8080291748047, + "learning_rate": 2.920592205248496e-07, + "loss": 20.2467, + "step": 450800 + }, + { + "epoch": 0.9106647220190937, + "grad_norm": 288.7509460449219, + "learning_rate": 2.919416782773621e-07, + "loss": 17.0196, + "step": 450810 + }, + { + "epoch": 0.9106849226517775, + "grad_norm": 249.38348388671875, + "learning_rate": 2.918241589763793e-07, + "loss": 18.2203, + "step": 450820 + }, + { + "epoch": 0.9107051232844613, + "grad_norm": 108.31822204589844, + "learning_rate": 2.917066626224757e-07, + "loss": 22.2649, + "step": 450830 + }, + { + "epoch": 0.9107253239171451, + "grad_norm": 455.79339599609375, + "learning_rate": 2.9158918921622205e-07, + "loss": 28.5233, + "step": 450840 + }, + { + "epoch": 0.9107455245498289, + "grad_norm": 205.9493865966797, + "learning_rate": 2.914717387581917e-07, + "loss": 27.7138, + "step": 450850 + }, + { + "epoch": 0.9107657251825128, + "grad_norm": 259.10467529296875, + "learning_rate": 2.913543112489564e-07, + "loss": 23.003, + "step": 450860 + }, + { + "epoch": 0.9107859258151966, + "grad_norm": 451.16400146484375, + "learning_rate": 2.912369066890908e-07, + "loss": 13.9283, + "step": 450870 + }, + { + "epoch": 0.9108061264478804, + "grad_norm": 359.8724670410156, + "learning_rate": 2.9111952507916375e-07, + "loss": 25.6942, + "step": 450880 + }, + { + "epoch": 0.9108263270805642, + "grad_norm": 158.18458557128906, + "learning_rate": 2.910021664197493e-07, + "loss": 21.1689, + "step": 450890 + }, + { + "epoch": 0.910846527713248, + "grad_norm": 341.4246520996094, + "learning_rate": 2.908848307114198e-07, + "loss": 18.6615, + "step": 450900 + }, + { + "epoch": 0.9108667283459317, + "grad_norm": 193.39120483398438, + "learning_rate": 2.9076751795474647e-07, + "loss": 13.6904, + "step": 450910 + }, + { + "epoch": 0.9108869289786156, + "grad_norm": 191.11099243164062, + "learning_rate": 2.9065022815030044e-07, + "loss": 9.8379, + "step": 450920 + }, + { + "epoch": 0.9109071296112994, + "grad_norm": 411.2284240722656, + "learning_rate": 2.905329612986546e-07, + "loss": 18.3774, + "step": 450930 + }, + { + "epoch": 0.9109273302439832, + "grad_norm": 164.7104034423828, + "learning_rate": 2.9041571740037967e-07, + "loss": 15.9514, + "step": 450940 + }, + { + "epoch": 0.910947530876667, + "grad_norm": 386.8396911621094, + "learning_rate": 2.9029849645604735e-07, + "loss": 19.9905, + "step": 450950 + }, + { + "epoch": 0.9109677315093508, + "grad_norm": 232.3302459716797, + "learning_rate": 2.9018129846622834e-07, + "loss": 11.5451, + "step": 450960 + }, + { + "epoch": 0.9109879321420347, + "grad_norm": 372.60186767578125, + "learning_rate": 2.900641234314955e-07, + "loss": 20.9357, + "step": 450970 + }, + { + "epoch": 0.9110081327747185, + "grad_norm": 126.22235107421875, + "learning_rate": 2.899469713524183e-07, + "loss": 4.8332, + "step": 450980 + }, + { + "epoch": 0.9110283334074023, + "grad_norm": 14.61534309387207, + "learning_rate": 2.898298422295681e-07, + "loss": 20.8277, + "step": 450990 + }, + { + "epoch": 0.9110485340400861, + "grad_norm": 215.75051879882812, + "learning_rate": 2.8971273606351656e-07, + "loss": 16.382, + "step": 451000 + }, + { + "epoch": 0.9110687346727699, + "grad_norm": 182.18350219726562, + "learning_rate": 2.895956528548338e-07, + "loss": 35.4606, + "step": 451010 + }, + { + "epoch": 0.9110889353054538, + "grad_norm": 148.57958984375, + "learning_rate": 2.8947859260408997e-07, + "loss": 15.8084, + "step": 451020 + }, + { + "epoch": 0.9111091359381376, + "grad_norm": 32.10409164428711, + "learning_rate": 2.8936155531185675e-07, + "loss": 30.7559, + "step": 451030 + }, + { + "epoch": 0.9111293365708214, + "grad_norm": 227.17288208007812, + "learning_rate": 2.892445409787037e-07, + "loss": 31.6167, + "step": 451040 + }, + { + "epoch": 0.9111495372035052, + "grad_norm": 85.67294311523438, + "learning_rate": 2.891275496052015e-07, + "loss": 16.6726, + "step": 451050 + }, + { + "epoch": 0.911169737836189, + "grad_norm": 350.76080322265625, + "learning_rate": 2.8901058119192026e-07, + "loss": 16.188, + "step": 451060 + }, + { + "epoch": 0.9111899384688729, + "grad_norm": 245.30093383789062, + "learning_rate": 2.8889363573943006e-07, + "loss": 12.0636, + "step": 451070 + }, + { + "epoch": 0.9112101391015567, + "grad_norm": 290.5528259277344, + "learning_rate": 2.8877671324829994e-07, + "loss": 16.3702, + "step": 451080 + }, + { + "epoch": 0.9112303397342405, + "grad_norm": 241.7861785888672, + "learning_rate": 2.886598137191021e-07, + "loss": 22.5154, + "step": 451090 + }, + { + "epoch": 0.9112505403669243, + "grad_norm": 20.893571853637695, + "learning_rate": 2.8854293715240455e-07, + "loss": 20.9318, + "step": 451100 + }, + { + "epoch": 0.9112707409996081, + "grad_norm": 121.3875732421875, + "learning_rate": 2.884260835487768e-07, + "loss": 13.9963, + "step": 451110 + }, + { + "epoch": 0.911290941632292, + "grad_norm": 139.95486450195312, + "learning_rate": 2.8830925290878997e-07, + "loss": 20.3732, + "step": 451120 + }, + { + "epoch": 0.9113111422649758, + "grad_norm": 137.38673400878906, + "learning_rate": 2.8819244523301206e-07, + "loss": 13.2261, + "step": 451130 + }, + { + "epoch": 0.9113313428976596, + "grad_norm": 224.9229736328125, + "learning_rate": 2.880756605220114e-07, + "loss": 15.8526, + "step": 451140 + }, + { + "epoch": 0.9113515435303434, + "grad_norm": 468.8535461425781, + "learning_rate": 2.879588987763593e-07, + "loss": 27.8865, + "step": 451150 + }, + { + "epoch": 0.9113717441630271, + "grad_norm": 195.5144500732422, + "learning_rate": 2.878421599966252e-07, + "loss": 19.7361, + "step": 451160 + }, + { + "epoch": 0.911391944795711, + "grad_norm": 250.16378784179688, + "learning_rate": 2.877254441833754e-07, + "loss": 34.297, + "step": 451170 + }, + { + "epoch": 0.9114121454283948, + "grad_norm": 397.02337646484375, + "learning_rate": 2.8760875133718003e-07, + "loss": 43.6365, + "step": 451180 + }, + { + "epoch": 0.9114323460610786, + "grad_norm": 735.8729858398438, + "learning_rate": 2.8749208145860907e-07, + "loss": 13.713, + "step": 451190 + }, + { + "epoch": 0.9114525466937624, + "grad_norm": 160.88473510742188, + "learning_rate": 2.8737543454822993e-07, + "loss": 16.7952, + "step": 451200 + }, + { + "epoch": 0.9114727473264462, + "grad_norm": 216.04766845703125, + "learning_rate": 2.87258810606611e-07, + "loss": 12.7574, + "step": 451210 + }, + { + "epoch": 0.91149294795913, + "grad_norm": 256.7967834472656, + "learning_rate": 2.8714220963432125e-07, + "loss": 15.7725, + "step": 451220 + }, + { + "epoch": 0.9115131485918139, + "grad_norm": 0.3572355806827545, + "learning_rate": 2.870256316319292e-07, + "loss": 24.3407, + "step": 451230 + }, + { + "epoch": 0.9115333492244977, + "grad_norm": 305.8081359863281, + "learning_rate": 2.8690907660000156e-07, + "loss": 13.9204, + "step": 451240 + }, + { + "epoch": 0.9115535498571815, + "grad_norm": 78.4308853149414, + "learning_rate": 2.867925445391079e-07, + "loss": 11.3374, + "step": 451250 + }, + { + "epoch": 0.9115737504898653, + "grad_norm": 505.9815368652344, + "learning_rate": 2.8667603544981604e-07, + "loss": 18.6716, + "step": 451260 + }, + { + "epoch": 0.9115939511225492, + "grad_norm": 448.60125732421875, + "learning_rate": 2.8655954933269395e-07, + "loss": 22.5912, + "step": 451270 + }, + { + "epoch": 0.911614151755233, + "grad_norm": 346.9026794433594, + "learning_rate": 2.8644308618830775e-07, + "loss": 24.7697, + "step": 451280 + }, + { + "epoch": 0.9116343523879168, + "grad_norm": 177.3780517578125, + "learning_rate": 2.86326646017227e-07, + "loss": 12.4185, + "step": 451290 + }, + { + "epoch": 0.9116545530206006, + "grad_norm": 56.058929443359375, + "learning_rate": 2.862102288200186e-07, + "loss": 8.4533, + "step": 451300 + }, + { + "epoch": 0.9116747536532844, + "grad_norm": 61.3682746887207, + "learning_rate": 2.8609383459724915e-07, + "loss": 10.9397, + "step": 451310 + }, + { + "epoch": 0.9116949542859683, + "grad_norm": 295.5820617675781, + "learning_rate": 2.8597746334948773e-07, + "loss": 13.9304, + "step": 451320 + }, + { + "epoch": 0.9117151549186521, + "grad_norm": 363.1051025390625, + "learning_rate": 2.8586111507729887e-07, + "loss": 17.3195, + "step": 451330 + }, + { + "epoch": 0.9117353555513359, + "grad_norm": 201.4821319580078, + "learning_rate": 2.8574478978125266e-07, + "loss": 29.5149, + "step": 451340 + }, + { + "epoch": 0.9117555561840197, + "grad_norm": 168.32260131835938, + "learning_rate": 2.856284874619142e-07, + "loss": 14.6948, + "step": 451350 + }, + { + "epoch": 0.9117757568167035, + "grad_norm": 278.9552917480469, + "learning_rate": 2.855122081198503e-07, + "loss": 14.1625, + "step": 451360 + }, + { + "epoch": 0.9117959574493874, + "grad_norm": 246.78610229492188, + "learning_rate": 2.8539595175562817e-07, + "loss": 14.2023, + "step": 451370 + }, + { + "epoch": 0.9118161580820712, + "grad_norm": 283.81964111328125, + "learning_rate": 2.852797183698147e-07, + "loss": 25.7544, + "step": 451380 + }, + { + "epoch": 0.911836358714755, + "grad_norm": 120.52972412109375, + "learning_rate": 2.851635079629755e-07, + "loss": 17.5716, + "step": 451390 + }, + { + "epoch": 0.9118565593474388, + "grad_norm": 9.556193351745605, + "learning_rate": 2.850473205356774e-07, + "loss": 19.6392, + "step": 451400 + }, + { + "epoch": 0.9118767599801226, + "grad_norm": 252.46270751953125, + "learning_rate": 2.8493115608848764e-07, + "loss": 23.096, + "step": 451410 + }, + { + "epoch": 0.9118969606128063, + "grad_norm": 176.66888427734375, + "learning_rate": 2.8481501462197137e-07, + "loss": 12.809, + "step": 451420 + }, + { + "epoch": 0.9119171612454902, + "grad_norm": 388.5914001464844, + "learning_rate": 2.846988961366942e-07, + "loss": 21.0153, + "step": 451430 + }, + { + "epoch": 0.911937361878174, + "grad_norm": 210.03109741210938, + "learning_rate": 2.8458280063322353e-07, + "loss": 22.5919, + "step": 451440 + }, + { + "epoch": 0.9119575625108578, + "grad_norm": 297.4843444824219, + "learning_rate": 2.844667281121244e-07, + "loss": 16.2806, + "step": 451450 + }, + { + "epoch": 0.9119777631435416, + "grad_norm": 236.39234924316406, + "learning_rate": 2.843506785739614e-07, + "loss": 10.5966, + "step": 451460 + }, + { + "epoch": 0.9119979637762254, + "grad_norm": 35.00349044799805, + "learning_rate": 2.842346520193018e-07, + "loss": 17.5844, + "step": 451470 + }, + { + "epoch": 0.9120181644089093, + "grad_norm": 179.8292999267578, + "learning_rate": 2.8411864844871184e-07, + "loss": 15.5369, + "step": 451480 + }, + { + "epoch": 0.9120383650415931, + "grad_norm": 186.2885284423828, + "learning_rate": 2.8400266786275387e-07, + "loss": 32.9115, + "step": 451490 + }, + { + "epoch": 0.9120585656742769, + "grad_norm": 259.9469299316406, + "learning_rate": 2.838867102619952e-07, + "loss": 16.1085, + "step": 451500 + }, + { + "epoch": 0.9120787663069607, + "grad_norm": 193.43089294433594, + "learning_rate": 2.8377077564700094e-07, + "loss": 10.3301, + "step": 451510 + }, + { + "epoch": 0.9120989669396445, + "grad_norm": 311.5097961425781, + "learning_rate": 2.8365486401833677e-07, + "loss": 20.2049, + "step": 451520 + }, + { + "epoch": 0.9121191675723284, + "grad_norm": 194.8396759033203, + "learning_rate": 2.835389753765655e-07, + "loss": 13.4595, + "step": 451530 + }, + { + "epoch": 0.9121393682050122, + "grad_norm": 443.40216064453125, + "learning_rate": 2.834231097222534e-07, + "loss": 23.4322, + "step": 451540 + }, + { + "epoch": 0.912159568837696, + "grad_norm": 445.602783203125, + "learning_rate": 2.833072670559661e-07, + "loss": 21.8692, + "step": 451550 + }, + { + "epoch": 0.9121797694703798, + "grad_norm": 496.0737609863281, + "learning_rate": 2.83191447378266e-07, + "loss": 18.3607, + "step": 451560 + }, + { + "epoch": 0.9121999701030636, + "grad_norm": 42.452980041503906, + "learning_rate": 2.8307565068971867e-07, + "loss": 17.8633, + "step": 451570 + }, + { + "epoch": 0.9122201707357475, + "grad_norm": 284.5263671875, + "learning_rate": 2.829598769908892e-07, + "loss": 26.0513, + "step": 451580 + }, + { + "epoch": 0.9122403713684313, + "grad_norm": 172.8795928955078, + "learning_rate": 2.8284412628234117e-07, + "loss": 11.8185, + "step": 451590 + }, + { + "epoch": 0.9122605720011151, + "grad_norm": 14.350419044494629, + "learning_rate": 2.8272839856463783e-07, + "loss": 13.9188, + "step": 451600 + }, + { + "epoch": 0.9122807726337989, + "grad_norm": 398.5994873046875, + "learning_rate": 2.8261269383834497e-07, + "loss": 19.949, + "step": 451610 + }, + { + "epoch": 0.9123009732664827, + "grad_norm": 271.53192138671875, + "learning_rate": 2.8249701210402603e-07, + "loss": 20.3639, + "step": 451620 + }, + { + "epoch": 0.9123211738991666, + "grad_norm": 352.9737854003906, + "learning_rate": 2.823813533622438e-07, + "loss": 15.1458, + "step": 451630 + }, + { + "epoch": 0.9123413745318504, + "grad_norm": 438.1044616699219, + "learning_rate": 2.822657176135629e-07, + "loss": 17.7847, + "step": 451640 + }, + { + "epoch": 0.9123615751645342, + "grad_norm": 383.8388366699219, + "learning_rate": 2.821501048585462e-07, + "loss": 62.2985, + "step": 451650 + }, + { + "epoch": 0.912381775797218, + "grad_norm": 194.94883728027344, + "learning_rate": 2.8203451509775825e-07, + "loss": 12.9917, + "step": 451660 + }, + { + "epoch": 0.9124019764299018, + "grad_norm": 2.216585874557495, + "learning_rate": 2.819189483317625e-07, + "loss": 16.7181, + "step": 451670 + }, + { + "epoch": 0.9124221770625855, + "grad_norm": 514.4530029296875, + "learning_rate": 2.818034045611201e-07, + "loss": 20.4691, + "step": 451680 + }, + { + "epoch": 0.9124423776952694, + "grad_norm": 280.14794921875, + "learning_rate": 2.816878837863968e-07, + "loss": 11.5293, + "step": 451690 + }, + { + "epoch": 0.9124625783279532, + "grad_norm": 436.2810974121094, + "learning_rate": 2.815723860081537e-07, + "loss": 17.2589, + "step": 451700 + }, + { + "epoch": 0.912482778960637, + "grad_norm": 122.03319549560547, + "learning_rate": 2.8145691122695496e-07, + "loss": 20.786, + "step": 451710 + }, + { + "epoch": 0.9125029795933208, + "grad_norm": 261.3052673339844, + "learning_rate": 2.8134145944336225e-07, + "loss": 15.168, + "step": 451720 + }, + { + "epoch": 0.9125231802260046, + "grad_norm": 209.5710906982422, + "learning_rate": 2.812260306579401e-07, + "loss": 20.8096, + "step": 451730 + }, + { + "epoch": 0.9125433808586885, + "grad_norm": 430.4251403808594, + "learning_rate": 2.811106248712497e-07, + "loss": 22.5538, + "step": 451740 + }, + { + "epoch": 0.9125635814913723, + "grad_norm": 333.8047180175781, + "learning_rate": 2.8099524208385297e-07, + "loss": 30.343, + "step": 451750 + }, + { + "epoch": 0.9125837821240561, + "grad_norm": 219.22140502929688, + "learning_rate": 2.8087988229631325e-07, + "loss": 10.7576, + "step": 451760 + }, + { + "epoch": 0.9126039827567399, + "grad_norm": 256.58636474609375, + "learning_rate": 2.8076454550919397e-07, + "loss": 20.6302, + "step": 451770 + }, + { + "epoch": 0.9126241833894237, + "grad_norm": 582.3648071289062, + "learning_rate": 2.8064923172305467e-07, + "loss": 27.125, + "step": 451780 + }, + { + "epoch": 0.9126443840221076, + "grad_norm": 168.98902893066406, + "learning_rate": 2.8053394093845833e-07, + "loss": 15.7785, + "step": 451790 + }, + { + "epoch": 0.9126645846547914, + "grad_norm": 6.28079891204834, + "learning_rate": 2.804186731559677e-07, + "loss": 12.2514, + "step": 451800 + }, + { + "epoch": 0.9126847852874752, + "grad_norm": 354.2558288574219, + "learning_rate": 2.8030342837614466e-07, + "loss": 16.6112, + "step": 451810 + }, + { + "epoch": 0.912704985920159, + "grad_norm": 79.20686340332031, + "learning_rate": 2.8018820659954927e-07, + "loss": 14.0123, + "step": 451820 + }, + { + "epoch": 0.9127251865528428, + "grad_norm": 65.2433853149414, + "learning_rate": 2.800730078267444e-07, + "loss": 12.3392, + "step": 451830 + }, + { + "epoch": 0.9127453871855267, + "grad_norm": 9.308917045593262, + "learning_rate": 2.7995783205829185e-07, + "loss": 15.2853, + "step": 451840 + }, + { + "epoch": 0.9127655878182105, + "grad_norm": 254.87281799316406, + "learning_rate": 2.798426792947517e-07, + "loss": 17.1233, + "step": 451850 + }, + { + "epoch": 0.9127857884508943, + "grad_norm": 483.3761901855469, + "learning_rate": 2.7972754953668524e-07, + "loss": 17.6073, + "step": 451860 + }, + { + "epoch": 0.9128059890835781, + "grad_norm": 222.07420349121094, + "learning_rate": 2.796124427846553e-07, + "loss": 9.4997, + "step": 451870 + }, + { + "epoch": 0.912826189716262, + "grad_norm": 184.33493041992188, + "learning_rate": 2.7949735903922195e-07, + "loss": 17.244, + "step": 451880 + }, + { + "epoch": 0.9128463903489458, + "grad_norm": 281.9110107421875, + "learning_rate": 2.7938229830094475e-07, + "loss": 13.8942, + "step": 451890 + }, + { + "epoch": 0.9128665909816296, + "grad_norm": 586.2553100585938, + "learning_rate": 2.792672605703867e-07, + "loss": 24.6446, + "step": 451900 + }, + { + "epoch": 0.9128867916143134, + "grad_norm": 375.984375, + "learning_rate": 2.791522458481077e-07, + "loss": 22.5588, + "step": 451910 + }, + { + "epoch": 0.9129069922469972, + "grad_norm": 335.722412109375, + "learning_rate": 2.79037254134667e-07, + "loss": 25.6251, + "step": 451920 + }, + { + "epoch": 0.9129271928796809, + "grad_norm": 231.74801635742188, + "learning_rate": 2.7892228543062725e-07, + "loss": 9.2204, + "step": 451930 + }, + { + "epoch": 0.9129473935123648, + "grad_norm": 499.72247314453125, + "learning_rate": 2.788073397365465e-07, + "loss": 22.4502, + "step": 451940 + }, + { + "epoch": 0.9129675941450486, + "grad_norm": 169.93235778808594, + "learning_rate": 2.78692417052987e-07, + "loss": 16.9182, + "step": 451950 + }, + { + "epoch": 0.9129877947777324, + "grad_norm": 309.1972961425781, + "learning_rate": 2.785775173805083e-07, + "loss": 26.383, + "step": 451960 + }, + { + "epoch": 0.9130079954104162, + "grad_norm": 432.0539855957031, + "learning_rate": 2.784626407196689e-07, + "loss": 18.6492, + "step": 451970 + }, + { + "epoch": 0.9130281960431, + "grad_norm": 234.69717407226562, + "learning_rate": 2.7834778707103104e-07, + "loss": 13.5171, + "step": 451980 + }, + { + "epoch": 0.9130483966757839, + "grad_norm": 9.804825782775879, + "learning_rate": 2.782329564351532e-07, + "loss": 28.9248, + "step": 451990 + }, + { + "epoch": 0.9130685973084677, + "grad_norm": 436.0420227050781, + "learning_rate": 2.7811814881259503e-07, + "loss": 31.211, + "step": 452000 + }, + { + "epoch": 0.9130887979411515, + "grad_norm": 506.11895751953125, + "learning_rate": 2.7800336420391593e-07, + "loss": 18.9072, + "step": 452010 + }, + { + "epoch": 0.9131089985738353, + "grad_norm": 199.8274688720703, + "learning_rate": 2.7788860260967665e-07, + "loss": 21.4032, + "step": 452020 + }, + { + "epoch": 0.9131291992065191, + "grad_norm": 369.046875, + "learning_rate": 2.77773864030435e-07, + "loss": 14.663, + "step": 452030 + }, + { + "epoch": 0.913149399839203, + "grad_norm": 397.6197204589844, + "learning_rate": 2.7765914846675067e-07, + "loss": 26.5788, + "step": 452040 + }, + { + "epoch": 0.9131696004718868, + "grad_norm": 214.2436065673828, + "learning_rate": 2.775444559191837e-07, + "loss": 16.6265, + "step": 452050 + }, + { + "epoch": 0.9131898011045706, + "grad_norm": 298.3954162597656, + "learning_rate": 2.774297863882919e-07, + "loss": 20.599, + "step": 452060 + }, + { + "epoch": 0.9132100017372544, + "grad_norm": 171.51646423339844, + "learning_rate": 2.773151398746338e-07, + "loss": 6.9806, + "step": 452070 + }, + { + "epoch": 0.9132302023699382, + "grad_norm": 87.55284881591797, + "learning_rate": 2.772005163787689e-07, + "loss": 5.9049, + "step": 452080 + }, + { + "epoch": 0.9132504030026221, + "grad_norm": 202.61053466796875, + "learning_rate": 2.770859159012579e-07, + "loss": 10.9852, + "step": 452090 + }, + { + "epoch": 0.9132706036353059, + "grad_norm": 319.46868896484375, + "learning_rate": 2.7697133844265535e-07, + "loss": 17.7638, + "step": 452100 + }, + { + "epoch": 0.9132908042679897, + "grad_norm": 36.19598388671875, + "learning_rate": 2.768567840035219e-07, + "loss": 28.3298, + "step": 452110 + }, + { + "epoch": 0.9133110049006735, + "grad_norm": 324.45440673828125, + "learning_rate": 2.76742252584416e-07, + "loss": 10.1354, + "step": 452120 + }, + { + "epoch": 0.9133312055333573, + "grad_norm": 19.367141723632812, + "learning_rate": 2.7662774418589555e-07, + "loss": 7.299, + "step": 452130 + }, + { + "epoch": 0.9133514061660412, + "grad_norm": 217.58401489257812, + "learning_rate": 2.765132588085184e-07, + "loss": 21.0982, + "step": 452140 + }, + { + "epoch": 0.913371606798725, + "grad_norm": 206.20396423339844, + "learning_rate": 2.763987964528425e-07, + "loss": 14.6621, + "step": 452150 + }, + { + "epoch": 0.9133918074314088, + "grad_norm": 426.7931823730469, + "learning_rate": 2.7628435711942737e-07, + "loss": 20.1852, + "step": 452160 + }, + { + "epoch": 0.9134120080640926, + "grad_norm": 2748.8115234375, + "learning_rate": 2.7616994080882754e-07, + "loss": 23.0796, + "step": 452170 + }, + { + "epoch": 0.9134322086967764, + "grad_norm": 295.9453430175781, + "learning_rate": 2.7605554752160256e-07, + "loss": 20.84, + "step": 452180 + }, + { + "epoch": 0.9134524093294601, + "grad_norm": 50.751956939697266, + "learning_rate": 2.7594117725831096e-07, + "loss": 7.0311, + "step": 452190 + }, + { + "epoch": 0.913472609962144, + "grad_norm": 290.8677673339844, + "learning_rate": 2.758268300195094e-07, + "loss": 12.3131, + "step": 452200 + }, + { + "epoch": 0.9134928105948278, + "grad_norm": 352.98046875, + "learning_rate": 2.757125058057536e-07, + "loss": 11.1176, + "step": 452210 + }, + { + "epoch": 0.9135130112275116, + "grad_norm": 257.6129455566406, + "learning_rate": 2.755982046176031e-07, + "loss": 19.519, + "step": 452220 + }, + { + "epoch": 0.9135332118601954, + "grad_norm": 180.9540252685547, + "learning_rate": 2.754839264556136e-07, + "loss": 6.2478, + "step": 452230 + }, + { + "epoch": 0.9135534124928792, + "grad_norm": 153.62515258789062, + "learning_rate": 2.7536967132034186e-07, + "loss": 12.1595, + "step": 452240 + }, + { + "epoch": 0.9135736131255631, + "grad_norm": 466.9817199707031, + "learning_rate": 2.752554392123463e-07, + "loss": 22.4284, + "step": 452250 + }, + { + "epoch": 0.9135938137582469, + "grad_norm": 384.5799560546875, + "learning_rate": 2.7514123013218153e-07, + "loss": 21.8164, + "step": 452260 + }, + { + "epoch": 0.9136140143909307, + "grad_norm": 119.99606323242188, + "learning_rate": 2.750270440804065e-07, + "loss": 11.328, + "step": 452270 + }, + { + "epoch": 0.9136342150236145, + "grad_norm": 128.53558349609375, + "learning_rate": 2.749128810575763e-07, + "loss": 14.0993, + "step": 452280 + }, + { + "epoch": 0.9136544156562983, + "grad_norm": 305.25958251953125, + "learning_rate": 2.747987410642472e-07, + "loss": 21.608, + "step": 452290 + }, + { + "epoch": 0.9136746162889822, + "grad_norm": 198.42884826660156, + "learning_rate": 2.746846241009765e-07, + "loss": 11.7095, + "step": 452300 + }, + { + "epoch": 0.913694816921666, + "grad_norm": 420.90496826171875, + "learning_rate": 2.745705301683188e-07, + "loss": 15.0137, + "step": 452310 + }, + { + "epoch": 0.9137150175543498, + "grad_norm": 220.3802947998047, + "learning_rate": 2.7445645926683253e-07, + "loss": 21.6016, + "step": 452320 + }, + { + "epoch": 0.9137352181870336, + "grad_norm": 223.92161560058594, + "learning_rate": 2.7434241139707106e-07, + "loss": 9.5915, + "step": 452330 + }, + { + "epoch": 0.9137554188197174, + "grad_norm": 140.13250732421875, + "learning_rate": 2.742283865595924e-07, + "loss": 12.0885, + "step": 452340 + }, + { + "epoch": 0.9137756194524013, + "grad_norm": 329.86859130859375, + "learning_rate": 2.7411438475495155e-07, + "loss": 16.692, + "step": 452350 + }, + { + "epoch": 0.9137958200850851, + "grad_norm": 502.484130859375, + "learning_rate": 2.740004059837031e-07, + "loss": 19.292, + "step": 452360 + }, + { + "epoch": 0.9138160207177689, + "grad_norm": 303.68548583984375, + "learning_rate": 2.738864502464045e-07, + "loss": 8.5126, + "step": 452370 + }, + { + "epoch": 0.9138362213504527, + "grad_norm": 286.79205322265625, + "learning_rate": 2.737725175436101e-07, + "loss": 12.9968, + "step": 452380 + }, + { + "epoch": 0.9138564219831365, + "grad_norm": 390.5729675292969, + "learning_rate": 2.7365860787587405e-07, + "loss": 11.5651, + "step": 452390 + }, + { + "epoch": 0.9138766226158204, + "grad_norm": 29.03893280029297, + "learning_rate": 2.735447212437531e-07, + "loss": 26.2479, + "step": 452400 + }, + { + "epoch": 0.9138968232485042, + "grad_norm": 0.7239755392074585, + "learning_rate": 2.734308576478023e-07, + "loss": 12.455, + "step": 452410 + }, + { + "epoch": 0.913917023881188, + "grad_norm": 493.9660339355469, + "learning_rate": 2.733170170885768e-07, + "loss": 22.2287, + "step": 452420 + }, + { + "epoch": 0.9139372245138718, + "grad_norm": 378.9903564453125, + "learning_rate": 2.7320319956662957e-07, + "loss": 18.7744, + "step": 452430 + }, + { + "epoch": 0.9139574251465555, + "grad_norm": 359.2613830566406, + "learning_rate": 2.730894050825178e-07, + "loss": 7.5829, + "step": 452440 + }, + { + "epoch": 0.9139776257792394, + "grad_norm": 48.83420181274414, + "learning_rate": 2.72975633636795e-07, + "loss": 10.8604, + "step": 452450 + }, + { + "epoch": 0.9139978264119232, + "grad_norm": 263.2594299316406, + "learning_rate": 2.728618852300147e-07, + "loss": 10.6731, + "step": 452460 + }, + { + "epoch": 0.914018027044607, + "grad_norm": 360.8517150878906, + "learning_rate": 2.727481598627324e-07, + "loss": 13.3083, + "step": 452470 + }, + { + "epoch": 0.9140382276772908, + "grad_norm": 280.552978515625, + "learning_rate": 2.7263445753550275e-07, + "loss": 6.6467, + "step": 452480 + }, + { + "epoch": 0.9140584283099746, + "grad_norm": 247.21461486816406, + "learning_rate": 2.725207782488792e-07, + "loss": 17.2739, + "step": 452490 + }, + { + "epoch": 0.9140786289426585, + "grad_norm": 305.6524963378906, + "learning_rate": 2.724071220034158e-07, + "loss": 20.9711, + "step": 452500 + }, + { + "epoch": 0.9140988295753423, + "grad_norm": 564.3761596679688, + "learning_rate": 2.72293488799667e-07, + "loss": 23.1078, + "step": 452510 + }, + { + "epoch": 0.9141190302080261, + "grad_norm": 189.10385131835938, + "learning_rate": 2.7217987863818684e-07, + "loss": 19.2934, + "step": 452520 + }, + { + "epoch": 0.9141392308407099, + "grad_norm": 134.81488037109375, + "learning_rate": 2.7206629151952715e-07, + "loss": 19.1449, + "step": 452530 + }, + { + "epoch": 0.9141594314733937, + "grad_norm": 143.1569061279297, + "learning_rate": 2.7195272744424405e-07, + "loss": 16.5561, + "step": 452540 + }, + { + "epoch": 0.9141796321060776, + "grad_norm": 330.8506164550781, + "learning_rate": 2.7183918641288943e-07, + "loss": 10.3609, + "step": 452550 + }, + { + "epoch": 0.9141998327387614, + "grad_norm": 350.2809143066406, + "learning_rate": 2.717256684260172e-07, + "loss": 22.1234, + "step": 452560 + }, + { + "epoch": 0.9142200333714452, + "grad_norm": 84.64785766601562, + "learning_rate": 2.716121734841814e-07, + "loss": 23.5274, + "step": 452570 + }, + { + "epoch": 0.914240234004129, + "grad_norm": 136.28900146484375, + "learning_rate": 2.714987015879328e-07, + "loss": 19.7956, + "step": 452580 + }, + { + "epoch": 0.9142604346368128, + "grad_norm": 428.3825988769531, + "learning_rate": 2.7138525273782746e-07, + "loss": 20.0656, + "step": 452590 + }, + { + "epoch": 0.9142806352694967, + "grad_norm": 147.80023193359375, + "learning_rate": 2.712718269344161e-07, + "loss": 12.2215, + "step": 452600 + }, + { + "epoch": 0.9143008359021805, + "grad_norm": 77.33128356933594, + "learning_rate": 2.711584241782528e-07, + "loss": 13.2031, + "step": 452610 + }, + { + "epoch": 0.9143210365348643, + "grad_norm": 159.60865783691406, + "learning_rate": 2.7104504446988867e-07, + "loss": 20.644, + "step": 452620 + }, + { + "epoch": 0.9143412371675481, + "grad_norm": 228.53274536132812, + "learning_rate": 2.709316878098789e-07, + "loss": 14.8703, + "step": 452630 + }, + { + "epoch": 0.9143614378002319, + "grad_norm": 386.92388916015625, + "learning_rate": 2.708183541987741e-07, + "loss": 20.069, + "step": 452640 + }, + { + "epoch": 0.9143816384329158, + "grad_norm": 188.64630126953125, + "learning_rate": 2.707050436371267e-07, + "loss": 17.4205, + "step": 452650 + }, + { + "epoch": 0.9144018390655996, + "grad_norm": 142.79185485839844, + "learning_rate": 2.7059175612548947e-07, + "loss": 18.5848, + "step": 452660 + }, + { + "epoch": 0.9144220396982834, + "grad_norm": 284.08392333984375, + "learning_rate": 2.7047849166441487e-07, + "loss": 25.1389, + "step": 452670 + }, + { + "epoch": 0.9144422403309672, + "grad_norm": 389.1358337402344, + "learning_rate": 2.703652502544535e-07, + "loss": 24.7348, + "step": 452680 + }, + { + "epoch": 0.914462440963651, + "grad_norm": 382.4786376953125, + "learning_rate": 2.702520318961588e-07, + "loss": 17.6049, + "step": 452690 + }, + { + "epoch": 0.9144826415963347, + "grad_norm": 223.2645263671875, + "learning_rate": 2.701388365900831e-07, + "loss": 15.3738, + "step": 452700 + }, + { + "epoch": 0.9145028422290186, + "grad_norm": 236.0268096923828, + "learning_rate": 2.7002566433677547e-07, + "loss": 16.6175, + "step": 452710 + }, + { + "epoch": 0.9145230428617024, + "grad_norm": 98.96418762207031, + "learning_rate": 2.699125151367893e-07, + "loss": 17.9103, + "step": 452720 + }, + { + "epoch": 0.9145432434943862, + "grad_norm": 177.8522491455078, + "learning_rate": 2.697993889906764e-07, + "loss": 14.3337, + "step": 452730 + }, + { + "epoch": 0.91456344412707, + "grad_norm": 269.5322570800781, + "learning_rate": 2.6968628589898735e-07, + "loss": 7.494, + "step": 452740 + }, + { + "epoch": 0.9145836447597538, + "grad_norm": 123.6279067993164, + "learning_rate": 2.6957320586227354e-07, + "loss": 24.4679, + "step": 452750 + }, + { + "epoch": 0.9146038453924377, + "grad_norm": 5.928144454956055, + "learning_rate": 2.694601488810855e-07, + "loss": 20.4435, + "step": 452760 + }, + { + "epoch": 0.9146240460251215, + "grad_norm": 353.43511962890625, + "learning_rate": 2.6934711495597676e-07, + "loss": 29.1884, + "step": 452770 + }, + { + "epoch": 0.9146442466578053, + "grad_norm": 492.0124206542969, + "learning_rate": 2.6923410408749516e-07, + "loss": 16.7438, + "step": 452780 + }, + { + "epoch": 0.9146644472904891, + "grad_norm": 220.84674072265625, + "learning_rate": 2.6912111627619255e-07, + "loss": 16.7628, + "step": 452790 + }, + { + "epoch": 0.9146846479231729, + "grad_norm": 145.1833038330078, + "learning_rate": 2.690081515226206e-07, + "loss": 21.2504, + "step": 452800 + }, + { + "epoch": 0.9147048485558568, + "grad_norm": 264.7141418457031, + "learning_rate": 2.6889520982732897e-07, + "loss": 11.6204, + "step": 452810 + }, + { + "epoch": 0.9147250491885406, + "grad_norm": 103.72254180908203, + "learning_rate": 2.6878229119086776e-07, + "loss": 8.374, + "step": 452820 + }, + { + "epoch": 0.9147452498212244, + "grad_norm": 144.16172790527344, + "learning_rate": 2.6866939561378867e-07, + "loss": 16.1434, + "step": 452830 + }, + { + "epoch": 0.9147654504539082, + "grad_norm": 234.51316833496094, + "learning_rate": 2.685565230966408e-07, + "loss": 15.9326, + "step": 452840 + }, + { + "epoch": 0.914785651086592, + "grad_norm": 421.41058349609375, + "learning_rate": 2.684436736399737e-07, + "loss": 15.4687, + "step": 452850 + }, + { + "epoch": 0.9148058517192759, + "grad_norm": 87.86022186279297, + "learning_rate": 2.6833084724433965e-07, + "loss": 25.2908, + "step": 452860 + }, + { + "epoch": 0.9148260523519597, + "grad_norm": 320.94854736328125, + "learning_rate": 2.6821804391028603e-07, + "loss": 19.5364, + "step": 452870 + }, + { + "epoch": 0.9148462529846435, + "grad_norm": 63.65008544921875, + "learning_rate": 2.681052636383641e-07, + "loss": 13.9083, + "step": 452880 + }, + { + "epoch": 0.9148664536173273, + "grad_norm": 685.5750732421875, + "learning_rate": 2.679925064291239e-07, + "loss": 24.0978, + "step": 452890 + }, + { + "epoch": 0.9148866542500111, + "grad_norm": 83.85057830810547, + "learning_rate": 2.6787977228311336e-07, + "loss": 12.3388, + "step": 452900 + }, + { + "epoch": 0.914906854882695, + "grad_norm": 245.42599487304688, + "learning_rate": 2.677670612008837e-07, + "loss": 25.9351, + "step": 452910 + }, + { + "epoch": 0.9149270555153788, + "grad_norm": 267.9879455566406, + "learning_rate": 2.676543731829823e-07, + "loss": 22.9067, + "step": 452920 + }, + { + "epoch": 0.9149472561480626, + "grad_norm": 465.3280029296875, + "learning_rate": 2.6754170822996026e-07, + "loss": 12.1629, + "step": 452930 + }, + { + "epoch": 0.9149674567807464, + "grad_norm": 811.5821533203125, + "learning_rate": 2.6742906634236564e-07, + "loss": 16.7378, + "step": 452940 + }, + { + "epoch": 0.9149876574134301, + "grad_norm": 279.7994689941406, + "learning_rate": 2.6731644752074846e-07, + "loss": 16.232, + "step": 452950 + }, + { + "epoch": 0.915007858046114, + "grad_norm": 164.19772338867188, + "learning_rate": 2.6720385176565664e-07, + "loss": 10.9066, + "step": 452960 + }, + { + "epoch": 0.9150280586787978, + "grad_norm": 604.0054931640625, + "learning_rate": 2.6709127907763864e-07, + "loss": 29.5591, + "step": 452970 + }, + { + "epoch": 0.9150482593114816, + "grad_norm": 18.724149703979492, + "learning_rate": 2.6697872945724455e-07, + "loss": 14.1053, + "step": 452980 + }, + { + "epoch": 0.9150684599441654, + "grad_norm": 279.6636047363281, + "learning_rate": 2.668662029050217e-07, + "loss": 8.1674, + "step": 452990 + }, + { + "epoch": 0.9150886605768492, + "grad_norm": 237.82156372070312, + "learning_rate": 2.6675369942151864e-07, + "loss": 22.8053, + "step": 453000 + }, + { + "epoch": 0.915108861209533, + "grad_norm": 422.0186767578125, + "learning_rate": 2.666412190072837e-07, + "loss": 28.2567, + "step": 453010 + }, + { + "epoch": 0.9151290618422169, + "grad_norm": 394.2140197753906, + "learning_rate": 2.665287616628659e-07, + "loss": 10.5564, + "step": 453020 + }, + { + "epoch": 0.9151492624749007, + "grad_norm": 183.66497802734375, + "learning_rate": 2.6641632738881315e-07, + "loss": 16.4966, + "step": 453030 + }, + { + "epoch": 0.9151694631075845, + "grad_norm": 135.10885620117188, + "learning_rate": 2.663039161856723e-07, + "loss": 15.2203, + "step": 453040 + }, + { + "epoch": 0.9151896637402683, + "grad_norm": 163.83843994140625, + "learning_rate": 2.6619152805399286e-07, + "loss": 19.0739, + "step": 453050 + }, + { + "epoch": 0.9152098643729522, + "grad_norm": 206.0903778076172, + "learning_rate": 2.660791629943216e-07, + "loss": 27.0069, + "step": 453060 + }, + { + "epoch": 0.915230065005636, + "grad_norm": 32.92919158935547, + "learning_rate": 2.659668210072058e-07, + "loss": 10.3376, + "step": 453070 + }, + { + "epoch": 0.9152502656383198, + "grad_norm": 344.9319152832031, + "learning_rate": 2.658545020931935e-07, + "loss": 29.5457, + "step": 453080 + }, + { + "epoch": 0.9152704662710036, + "grad_norm": 248.47747802734375, + "learning_rate": 2.657422062528325e-07, + "loss": 15.6671, + "step": 453090 + }, + { + "epoch": 0.9152906669036874, + "grad_norm": 392.9204406738281, + "learning_rate": 2.656299334866702e-07, + "loss": 20.1031, + "step": 453100 + }, + { + "epoch": 0.9153108675363713, + "grad_norm": 236.53485107421875, + "learning_rate": 2.655176837952528e-07, + "loss": 24.4267, + "step": 453110 + }, + { + "epoch": 0.9153310681690551, + "grad_norm": 264.0382080078125, + "learning_rate": 2.654054571791287e-07, + "loss": 10.8446, + "step": 453120 + }, + { + "epoch": 0.9153512688017389, + "grad_norm": 356.65625, + "learning_rate": 2.6529325363884364e-07, + "loss": 15.9139, + "step": 453130 + }, + { + "epoch": 0.9153714694344227, + "grad_norm": 99.18729400634766, + "learning_rate": 2.651810731749449e-07, + "loss": 12.5715, + "step": 453140 + }, + { + "epoch": 0.9153916700671065, + "grad_norm": 252.50363159179688, + "learning_rate": 2.650689157879799e-07, + "loss": 7.7056, + "step": 453150 + }, + { + "epoch": 0.9154118706997904, + "grad_norm": 295.3934020996094, + "learning_rate": 2.649567814784937e-07, + "loss": 37.6581, + "step": 453160 + }, + { + "epoch": 0.9154320713324742, + "grad_norm": 242.6433868408203, + "learning_rate": 2.6484467024703476e-07, + "loss": 18.3434, + "step": 453170 + }, + { + "epoch": 0.915452271965158, + "grad_norm": 326.314697265625, + "learning_rate": 2.647325820941488e-07, + "loss": 15.2285, + "step": 453180 + }, + { + "epoch": 0.9154724725978418, + "grad_norm": 351.17059326171875, + "learning_rate": 2.6462051702038085e-07, + "loss": 16.0792, + "step": 453190 + }, + { + "epoch": 0.9154926732305256, + "grad_norm": 325.4627380371094, + "learning_rate": 2.6450847502627883e-07, + "loss": 24.9354, + "step": 453200 + }, + { + "epoch": 0.9155128738632093, + "grad_norm": 263.8374938964844, + "learning_rate": 2.6439645611238795e-07, + "loss": 15.9869, + "step": 453210 + }, + { + "epoch": 0.9155330744958932, + "grad_norm": 495.61895751953125, + "learning_rate": 2.642844602792544e-07, + "loss": 17.7545, + "step": 453220 + }, + { + "epoch": 0.915553275128577, + "grad_norm": 90.01434326171875, + "learning_rate": 2.6417248752742374e-07, + "loss": 18.3905, + "step": 453230 + }, + { + "epoch": 0.9155734757612608, + "grad_norm": 260.0401611328125, + "learning_rate": 2.640605378574429e-07, + "loss": 26.9076, + "step": 453240 + }, + { + "epoch": 0.9155936763939446, + "grad_norm": 118.38766479492188, + "learning_rate": 2.639486112698564e-07, + "loss": 19.0116, + "step": 453250 + }, + { + "epoch": 0.9156138770266284, + "grad_norm": 365.8596496582031, + "learning_rate": 2.6383670776520933e-07, + "loss": 25.7866, + "step": 453260 + }, + { + "epoch": 0.9156340776593123, + "grad_norm": 200.04348754882812, + "learning_rate": 2.637248273440479e-07, + "loss": 19.1621, + "step": 453270 + }, + { + "epoch": 0.9156542782919961, + "grad_norm": 321.8495788574219, + "learning_rate": 2.6361297000691787e-07, + "loss": 11.9167, + "step": 453280 + }, + { + "epoch": 0.9156744789246799, + "grad_norm": 258.5777893066406, + "learning_rate": 2.6350113575436266e-07, + "loss": 9.2933, + "step": 453290 + }, + { + "epoch": 0.9156946795573637, + "grad_norm": 309.27978515625, + "learning_rate": 2.6338932458692847e-07, + "loss": 13.4667, + "step": 453300 + }, + { + "epoch": 0.9157148801900475, + "grad_norm": 919.72607421875, + "learning_rate": 2.6327753650516205e-07, + "loss": 48.5722, + "step": 453310 + }, + { + "epoch": 0.9157350808227314, + "grad_norm": 356.7960510253906, + "learning_rate": 2.631657715096048e-07, + "loss": 10.7685, + "step": 453320 + }, + { + "epoch": 0.9157552814554152, + "grad_norm": 71.45125579833984, + "learning_rate": 2.630540296008027e-07, + "loss": 4.6038, + "step": 453330 + }, + { + "epoch": 0.915775482088099, + "grad_norm": 0.0, + "learning_rate": 2.629423107793022e-07, + "loss": 16.0654, + "step": 453340 + }, + { + "epoch": 0.9157956827207828, + "grad_norm": 299.66754150390625, + "learning_rate": 2.6283061504564553e-07, + "loss": 13.7657, + "step": 453350 + }, + { + "epoch": 0.9158158833534666, + "grad_norm": 367.063720703125, + "learning_rate": 2.6271894240037785e-07, + "loss": 18.1173, + "step": 453360 + }, + { + "epoch": 0.9158360839861505, + "grad_norm": 374.85101318359375, + "learning_rate": 2.626072928440432e-07, + "loss": 16.9255, + "step": 453370 + }, + { + "epoch": 0.9158562846188343, + "grad_norm": 164.22169494628906, + "learning_rate": 2.6249566637718714e-07, + "loss": 17.5023, + "step": 453380 + }, + { + "epoch": 0.9158764852515181, + "grad_norm": 188.7342987060547, + "learning_rate": 2.623840630003516e-07, + "loss": 13.6479, + "step": 453390 + }, + { + "epoch": 0.9158966858842019, + "grad_norm": 0.0, + "learning_rate": 2.622724827140816e-07, + "loss": 24.684, + "step": 453400 + }, + { + "epoch": 0.9159168865168857, + "grad_norm": 271.82586669921875, + "learning_rate": 2.6216092551892116e-07, + "loss": 17.3753, + "step": 453410 + }, + { + "epoch": 0.9159370871495696, + "grad_norm": 386.52496337890625, + "learning_rate": 2.6204939141541376e-07, + "loss": 18.1273, + "step": 453420 + }, + { + "epoch": 0.9159572877822534, + "grad_norm": 215.76119995117188, + "learning_rate": 2.6193788040410286e-07, + "loss": 20.3966, + "step": 453430 + }, + { + "epoch": 0.9159774884149372, + "grad_norm": 442.1326904296875, + "learning_rate": 2.618263924855324e-07, + "loss": 18.699, + "step": 453440 + }, + { + "epoch": 0.915997689047621, + "grad_norm": 49.77064514160156, + "learning_rate": 2.617149276602454e-07, + "loss": 21.666, + "step": 453450 + }, + { + "epoch": 0.9160178896803048, + "grad_norm": 4.305212497711182, + "learning_rate": 2.616034859287847e-07, + "loss": 11.1293, + "step": 453460 + }, + { + "epoch": 0.9160380903129886, + "grad_norm": 145.03042602539062, + "learning_rate": 2.614920672916943e-07, + "loss": 12.7993, + "step": 453470 + }, + { + "epoch": 0.9160582909456724, + "grad_norm": 165.0931396484375, + "learning_rate": 2.61380671749516e-07, + "loss": 12.0623, + "step": 453480 + }, + { + "epoch": 0.9160784915783562, + "grad_norm": 65.10508728027344, + "learning_rate": 2.6126929930279486e-07, + "loss": 17.926, + "step": 453490 + }, + { + "epoch": 0.91609869221104, + "grad_norm": 221.85946655273438, + "learning_rate": 2.611579499520722e-07, + "loss": 23.5891, + "step": 453500 + }, + { + "epoch": 0.9161188928437238, + "grad_norm": 179.46832275390625, + "learning_rate": 2.610466236978898e-07, + "loss": 22.8867, + "step": 453510 + }, + { + "epoch": 0.9161390934764077, + "grad_norm": 165.3672637939453, + "learning_rate": 2.6093532054079276e-07, + "loss": 18.5289, + "step": 453520 + }, + { + "epoch": 0.9161592941090915, + "grad_norm": 136.2425537109375, + "learning_rate": 2.6082404048132114e-07, + "loss": 14.3849, + "step": 453530 + }, + { + "epoch": 0.9161794947417753, + "grad_norm": 280.4484558105469, + "learning_rate": 2.6071278352001904e-07, + "loss": 15.8007, + "step": 453540 + }, + { + "epoch": 0.9161996953744591, + "grad_norm": 91.72692108154297, + "learning_rate": 2.606015496574277e-07, + "loss": 20.0773, + "step": 453550 + }, + { + "epoch": 0.9162198960071429, + "grad_norm": 232.214599609375, + "learning_rate": 2.604903388940899e-07, + "loss": 20.5344, + "step": 453560 + }, + { + "epoch": 0.9162400966398268, + "grad_norm": 145.7259521484375, + "learning_rate": 2.60379151230547e-07, + "loss": 13.2599, + "step": 453570 + }, + { + "epoch": 0.9162602972725106, + "grad_norm": 229.9413299560547, + "learning_rate": 2.602679866673413e-07, + "loss": 17.8836, + "step": 453580 + }, + { + "epoch": 0.9162804979051944, + "grad_norm": 345.7672119140625, + "learning_rate": 2.601568452050146e-07, + "loss": 27.2276, + "step": 453590 + }, + { + "epoch": 0.9163006985378782, + "grad_norm": 277.6082458496094, + "learning_rate": 2.600457268441092e-07, + "loss": 21.4439, + "step": 453600 + }, + { + "epoch": 0.916320899170562, + "grad_norm": 171.35084533691406, + "learning_rate": 2.599346315851653e-07, + "loss": 19.7956, + "step": 453610 + }, + { + "epoch": 0.9163410998032459, + "grad_norm": 1177.286865234375, + "learning_rate": 2.598235594287246e-07, + "loss": 46.3974, + "step": 453620 + }, + { + "epoch": 0.9163613004359297, + "grad_norm": 5.111498832702637, + "learning_rate": 2.597125103753301e-07, + "loss": 17.405, + "step": 453630 + }, + { + "epoch": 0.9163815010686135, + "grad_norm": 431.3167419433594, + "learning_rate": 2.596014844255218e-07, + "loss": 19.3773, + "step": 453640 + }, + { + "epoch": 0.9164017017012973, + "grad_norm": 548.6310424804688, + "learning_rate": 2.594904815798399e-07, + "loss": 15.4044, + "step": 453650 + }, + { + "epoch": 0.9164219023339811, + "grad_norm": 331.8158264160156, + "learning_rate": 2.5937950183882734e-07, + "loss": 21.1692, + "step": 453660 + }, + { + "epoch": 0.916442102966665, + "grad_norm": 53.809814453125, + "learning_rate": 2.5926854520302414e-07, + "loss": 32.3545, + "step": 453670 + }, + { + "epoch": 0.9164623035993488, + "grad_norm": 217.87533569335938, + "learning_rate": 2.591576116729705e-07, + "loss": 15.405, + "step": 453680 + }, + { + "epoch": 0.9164825042320326, + "grad_norm": 187.37620544433594, + "learning_rate": 2.590467012492076e-07, + "loss": 21.7085, + "step": 453690 + }, + { + "epoch": 0.9165027048647164, + "grad_norm": 281.9234619140625, + "learning_rate": 2.589358139322767e-07, + "loss": 22.0715, + "step": 453700 + }, + { + "epoch": 0.9165229054974002, + "grad_norm": 397.6363220214844, + "learning_rate": 2.5882494972271746e-07, + "loss": 10.1635, + "step": 453710 + }, + { + "epoch": 0.9165431061300839, + "grad_norm": 292.94915771484375, + "learning_rate": 2.587141086210698e-07, + "loss": 8.4192, + "step": 453720 + }, + { + "epoch": 0.9165633067627678, + "grad_norm": 342.94427490234375, + "learning_rate": 2.586032906278757e-07, + "loss": 16.3067, + "step": 453730 + }, + { + "epoch": 0.9165835073954516, + "grad_norm": 515.53271484375, + "learning_rate": 2.584924957436735e-07, + "loss": 17.0453, + "step": 453740 + }, + { + "epoch": 0.9166037080281354, + "grad_norm": 363.7118225097656, + "learning_rate": 2.583817239690034e-07, + "loss": 12.1022, + "step": 453750 + }, + { + "epoch": 0.9166239086608192, + "grad_norm": 140.2545623779297, + "learning_rate": 2.5827097530440605e-07, + "loss": 18.2588, + "step": 453760 + }, + { + "epoch": 0.916644109293503, + "grad_norm": 216.16690063476562, + "learning_rate": 2.581602497504204e-07, + "loss": 13.8425, + "step": 453770 + }, + { + "epoch": 0.9166643099261869, + "grad_norm": 170.5027618408203, + "learning_rate": 2.580495473075878e-07, + "loss": 12.6689, + "step": 453780 + }, + { + "epoch": 0.9166845105588707, + "grad_norm": 374.31842041015625, + "learning_rate": 2.579388679764455e-07, + "loss": 20.037, + "step": 453790 + }, + { + "epoch": 0.9167047111915545, + "grad_norm": 421.94769287109375, + "learning_rate": 2.578282117575343e-07, + "loss": 20.8775, + "step": 453800 + }, + { + "epoch": 0.9167249118242383, + "grad_norm": 51.00823974609375, + "learning_rate": 2.577175786513936e-07, + "loss": 31.8737, + "step": 453810 + }, + { + "epoch": 0.9167451124569221, + "grad_norm": 259.5227355957031, + "learning_rate": 2.576069686585614e-07, + "loss": 9.5977, + "step": 453820 + }, + { + "epoch": 0.916765313089606, + "grad_norm": 507.0741882324219, + "learning_rate": 2.5749638177957834e-07, + "loss": 22.4468, + "step": 453830 + }, + { + "epoch": 0.9167855137222898, + "grad_norm": 264.82342529296875, + "learning_rate": 2.5738581801498187e-07, + "loss": 26.934, + "step": 453840 + }, + { + "epoch": 0.9168057143549736, + "grad_norm": 188.7342987060547, + "learning_rate": 2.5727527736531256e-07, + "loss": 15.6602, + "step": 453850 + }, + { + "epoch": 0.9168259149876574, + "grad_norm": 338.59942626953125, + "learning_rate": 2.571647598311089e-07, + "loss": 26.7214, + "step": 453860 + }, + { + "epoch": 0.9168461156203412, + "grad_norm": 212.51951599121094, + "learning_rate": 2.5705426541290765e-07, + "loss": 26.4734, + "step": 453870 + }, + { + "epoch": 0.9168663162530251, + "grad_norm": 250.728271484375, + "learning_rate": 2.56943794111249e-07, + "loss": 13.9403, + "step": 453880 + }, + { + "epoch": 0.9168865168857089, + "grad_norm": 256.1279296875, + "learning_rate": 2.5683334592667195e-07, + "loss": 18.4427, + "step": 453890 + }, + { + "epoch": 0.9169067175183927, + "grad_norm": 517.9696655273438, + "learning_rate": 2.5672292085971276e-07, + "loss": 20.7408, + "step": 453900 + }, + { + "epoch": 0.9169269181510765, + "grad_norm": 674.7428588867188, + "learning_rate": 2.5661251891091087e-07, + "loss": 27.3759, + "step": 453910 + }, + { + "epoch": 0.9169471187837603, + "grad_norm": 44.62065124511719, + "learning_rate": 2.5650214008080544e-07, + "loss": 10.5466, + "step": 453920 + }, + { + "epoch": 0.9169673194164442, + "grad_norm": 211.87799072265625, + "learning_rate": 2.5639178436993205e-07, + "loss": 6.9806, + "step": 453930 + }, + { + "epoch": 0.916987520049128, + "grad_norm": 272.81170654296875, + "learning_rate": 2.5628145177882926e-07, + "loss": 15.2975, + "step": 453940 + }, + { + "epoch": 0.9170077206818118, + "grad_norm": 165.90725708007812, + "learning_rate": 2.561711423080365e-07, + "loss": 15.7969, + "step": 453950 + }, + { + "epoch": 0.9170279213144956, + "grad_norm": 400.4488220214844, + "learning_rate": 2.5606085595809015e-07, + "loss": 18.5943, + "step": 453960 + }, + { + "epoch": 0.9170481219471794, + "grad_norm": 256.30218505859375, + "learning_rate": 2.559505927295275e-07, + "loss": 14.317, + "step": 453970 + }, + { + "epoch": 0.9170683225798631, + "grad_norm": 300.9992370605469, + "learning_rate": 2.5584035262288585e-07, + "loss": 16.0037, + "step": 453980 + }, + { + "epoch": 0.917088523212547, + "grad_norm": 518.5432739257812, + "learning_rate": 2.557301356387043e-07, + "loss": 16.5282, + "step": 453990 + }, + { + "epoch": 0.9171087238452308, + "grad_norm": 549.3116455078125, + "learning_rate": 2.556199417775174e-07, + "loss": 20.3889, + "step": 454000 + }, + { + "epoch": 0.9171289244779146, + "grad_norm": 397.176513671875, + "learning_rate": 2.555097710398635e-07, + "loss": 16.9556, + "step": 454010 + }, + { + "epoch": 0.9171491251105984, + "grad_norm": 572.1047973632812, + "learning_rate": 2.553996234262801e-07, + "loss": 18.0201, + "step": 454020 + }, + { + "epoch": 0.9171693257432822, + "grad_norm": 170.86328125, + "learning_rate": 2.5528949893730393e-07, + "loss": 12.7383, + "step": 454030 + }, + { + "epoch": 0.9171895263759661, + "grad_norm": 530.5274658203125, + "learning_rate": 2.551793975734701e-07, + "loss": 22.9562, + "step": 454040 + }, + { + "epoch": 0.9172097270086499, + "grad_norm": 643.59375, + "learning_rate": 2.550693193353171e-07, + "loss": 19.9046, + "step": 454050 + }, + { + "epoch": 0.9172299276413337, + "grad_norm": 701.5162963867188, + "learning_rate": 2.5495926422338115e-07, + "loss": 28.291, + "step": 454060 + }, + { + "epoch": 0.9172501282740175, + "grad_norm": 262.4273376464844, + "learning_rate": 2.548492322381968e-07, + "loss": 16.2135, + "step": 454070 + }, + { + "epoch": 0.9172703289067013, + "grad_norm": 281.5733947753906, + "learning_rate": 2.547392233803031e-07, + "loss": 11.4153, + "step": 454080 + }, + { + "epoch": 0.9172905295393852, + "grad_norm": 0.0, + "learning_rate": 2.5462923765023404e-07, + "loss": 20.6734, + "step": 454090 + }, + { + "epoch": 0.917310730172069, + "grad_norm": 170.24087524414062, + "learning_rate": 2.5451927504852757e-07, + "loss": 15.0851, + "step": 454100 + }, + { + "epoch": 0.9173309308047528, + "grad_norm": 202.82864379882812, + "learning_rate": 2.544093355757171e-07, + "loss": 11.4643, + "step": 454110 + }, + { + "epoch": 0.9173511314374366, + "grad_norm": 210.89051818847656, + "learning_rate": 2.5429941923234114e-07, + "loss": 18.046, + "step": 454120 + }, + { + "epoch": 0.9173713320701204, + "grad_norm": 712.810791015625, + "learning_rate": 2.541895260189342e-07, + "loss": 20.078, + "step": 454130 + }, + { + "epoch": 0.9173915327028043, + "grad_norm": 353.95068359375, + "learning_rate": 2.5407965593603147e-07, + "loss": 9.7528, + "step": 454140 + }, + { + "epoch": 0.9174117333354881, + "grad_norm": 23.561250686645508, + "learning_rate": 2.539698089841691e-07, + "loss": 27.3173, + "step": 454150 + }, + { + "epoch": 0.9174319339681719, + "grad_norm": 252.623046875, + "learning_rate": 2.538599851638818e-07, + "loss": 10.0199, + "step": 454160 + }, + { + "epoch": 0.9174521346008557, + "grad_norm": 300.888671875, + "learning_rate": 2.537501844757062e-07, + "loss": 11.7257, + "step": 454170 + }, + { + "epoch": 0.9174723352335395, + "grad_norm": 495.1927795410156, + "learning_rate": 2.5364040692017644e-07, + "loss": 17.0178, + "step": 454180 + }, + { + "epoch": 0.9174925358662234, + "grad_norm": 0.0, + "learning_rate": 2.5353065249782647e-07, + "loss": 18.3665, + "step": 454190 + }, + { + "epoch": 0.9175127364989072, + "grad_norm": 56.5859375, + "learning_rate": 2.534209212091937e-07, + "loss": 19.8699, + "step": 454200 + }, + { + "epoch": 0.917532937131591, + "grad_norm": 183.22332763671875, + "learning_rate": 2.5331121305481154e-07, + "loss": 13.1073, + "step": 454210 + }, + { + "epoch": 0.9175531377642748, + "grad_norm": 450.3115234375, + "learning_rate": 2.53201528035214e-07, + "loss": 17.4463, + "step": 454220 + }, + { + "epoch": 0.9175733383969585, + "grad_norm": 493.7592468261719, + "learning_rate": 2.530918661509368e-07, + "loss": 15.6409, + "step": 454230 + }, + { + "epoch": 0.9175935390296424, + "grad_norm": 193.6795654296875, + "learning_rate": 2.529822274025151e-07, + "loss": 34.6854, + "step": 454240 + }, + { + "epoch": 0.9176137396623262, + "grad_norm": 202.70217895507812, + "learning_rate": 2.5287261179048117e-07, + "loss": 13.2721, + "step": 454250 + }, + { + "epoch": 0.91763394029501, + "grad_norm": 41.909339904785156, + "learning_rate": 2.5276301931537015e-07, + "loss": 11.1418, + "step": 454260 + }, + { + "epoch": 0.9176541409276938, + "grad_norm": 281.51202392578125, + "learning_rate": 2.5265344997771726e-07, + "loss": 14.5339, + "step": 454270 + }, + { + "epoch": 0.9176743415603776, + "grad_norm": 454.9624938964844, + "learning_rate": 2.525439037780558e-07, + "loss": 18.4829, + "step": 454280 + }, + { + "epoch": 0.9176945421930615, + "grad_norm": 433.8101501464844, + "learning_rate": 2.5243438071691826e-07, + "loss": 10.5991, + "step": 454290 + }, + { + "epoch": 0.9177147428257453, + "grad_norm": 178.11285400390625, + "learning_rate": 2.523248807948403e-07, + "loss": 17.7645, + "step": 454300 + }, + { + "epoch": 0.9177349434584291, + "grad_norm": 75.87867736816406, + "learning_rate": 2.522154040123559e-07, + "loss": 30.4693, + "step": 454310 + }, + { + "epoch": 0.9177551440911129, + "grad_norm": 78.83842468261719, + "learning_rate": 2.521059503699974e-07, + "loss": 50.0412, + "step": 454320 + }, + { + "epoch": 0.9177753447237967, + "grad_norm": 419.480712890625, + "learning_rate": 2.5199651986829777e-07, + "loss": 11.1858, + "step": 454330 + }, + { + "epoch": 0.9177955453564806, + "grad_norm": 446.3019104003906, + "learning_rate": 2.518871125077926e-07, + "loss": 25.6513, + "step": 454340 + }, + { + "epoch": 0.9178157459891644, + "grad_norm": 466.9568786621094, + "learning_rate": 2.5177772828901327e-07, + "loss": 16.6025, + "step": 454350 + }, + { + "epoch": 0.9178359466218482, + "grad_norm": 231.08163452148438, + "learning_rate": 2.5166836721249254e-07, + "loss": 22.5752, + "step": 454360 + }, + { + "epoch": 0.917856147254532, + "grad_norm": 224.1905517578125, + "learning_rate": 2.515590292787656e-07, + "loss": 15.2264, + "step": 454370 + }, + { + "epoch": 0.9178763478872158, + "grad_norm": 112.1280746459961, + "learning_rate": 2.5144971448836263e-07, + "loss": 19.4447, + "step": 454380 + }, + { + "epoch": 0.9178965485198997, + "grad_norm": 330.6990966796875, + "learning_rate": 2.5134042284181927e-07, + "loss": 11.428, + "step": 454390 + }, + { + "epoch": 0.9179167491525835, + "grad_norm": 127.68366241455078, + "learning_rate": 2.5123115433966615e-07, + "loss": 14.6926, + "step": 454400 + }, + { + "epoch": 0.9179369497852673, + "grad_norm": 307.2967834472656, + "learning_rate": 2.5112190898243627e-07, + "loss": 24.1661, + "step": 454410 + }, + { + "epoch": 0.9179571504179511, + "grad_norm": 176.82020568847656, + "learning_rate": 2.5101268677066247e-07, + "loss": 16.3142, + "step": 454420 + }, + { + "epoch": 0.9179773510506349, + "grad_norm": 1019.8779907226562, + "learning_rate": 2.5090348770487604e-07, + "loss": 17.838, + "step": 454430 + }, + { + "epoch": 0.9179975516833188, + "grad_norm": 17.085533142089844, + "learning_rate": 2.50794311785611e-07, + "loss": 17.5736, + "step": 454440 + }, + { + "epoch": 0.9180177523160026, + "grad_norm": 397.52734375, + "learning_rate": 2.5068515901339794e-07, + "loss": 15.178, + "step": 454450 + }, + { + "epoch": 0.9180379529486864, + "grad_norm": 303.3193054199219, + "learning_rate": 2.505760293887699e-07, + "loss": 10.8081, + "step": 454460 + }, + { + "epoch": 0.9180581535813702, + "grad_norm": 173.09710693359375, + "learning_rate": 2.5046692291225803e-07, + "loss": 5.3988, + "step": 454470 + }, + { + "epoch": 0.918078354214054, + "grad_norm": 294.963623046875, + "learning_rate": 2.503578395843936e-07, + "loss": 14.7642, + "step": 454480 + }, + { + "epoch": 0.9180985548467377, + "grad_norm": 119.04206085205078, + "learning_rate": 2.502487794057101e-07, + "loss": 17.0179, + "step": 454490 + }, + { + "epoch": 0.9181187554794216, + "grad_norm": 343.16058349609375, + "learning_rate": 2.5013974237673824e-07, + "loss": 19.2829, + "step": 454500 + }, + { + "epoch": 0.9181389561121054, + "grad_norm": 0.0, + "learning_rate": 2.50030728498008e-07, + "loss": 18.3615, + "step": 454510 + }, + { + "epoch": 0.9181591567447892, + "grad_norm": 197.31698608398438, + "learning_rate": 2.499217377700519e-07, + "loss": 14.0502, + "step": 454520 + }, + { + "epoch": 0.918179357377473, + "grad_norm": 261.01910400390625, + "learning_rate": 2.498127701934022e-07, + "loss": 12.4974, + "step": 454530 + }, + { + "epoch": 0.9181995580101568, + "grad_norm": 106.8160171508789, + "learning_rate": 2.49703825768588e-07, + "loss": 15.5191, + "step": 454540 + }, + { + "epoch": 0.9182197586428407, + "grad_norm": 354.593017578125, + "learning_rate": 2.49594904496141e-07, + "loss": 22.9946, + "step": 454550 + }, + { + "epoch": 0.9182399592755245, + "grad_norm": 262.3971252441406, + "learning_rate": 2.494860063765936e-07, + "loss": 30.4192, + "step": 454560 + }, + { + "epoch": 0.9182601599082083, + "grad_norm": 167.65670776367188, + "learning_rate": 2.493771314104743e-07, + "loss": 9.0718, + "step": 454570 + }, + { + "epoch": 0.9182803605408921, + "grad_norm": 720.4822998046875, + "learning_rate": 2.492682795983148e-07, + "loss": 27.2784, + "step": 454580 + }, + { + "epoch": 0.918300561173576, + "grad_norm": 269.57147216796875, + "learning_rate": 2.4915945094064476e-07, + "loss": 20.2397, + "step": 454590 + }, + { + "epoch": 0.9183207618062598, + "grad_norm": 455.30487060546875, + "learning_rate": 2.4905064543799706e-07, + "loss": 35.0193, + "step": 454600 + }, + { + "epoch": 0.9183409624389436, + "grad_norm": 482.8698425292969, + "learning_rate": 2.4894186309089906e-07, + "loss": 30.1231, + "step": 454610 + }, + { + "epoch": 0.9183611630716274, + "grad_norm": 209.74327087402344, + "learning_rate": 2.48833103899882e-07, + "loss": 8.6898, + "step": 454620 + }, + { + "epoch": 0.9183813637043112, + "grad_norm": 127.46502685546875, + "learning_rate": 2.487243678654772e-07, + "loss": 23.6524, + "step": 454630 + }, + { + "epoch": 0.918401564336995, + "grad_norm": 287.8888854980469, + "learning_rate": 2.486156549882135e-07, + "loss": 24.7229, + "step": 454640 + }, + { + "epoch": 0.9184217649696789, + "grad_norm": 433.9644775390625, + "learning_rate": 2.485069652686195e-07, + "loss": 18.8414, + "step": 454650 + }, + { + "epoch": 0.9184419656023627, + "grad_norm": 0.0, + "learning_rate": 2.4839829870722756e-07, + "loss": 10.8807, + "step": 454660 + }, + { + "epoch": 0.9184621662350465, + "grad_norm": 46.72091293334961, + "learning_rate": 2.482896553045661e-07, + "loss": 14.1955, + "step": 454670 + }, + { + "epoch": 0.9184823668677303, + "grad_norm": 419.1227111816406, + "learning_rate": 2.4818103506116355e-07, + "loss": 19.7585, + "step": 454680 + }, + { + "epoch": 0.9185025675004141, + "grad_norm": 182.9734649658203, + "learning_rate": 2.4807243797755064e-07, + "loss": 17.2594, + "step": 454690 + }, + { + "epoch": 0.918522768133098, + "grad_norm": 303.3350524902344, + "learning_rate": 2.479638640542564e-07, + "loss": 19.9566, + "step": 454700 + }, + { + "epoch": 0.9185429687657818, + "grad_norm": 259.21917724609375, + "learning_rate": 2.478553132918099e-07, + "loss": 16.9143, + "step": 454710 + }, + { + "epoch": 0.9185631693984656, + "grad_norm": 220.04345703125, + "learning_rate": 2.477467856907401e-07, + "loss": 21.5817, + "step": 454720 + }, + { + "epoch": 0.9185833700311494, + "grad_norm": 250.58963012695312, + "learning_rate": 2.4763828125157654e-07, + "loss": 17.703, + "step": 454730 + }, + { + "epoch": 0.9186035706638332, + "grad_norm": 231.0832061767578, + "learning_rate": 2.4752979997484774e-07, + "loss": 25.8946, + "step": 454740 + }, + { + "epoch": 0.918623771296517, + "grad_norm": 191.92022705078125, + "learning_rate": 2.474213418610816e-07, + "loss": 10.4869, + "step": 454750 + }, + { + "epoch": 0.9186439719292008, + "grad_norm": 118.54442596435547, + "learning_rate": 2.4731290691080766e-07, + "loss": 15.8718, + "step": 454760 + }, + { + "epoch": 0.9186641725618846, + "grad_norm": 472.7878112792969, + "learning_rate": 2.472044951245539e-07, + "loss": 13.8702, + "step": 454770 + }, + { + "epoch": 0.9186843731945684, + "grad_norm": 41.55654525756836, + "learning_rate": 2.470961065028499e-07, + "loss": 7.9889, + "step": 454780 + }, + { + "epoch": 0.9187045738272522, + "grad_norm": 57.816062927246094, + "learning_rate": 2.4698774104622235e-07, + "loss": 17.0623, + "step": 454790 + }, + { + "epoch": 0.918724774459936, + "grad_norm": 249.79714965820312, + "learning_rate": 2.4687939875519984e-07, + "loss": 12.9687, + "step": 454800 + }, + { + "epoch": 0.9187449750926199, + "grad_norm": 1019.9679565429688, + "learning_rate": 2.4677107963031134e-07, + "loss": 23.4064, + "step": 454810 + }, + { + "epoch": 0.9187651757253037, + "grad_norm": 15.089723587036133, + "learning_rate": 2.4666278367208417e-07, + "loss": 8.9919, + "step": 454820 + }, + { + "epoch": 0.9187853763579875, + "grad_norm": 226.8956298828125, + "learning_rate": 2.465545108810452e-07, + "loss": 13.0729, + "step": 454830 + }, + { + "epoch": 0.9188055769906713, + "grad_norm": 19.46324920654297, + "learning_rate": 2.464462612577234e-07, + "loss": 18.5806, + "step": 454840 + }, + { + "epoch": 0.9188257776233552, + "grad_norm": 318.8092041015625, + "learning_rate": 2.463380348026467e-07, + "loss": 19.0252, + "step": 454850 + }, + { + "epoch": 0.918845978256039, + "grad_norm": 317.57550048828125, + "learning_rate": 2.4622983151634083e-07, + "loss": 19.1349, + "step": 454860 + }, + { + "epoch": 0.9188661788887228, + "grad_norm": 118.0604019165039, + "learning_rate": 2.461216513993342e-07, + "loss": 7.3784, + "step": 454870 + }, + { + "epoch": 0.9188863795214066, + "grad_norm": 300.44110107421875, + "learning_rate": 2.460134944521547e-07, + "loss": 14.0023, + "step": 454880 + }, + { + "epoch": 0.9189065801540904, + "grad_norm": 59.587764739990234, + "learning_rate": 2.459053606753292e-07, + "loss": 17.6721, + "step": 454890 + }, + { + "epoch": 0.9189267807867743, + "grad_norm": 458.0155944824219, + "learning_rate": 2.457972500693834e-07, + "loss": 27.4783, + "step": 454900 + }, + { + "epoch": 0.9189469814194581, + "grad_norm": 169.9490203857422, + "learning_rate": 2.456891626348451e-07, + "loss": 11.7725, + "step": 454910 + }, + { + "epoch": 0.9189671820521419, + "grad_norm": 269.10235595703125, + "learning_rate": 2.455810983722429e-07, + "loss": 20.9167, + "step": 454920 + }, + { + "epoch": 0.9189873826848257, + "grad_norm": 309.55657958984375, + "learning_rate": 2.4547305728210015e-07, + "loss": 19.0819, + "step": 454930 + }, + { + "epoch": 0.9190075833175095, + "grad_norm": 28.484663009643555, + "learning_rate": 2.453650393649448e-07, + "loss": 17.6875, + "step": 454940 + }, + { + "epoch": 0.9190277839501934, + "grad_norm": 145.0044708251953, + "learning_rate": 2.4525704462130485e-07, + "loss": 10.4286, + "step": 454950 + }, + { + "epoch": 0.9190479845828772, + "grad_norm": 0.0, + "learning_rate": 2.4514907305170476e-07, + "loss": 11.6454, + "step": 454960 + }, + { + "epoch": 0.919068185215561, + "grad_norm": 241.74693298339844, + "learning_rate": 2.4504112465667085e-07, + "loss": 16.4838, + "step": 454970 + }, + { + "epoch": 0.9190883858482448, + "grad_norm": 88.93653869628906, + "learning_rate": 2.44933199436731e-07, + "loss": 11.3602, + "step": 454980 + }, + { + "epoch": 0.9191085864809286, + "grad_norm": 89.9294662475586, + "learning_rate": 2.448252973924087e-07, + "loss": 17.4705, + "step": 454990 + }, + { + "epoch": 0.9191287871136123, + "grad_norm": 78.2550048828125, + "learning_rate": 2.447174185242324e-07, + "loss": 15.4892, + "step": 455000 + }, + { + "epoch": 0.9191489877462962, + "grad_norm": 72.71954345703125, + "learning_rate": 2.446095628327261e-07, + "loss": 21.7942, + "step": 455010 + }, + { + "epoch": 0.91916918837898, + "grad_norm": 283.3192138671875, + "learning_rate": 2.4450173031841607e-07, + "loss": 19.0136, + "step": 455020 + }, + { + "epoch": 0.9191893890116638, + "grad_norm": 182.2919464111328, + "learning_rate": 2.4439392098182804e-07, + "loss": 19.1115, + "step": 455030 + }, + { + "epoch": 0.9192095896443476, + "grad_norm": 6.921985626220703, + "learning_rate": 2.442861348234865e-07, + "loss": 15.7835, + "step": 455040 + }, + { + "epoch": 0.9192297902770314, + "grad_norm": 232.60023498535156, + "learning_rate": 2.4417837184391833e-07, + "loss": 18.8307, + "step": 455050 + }, + { + "epoch": 0.9192499909097153, + "grad_norm": 146.5553741455078, + "learning_rate": 2.4407063204364703e-07, + "loss": 15.4173, + "step": 455060 + }, + { + "epoch": 0.9192701915423991, + "grad_norm": 87.2750244140625, + "learning_rate": 2.4396291542319985e-07, + "loss": 9.5828, + "step": 455070 + }, + { + "epoch": 0.9192903921750829, + "grad_norm": 324.0497741699219, + "learning_rate": 2.438552219831003e-07, + "loss": 19.6949, + "step": 455080 + }, + { + "epoch": 0.9193105928077667, + "grad_norm": 119.35296630859375, + "learning_rate": 2.43747551723873e-07, + "loss": 14.4704, + "step": 455090 + }, + { + "epoch": 0.9193307934404505, + "grad_norm": 38.295467376708984, + "learning_rate": 2.4363990464604357e-07, + "loss": 14.0668, + "step": 455100 + }, + { + "epoch": 0.9193509940731344, + "grad_norm": 327.8507385253906, + "learning_rate": 2.435322807501367e-07, + "loss": 12.0949, + "step": 455110 + }, + { + "epoch": 0.9193711947058182, + "grad_norm": 181.30738830566406, + "learning_rate": 2.4342468003667576e-07, + "loss": 11.6783, + "step": 455120 + }, + { + "epoch": 0.919391395338502, + "grad_norm": 172.4878692626953, + "learning_rate": 2.4331710250618647e-07, + "loss": 14.4112, + "step": 455130 + }, + { + "epoch": 0.9194115959711858, + "grad_norm": 207.41802978515625, + "learning_rate": 2.432095481591934e-07, + "loss": 15.1611, + "step": 455140 + }, + { + "epoch": 0.9194317966038696, + "grad_norm": 105.40169525146484, + "learning_rate": 2.4310201699621896e-07, + "loss": 7.7742, + "step": 455150 + }, + { + "epoch": 0.9194519972365535, + "grad_norm": 671.6128540039062, + "learning_rate": 2.429945090177888e-07, + "loss": 15.4885, + "step": 455160 + }, + { + "epoch": 0.9194721978692373, + "grad_norm": 474.05694580078125, + "learning_rate": 2.4288702422442633e-07, + "loss": 20.5155, + "step": 455170 + }, + { + "epoch": 0.9194923985019211, + "grad_norm": 305.14703369140625, + "learning_rate": 2.4277956261665624e-07, + "loss": 23.5529, + "step": 455180 + }, + { + "epoch": 0.9195125991346049, + "grad_norm": 270.3517761230469, + "learning_rate": 2.426721241950003e-07, + "loss": 19.0494, + "step": 455190 + }, + { + "epoch": 0.9195327997672887, + "grad_norm": 52.737709045410156, + "learning_rate": 2.4256470895998363e-07, + "loss": 10.0723, + "step": 455200 + }, + { + "epoch": 0.9195530003999726, + "grad_norm": 494.6665344238281, + "learning_rate": 2.4245731691213137e-07, + "loss": 19.9762, + "step": 455210 + }, + { + "epoch": 0.9195732010326564, + "grad_norm": 191.9027557373047, + "learning_rate": 2.423499480519631e-07, + "loss": 13.1072, + "step": 455220 + }, + { + "epoch": 0.9195934016653402, + "grad_norm": 450.24920654296875, + "learning_rate": 2.4224260238000454e-07, + "loss": 15.7209, + "step": 455230 + }, + { + "epoch": 0.919613602298024, + "grad_norm": 457.0672302246094, + "learning_rate": 2.421352798967791e-07, + "loss": 17.7888, + "step": 455240 + }, + { + "epoch": 0.9196338029307078, + "grad_norm": 25.905193328857422, + "learning_rate": 2.420279806028092e-07, + "loss": 15.7499, + "step": 455250 + }, + { + "epoch": 0.9196540035633916, + "grad_norm": 327.5270690917969, + "learning_rate": 2.4192070449861717e-07, + "loss": 19.2746, + "step": 455260 + }, + { + "epoch": 0.9196742041960754, + "grad_norm": 322.00762939453125, + "learning_rate": 2.418134515847276e-07, + "loss": 14.7609, + "step": 455270 + }, + { + "epoch": 0.9196944048287592, + "grad_norm": 283.40240478515625, + "learning_rate": 2.417062218616617e-07, + "loss": 18.1085, + "step": 455280 + }, + { + "epoch": 0.919714605461443, + "grad_norm": 108.97318267822266, + "learning_rate": 2.415990153299419e-07, + "loss": 9.2425, + "step": 455290 + }, + { + "epoch": 0.9197348060941268, + "grad_norm": 206.3502960205078, + "learning_rate": 2.414918319900922e-07, + "loss": 14.3758, + "step": 455300 + }, + { + "epoch": 0.9197550067268107, + "grad_norm": 139.03282165527344, + "learning_rate": 2.413846718426338e-07, + "loss": 7.5249, + "step": 455310 + }, + { + "epoch": 0.9197752073594945, + "grad_norm": 43.54553985595703, + "learning_rate": 2.412775348880897e-07, + "loss": 11.8814, + "step": 455320 + }, + { + "epoch": 0.9197954079921783, + "grad_norm": 180.7390899658203, + "learning_rate": 2.4117042112698107e-07, + "loss": 22.3416, + "step": 455330 + }, + { + "epoch": 0.9198156086248621, + "grad_norm": 370.84210205078125, + "learning_rate": 2.410633305598309e-07, + "loss": 10.3919, + "step": 455340 + }, + { + "epoch": 0.9198358092575459, + "grad_norm": 460.71710205078125, + "learning_rate": 2.4095626318716146e-07, + "loss": 17.2197, + "step": 455350 + }, + { + "epoch": 0.9198560098902298, + "grad_norm": 40.18421936035156, + "learning_rate": 2.40849219009493e-07, + "loss": 15.7161, + "step": 455360 + }, + { + "epoch": 0.9198762105229136, + "grad_norm": 352.50836181640625, + "learning_rate": 2.407421980273489e-07, + "loss": 14.1091, + "step": 455370 + }, + { + "epoch": 0.9198964111555974, + "grad_norm": 205.72628784179688, + "learning_rate": 2.406352002412499e-07, + "loss": 7.1285, + "step": 455380 + }, + { + "epoch": 0.9199166117882812, + "grad_norm": 200.0044403076172, + "learning_rate": 2.4052822565171775e-07, + "loss": 36.7614, + "step": 455390 + }, + { + "epoch": 0.919936812420965, + "grad_norm": 207.90643310546875, + "learning_rate": 2.404212742592743e-07, + "loss": 12.0063, + "step": 455400 + }, + { + "epoch": 0.9199570130536489, + "grad_norm": 166.22991943359375, + "learning_rate": 2.4031434606443914e-07, + "loss": 17.9532, + "step": 455410 + }, + { + "epoch": 0.9199772136863327, + "grad_norm": 689.3635864257812, + "learning_rate": 2.4020744106773573e-07, + "loss": 16.9967, + "step": 455420 + }, + { + "epoch": 0.9199974143190165, + "grad_norm": 146.4396514892578, + "learning_rate": 2.401005592696837e-07, + "loss": 5.9636, + "step": 455430 + }, + { + "epoch": 0.9200176149517003, + "grad_norm": 326.1533508300781, + "learning_rate": 2.399937006708036e-07, + "loss": 14.2867, + "step": 455440 + }, + { + "epoch": 0.9200378155843841, + "grad_norm": 137.99993896484375, + "learning_rate": 2.3988686527161686e-07, + "loss": 9.7865, + "step": 455450 + }, + { + "epoch": 0.920058016217068, + "grad_norm": 453.38641357421875, + "learning_rate": 2.3978005307264517e-07, + "loss": 14.568, + "step": 455460 + }, + { + "epoch": 0.9200782168497518, + "grad_norm": 268.1697692871094, + "learning_rate": 2.396732640744076e-07, + "loss": 15.3073, + "step": 455470 + }, + { + "epoch": 0.9200984174824356, + "grad_norm": 202.7796173095703, + "learning_rate": 2.395664982774243e-07, + "loss": 10.6338, + "step": 455480 + }, + { + "epoch": 0.9201186181151194, + "grad_norm": 256.9638977050781, + "learning_rate": 2.3945975568221814e-07, + "loss": 13.3608, + "step": 455490 + }, + { + "epoch": 0.9201388187478032, + "grad_norm": 298.02587890625, + "learning_rate": 2.3935303628930705e-07, + "loss": 19.9997, + "step": 455500 + }, + { + "epoch": 0.9201590193804869, + "grad_norm": 51.55033874511719, + "learning_rate": 2.392463400992112e-07, + "loss": 26.3338, + "step": 455510 + }, + { + "epoch": 0.9201792200131708, + "grad_norm": 608.9950561523438, + "learning_rate": 2.3913966711245185e-07, + "loss": 17.7827, + "step": 455520 + }, + { + "epoch": 0.9201994206458546, + "grad_norm": 417.44873046875, + "learning_rate": 2.390330173295491e-07, + "loss": 18.4131, + "step": 455530 + }, + { + "epoch": 0.9202196212785384, + "grad_norm": 193.98619079589844, + "learning_rate": 2.389263907510209e-07, + "loss": 13.5151, + "step": 455540 + }, + { + "epoch": 0.9202398219112222, + "grad_norm": 455.22021484375, + "learning_rate": 2.388197873773879e-07, + "loss": 20.0675, + "step": 455550 + }, + { + "epoch": 0.920260022543906, + "grad_norm": 185.51930236816406, + "learning_rate": 2.387132072091708e-07, + "loss": 13.7994, + "step": 455560 + }, + { + "epoch": 0.9202802231765899, + "grad_norm": 472.70367431640625, + "learning_rate": 2.3860665024688757e-07, + "loss": 13.6387, + "step": 455570 + }, + { + "epoch": 0.9203004238092737, + "grad_norm": 191.1717987060547, + "learning_rate": 2.3850011649105774e-07, + "loss": 16.7665, + "step": 455580 + }, + { + "epoch": 0.9203206244419575, + "grad_norm": 397.5945739746094, + "learning_rate": 2.3839360594220094e-07, + "loss": 9.1233, + "step": 455590 + }, + { + "epoch": 0.9203408250746413, + "grad_norm": 342.2540588378906, + "learning_rate": 2.3828711860083676e-07, + "loss": 12.2973, + "step": 455600 + }, + { + "epoch": 0.9203610257073251, + "grad_norm": 223.0743408203125, + "learning_rate": 2.3818065446748306e-07, + "loss": 14.3779, + "step": 455610 + }, + { + "epoch": 0.920381226340009, + "grad_norm": 454.0294189453125, + "learning_rate": 2.380742135426589e-07, + "loss": 27.183, + "step": 455620 + }, + { + "epoch": 0.9204014269726928, + "grad_norm": 218.7557830810547, + "learning_rate": 2.3796779582688444e-07, + "loss": 17.7765, + "step": 455630 + }, + { + "epoch": 0.9204216276053766, + "grad_norm": 418.7548522949219, + "learning_rate": 2.3786140132067703e-07, + "loss": 14.2868, + "step": 455640 + }, + { + "epoch": 0.9204418282380604, + "grad_norm": 293.7866516113281, + "learning_rate": 2.3775503002455514e-07, + "loss": 15.8268, + "step": 455650 + }, + { + "epoch": 0.9204620288707442, + "grad_norm": 301.7937316894531, + "learning_rate": 2.3764868193903835e-07, + "loss": 33.7642, + "step": 455660 + }, + { + "epoch": 0.9204822295034281, + "grad_norm": 463.62994384765625, + "learning_rate": 2.3754235706464457e-07, + "loss": 14.4391, + "step": 455670 + }, + { + "epoch": 0.9205024301361119, + "grad_norm": 430.42596435546875, + "learning_rate": 2.3743605540189063e-07, + "loss": 23.2213, + "step": 455680 + }, + { + "epoch": 0.9205226307687957, + "grad_norm": 209.38778686523438, + "learning_rate": 2.3732977695129612e-07, + "loss": 16.8263, + "step": 455690 + }, + { + "epoch": 0.9205428314014795, + "grad_norm": 154.90745544433594, + "learning_rate": 2.3722352171337836e-07, + "loss": 19.6068, + "step": 455700 + }, + { + "epoch": 0.9205630320341633, + "grad_norm": 468.6112060546875, + "learning_rate": 2.3711728968865643e-07, + "loss": 20.4635, + "step": 455710 + }, + { + "epoch": 0.9205832326668472, + "grad_norm": 424.70806884765625, + "learning_rate": 2.3701108087764657e-07, + "loss": 10.8851, + "step": 455720 + }, + { + "epoch": 0.920603433299531, + "grad_norm": 267.2959289550781, + "learning_rate": 2.3690489528086668e-07, + "loss": 9.1479, + "step": 455730 + }, + { + "epoch": 0.9206236339322148, + "grad_norm": 345.7020263671875, + "learning_rate": 2.367987328988347e-07, + "loss": 9.4651, + "step": 455740 + }, + { + "epoch": 0.9206438345648986, + "grad_norm": 311.5840759277344, + "learning_rate": 2.366925937320691e-07, + "loss": 21.4974, + "step": 455750 + }, + { + "epoch": 0.9206640351975824, + "grad_norm": 279.4144592285156, + "learning_rate": 2.36586477781085e-07, + "loss": 23.7127, + "step": 455760 + }, + { + "epoch": 0.9206842358302662, + "grad_norm": 575.6854248046875, + "learning_rate": 2.3648038504640036e-07, + "loss": 17.166, + "step": 455770 + }, + { + "epoch": 0.92070443646295, + "grad_norm": 601.4854125976562, + "learning_rate": 2.3637431552853363e-07, + "loss": 16.4343, + "step": 455780 + }, + { + "epoch": 0.9207246370956338, + "grad_norm": 610.1043701171875, + "learning_rate": 2.362682692280005e-07, + "loss": 27.5912, + "step": 455790 + }, + { + "epoch": 0.9207448377283176, + "grad_norm": 372.38665771484375, + "learning_rate": 2.361622461453178e-07, + "loss": 20.647, + "step": 455800 + }, + { + "epoch": 0.9207650383610014, + "grad_norm": 182.55914306640625, + "learning_rate": 2.3605624628100178e-07, + "loss": 15.2259, + "step": 455810 + }, + { + "epoch": 0.9207852389936853, + "grad_norm": 343.69329833984375, + "learning_rate": 2.3595026963557145e-07, + "loss": 17.3321, + "step": 455820 + }, + { + "epoch": 0.9208054396263691, + "grad_norm": 263.2419738769531, + "learning_rate": 2.3584431620954085e-07, + "loss": 17.0018, + "step": 455830 + }, + { + "epoch": 0.9208256402590529, + "grad_norm": 129.1766357421875, + "learning_rate": 2.357383860034268e-07, + "loss": 16.7368, + "step": 455840 + }, + { + "epoch": 0.9208458408917367, + "grad_norm": 298.4427795410156, + "learning_rate": 2.3563247901774666e-07, + "loss": 11.9806, + "step": 455850 + }, + { + "epoch": 0.9208660415244205, + "grad_norm": 232.2042999267578, + "learning_rate": 2.3552659525301557e-07, + "loss": 12.4643, + "step": 455860 + }, + { + "epoch": 0.9208862421571044, + "grad_norm": 304.7831115722656, + "learning_rate": 2.354207347097498e-07, + "loss": 20.6315, + "step": 455870 + }, + { + "epoch": 0.9209064427897882, + "grad_norm": 396.6595153808594, + "learning_rate": 2.3531489738846613e-07, + "loss": 29.7388, + "step": 455880 + }, + { + "epoch": 0.920926643422472, + "grad_norm": 237.63577270507812, + "learning_rate": 2.3520908328968027e-07, + "loss": 23.1591, + "step": 455890 + }, + { + "epoch": 0.9209468440551558, + "grad_norm": 525.749755859375, + "learning_rate": 2.351032924139063e-07, + "loss": 20.3525, + "step": 455900 + }, + { + "epoch": 0.9209670446878396, + "grad_norm": 22.981351852416992, + "learning_rate": 2.349975247616615e-07, + "loss": 22.067, + "step": 455910 + }, + { + "epoch": 0.9209872453205235, + "grad_norm": 10.424921035766602, + "learning_rate": 2.3489178033345994e-07, + "loss": 14.1302, + "step": 455920 + }, + { + "epoch": 0.9210074459532073, + "grad_norm": 384.0202941894531, + "learning_rate": 2.34786059129819e-07, + "loss": 16.9353, + "step": 455930 + }, + { + "epoch": 0.9210276465858911, + "grad_norm": 303.44439697265625, + "learning_rate": 2.3468036115125215e-07, + "loss": 14.637, + "step": 455940 + }, + { + "epoch": 0.9210478472185749, + "grad_norm": 376.6944580078125, + "learning_rate": 2.3457468639827563e-07, + "loss": 22.1572, + "step": 455950 + }, + { + "epoch": 0.9210680478512587, + "grad_norm": 133.5134735107422, + "learning_rate": 2.344690348714046e-07, + "loss": 13.4534, + "step": 455960 + }, + { + "epoch": 0.9210882484839426, + "grad_norm": 264.57220458984375, + "learning_rate": 2.3436340657115253e-07, + "loss": 21.2485, + "step": 455970 + }, + { + "epoch": 0.9211084491166264, + "grad_norm": 271.7082824707031, + "learning_rate": 2.3425780149803623e-07, + "loss": 7.5798, + "step": 455980 + }, + { + "epoch": 0.9211286497493102, + "grad_norm": 312.15020751953125, + "learning_rate": 2.3415221965256807e-07, + "loss": 25.5096, + "step": 455990 + }, + { + "epoch": 0.921148850381994, + "grad_norm": 6.742545127868652, + "learning_rate": 2.3404666103526542e-07, + "loss": 19.401, + "step": 456000 + }, + { + "epoch": 0.9211690510146778, + "grad_norm": 75.15931701660156, + "learning_rate": 2.3394112564664062e-07, + "loss": 10.5847, + "step": 456010 + }, + { + "epoch": 0.9211892516473615, + "grad_norm": 522.3594970703125, + "learning_rate": 2.338356134872083e-07, + "loss": 19.4345, + "step": 456020 + }, + { + "epoch": 0.9212094522800454, + "grad_norm": 394.4520263671875, + "learning_rate": 2.3373012455748356e-07, + "loss": 22.1055, + "step": 456030 + }, + { + "epoch": 0.9212296529127292, + "grad_norm": 343.9664611816406, + "learning_rate": 2.3362465885798046e-07, + "loss": 19.8991, + "step": 456040 + }, + { + "epoch": 0.921249853545413, + "grad_norm": 0.0, + "learning_rate": 2.3351921638921193e-07, + "loss": 17.1099, + "step": 456050 + }, + { + "epoch": 0.9212700541780968, + "grad_norm": 111.21340942382812, + "learning_rate": 2.3341379715169254e-07, + "loss": 9.5939, + "step": 456060 + }, + { + "epoch": 0.9212902548107806, + "grad_norm": 159.31533813476562, + "learning_rate": 2.33308401145938e-07, + "loss": 6.9512, + "step": 456070 + }, + { + "epoch": 0.9213104554434645, + "grad_norm": 552.3495483398438, + "learning_rate": 2.3320302837245846e-07, + "loss": 19.0945, + "step": 456080 + }, + { + "epoch": 0.9213306560761483, + "grad_norm": 248.4833984375, + "learning_rate": 2.3309767883176903e-07, + "loss": 13.281, + "step": 456090 + }, + { + "epoch": 0.9213508567088321, + "grad_norm": 290.37841796875, + "learning_rate": 2.3299235252438434e-07, + "loss": 33.2221, + "step": 456100 + }, + { + "epoch": 0.9213710573415159, + "grad_norm": 402.7010803222656, + "learning_rate": 2.3288704945081675e-07, + "loss": 17.8335, + "step": 456110 + }, + { + "epoch": 0.9213912579741997, + "grad_norm": 406.3812561035156, + "learning_rate": 2.327817696115786e-07, + "loss": 22.4548, + "step": 456120 + }, + { + "epoch": 0.9214114586068836, + "grad_norm": 246.31358337402344, + "learning_rate": 2.3267651300718397e-07, + "loss": 12.0949, + "step": 456130 + }, + { + "epoch": 0.9214316592395674, + "grad_norm": 322.2605285644531, + "learning_rate": 2.325712796381474e-07, + "loss": 9.4732, + "step": 456140 + }, + { + "epoch": 0.9214518598722512, + "grad_norm": 855.1376953125, + "learning_rate": 2.3246606950497851e-07, + "loss": 16.9807, + "step": 456150 + }, + { + "epoch": 0.921472060504935, + "grad_norm": 379.2497253417969, + "learning_rate": 2.3236088260819188e-07, + "loss": 17.8963, + "step": 456160 + }, + { + "epoch": 0.9214922611376188, + "grad_norm": 177.31777954101562, + "learning_rate": 2.3225571894830047e-07, + "loss": 31.2069, + "step": 456170 + }, + { + "epoch": 0.9215124617703027, + "grad_norm": 412.48968505859375, + "learning_rate": 2.3215057852581712e-07, + "loss": 11.0542, + "step": 456180 + }, + { + "epoch": 0.9215326624029865, + "grad_norm": 343.11834716796875, + "learning_rate": 2.3204546134125207e-07, + "loss": 14.3232, + "step": 456190 + }, + { + "epoch": 0.9215528630356703, + "grad_norm": 515.6344604492188, + "learning_rate": 2.319403673951204e-07, + "loss": 31.6484, + "step": 456200 + }, + { + "epoch": 0.9215730636683541, + "grad_norm": 183.17941284179688, + "learning_rate": 2.3183529668793282e-07, + "loss": 15.7709, + "step": 456210 + }, + { + "epoch": 0.9215932643010379, + "grad_norm": 585.870849609375, + "learning_rate": 2.3173024922020114e-07, + "loss": 28.472, + "step": 456220 + }, + { + "epoch": 0.9216134649337218, + "grad_norm": 326.94207763671875, + "learning_rate": 2.3162522499243833e-07, + "loss": 17.6644, + "step": 456230 + }, + { + "epoch": 0.9216336655664056, + "grad_norm": 14.029626846313477, + "learning_rate": 2.3152022400515561e-07, + "loss": 10.2781, + "step": 456240 + }, + { + "epoch": 0.9216538661990894, + "grad_norm": 214.8909454345703, + "learning_rate": 2.314152462588659e-07, + "loss": 14.2531, + "step": 456250 + }, + { + "epoch": 0.9216740668317732, + "grad_norm": 210.4156951904297, + "learning_rate": 2.3131029175407883e-07, + "loss": 16.5687, + "step": 456260 + }, + { + "epoch": 0.921694267464457, + "grad_norm": 186.1177215576172, + "learning_rate": 2.3120536049130727e-07, + "loss": 14.2814, + "step": 456270 + }, + { + "epoch": 0.9217144680971407, + "grad_norm": 497.6822509765625, + "learning_rate": 2.3110045247106305e-07, + "loss": 17.227, + "step": 456280 + }, + { + "epoch": 0.9217346687298246, + "grad_norm": 189.3236083984375, + "learning_rate": 2.3099556769385578e-07, + "loss": 19.3881, + "step": 456290 + }, + { + "epoch": 0.9217548693625084, + "grad_norm": 373.1511535644531, + "learning_rate": 2.3089070616019838e-07, + "loss": 24.6633, + "step": 456300 + }, + { + "epoch": 0.9217750699951922, + "grad_norm": 139.13961791992188, + "learning_rate": 2.3078586787060098e-07, + "loss": 18.6484, + "step": 456310 + }, + { + "epoch": 0.921795270627876, + "grad_norm": 191.7769775390625, + "learning_rate": 2.306810528255754e-07, + "loss": 20.177, + "step": 456320 + }, + { + "epoch": 0.9218154712605598, + "grad_norm": 216.1976776123047, + "learning_rate": 2.3057626102563125e-07, + "loss": 18.3794, + "step": 456330 + }, + { + "epoch": 0.9218356718932437, + "grad_norm": 235.76002502441406, + "learning_rate": 2.3047149247127975e-07, + "loss": 18.2839, + "step": 456340 + }, + { + "epoch": 0.9218558725259275, + "grad_norm": 203.26577758789062, + "learning_rate": 2.3036674716303277e-07, + "loss": 10.951, + "step": 456350 + }, + { + "epoch": 0.9218760731586113, + "grad_norm": 175.94342041015625, + "learning_rate": 2.3026202510139928e-07, + "loss": 9.0335, + "step": 456360 + }, + { + "epoch": 0.9218962737912951, + "grad_norm": 116.4832992553711, + "learning_rate": 2.3015732628688948e-07, + "loss": 19.4607, + "step": 456370 + }, + { + "epoch": 0.921916474423979, + "grad_norm": 109.354248046875, + "learning_rate": 2.300526507200146e-07, + "loss": 21.0007, + "step": 456380 + }, + { + "epoch": 0.9219366750566628, + "grad_norm": 153.26980590820312, + "learning_rate": 2.2994799840128533e-07, + "loss": 12.0502, + "step": 456390 + }, + { + "epoch": 0.9219568756893466, + "grad_norm": 277.7088623046875, + "learning_rate": 2.2984336933121076e-07, + "loss": 19.344, + "step": 456400 + }, + { + "epoch": 0.9219770763220304, + "grad_norm": 222.34278869628906, + "learning_rate": 2.2973876351030046e-07, + "loss": 19.7468, + "step": 456410 + }, + { + "epoch": 0.9219972769547142, + "grad_norm": 117.16346740722656, + "learning_rate": 2.2963418093906453e-07, + "loss": 11.0947, + "step": 456420 + }, + { + "epoch": 0.922017477587398, + "grad_norm": 273.9718322753906, + "learning_rate": 2.2952962161801485e-07, + "loss": 20.6282, + "step": 456430 + }, + { + "epoch": 0.9220376782200819, + "grad_norm": 423.72808837890625, + "learning_rate": 2.2942508554765764e-07, + "loss": 25.9765, + "step": 456440 + }, + { + "epoch": 0.9220578788527657, + "grad_norm": 286.5534973144531, + "learning_rate": 2.2932057272850416e-07, + "loss": 20.3886, + "step": 456450 + }, + { + "epoch": 0.9220780794854495, + "grad_norm": 265.9811706542969, + "learning_rate": 2.2921608316106402e-07, + "loss": 13.5875, + "step": 456460 + }, + { + "epoch": 0.9220982801181333, + "grad_norm": 275.2646789550781, + "learning_rate": 2.2911161684584626e-07, + "loss": 15.3887, + "step": 456470 + }, + { + "epoch": 0.9221184807508171, + "grad_norm": 626.2711791992188, + "learning_rate": 2.290071737833588e-07, + "loss": 12.1256, + "step": 456480 + }, + { + "epoch": 0.922138681383501, + "grad_norm": 791.0081787109375, + "learning_rate": 2.2890275397411288e-07, + "loss": 26.3437, + "step": 456490 + }, + { + "epoch": 0.9221588820161848, + "grad_norm": 203.3986358642578, + "learning_rate": 2.287983574186159e-07, + "loss": 17.2147, + "step": 456500 + }, + { + "epoch": 0.9221790826488686, + "grad_norm": 125.64102935791016, + "learning_rate": 2.2869398411737687e-07, + "loss": 9.2064, + "step": 456510 + }, + { + "epoch": 0.9221992832815524, + "grad_norm": 374.40863037109375, + "learning_rate": 2.2858963407090484e-07, + "loss": 11.6159, + "step": 456520 + }, + { + "epoch": 0.9222194839142362, + "grad_norm": 126.5657958984375, + "learning_rate": 2.2848530727970775e-07, + "loss": 14.4298, + "step": 456530 + }, + { + "epoch": 0.92223968454692, + "grad_norm": 201.17803955078125, + "learning_rate": 2.2838100374429518e-07, + "loss": 25.9431, + "step": 456540 + }, + { + "epoch": 0.9222598851796038, + "grad_norm": 388.0455322265625, + "learning_rate": 2.2827672346517448e-07, + "loss": 18.5784, + "step": 456550 + }, + { + "epoch": 0.9222800858122876, + "grad_norm": 192.1857452392578, + "learning_rate": 2.2817246644285472e-07, + "loss": 19.548, + "step": 456560 + }, + { + "epoch": 0.9223002864449714, + "grad_norm": 357.21453857421875, + "learning_rate": 2.2806823267784327e-07, + "loss": 10.3948, + "step": 456570 + }, + { + "epoch": 0.9223204870776552, + "grad_norm": 462.7223815917969, + "learning_rate": 2.2796402217064806e-07, + "loss": 42.9697, + "step": 456580 + }, + { + "epoch": 0.9223406877103391, + "grad_norm": 94.40213775634766, + "learning_rate": 2.2785983492177867e-07, + "loss": 17.3635, + "step": 456590 + }, + { + "epoch": 0.9223608883430229, + "grad_norm": 312.3440246582031, + "learning_rate": 2.2775567093174022e-07, + "loss": 37.7345, + "step": 456600 + }, + { + "epoch": 0.9223810889757067, + "grad_norm": 353.4170227050781, + "learning_rate": 2.2765153020104292e-07, + "loss": 17.2546, + "step": 456610 + }, + { + "epoch": 0.9224012896083905, + "grad_norm": 107.74503326416016, + "learning_rate": 2.27547412730193e-07, + "loss": 22.4956, + "step": 456620 + }, + { + "epoch": 0.9224214902410743, + "grad_norm": 261.94525146484375, + "learning_rate": 2.274433185196978e-07, + "loss": 25.4751, + "step": 456630 + }, + { + "epoch": 0.9224416908737582, + "grad_norm": 74.53716278076172, + "learning_rate": 2.2733924757006531e-07, + "loss": 17.1028, + "step": 456640 + }, + { + "epoch": 0.922461891506442, + "grad_norm": 199.27627563476562, + "learning_rate": 2.2723519988180232e-07, + "loss": 24.8385, + "step": 456650 + }, + { + "epoch": 0.9224820921391258, + "grad_norm": 261.4853515625, + "learning_rate": 2.2713117545541618e-07, + "loss": 8.0366, + "step": 456660 + }, + { + "epoch": 0.9225022927718096, + "grad_norm": 293.28741455078125, + "learning_rate": 2.270271742914132e-07, + "loss": 22.0636, + "step": 456670 + }, + { + "epoch": 0.9225224934044934, + "grad_norm": 206.2681121826172, + "learning_rate": 2.269231963903018e-07, + "loss": 13.1123, + "step": 456680 + }, + { + "epoch": 0.9225426940371773, + "grad_norm": 242.92726135253906, + "learning_rate": 2.2681924175258773e-07, + "loss": 12.7175, + "step": 456690 + }, + { + "epoch": 0.9225628946698611, + "grad_norm": 190.275390625, + "learning_rate": 2.2671531037877724e-07, + "loss": 7.0395, + "step": 456700 + }, + { + "epoch": 0.9225830953025449, + "grad_norm": 184.04701232910156, + "learning_rate": 2.2661140226937773e-07, + "loss": 17.0297, + "step": 456710 + }, + { + "epoch": 0.9226032959352287, + "grad_norm": 282.6993408203125, + "learning_rate": 2.2650751742489542e-07, + "loss": 17.5334, + "step": 456720 + }, + { + "epoch": 0.9226234965679125, + "grad_norm": 478.75469970703125, + "learning_rate": 2.2640365584583602e-07, + "loss": 14.0422, + "step": 456730 + }, + { + "epoch": 0.9226436972005964, + "grad_norm": 299.892333984375, + "learning_rate": 2.2629981753270636e-07, + "loss": 25.043, + "step": 456740 + }, + { + "epoch": 0.9226638978332802, + "grad_norm": 441.7330322265625, + "learning_rate": 2.2619600248601327e-07, + "loss": 15.7347, + "step": 456750 + }, + { + "epoch": 0.922684098465964, + "grad_norm": 244.79605102539062, + "learning_rate": 2.2609221070626132e-07, + "loss": 21.4933, + "step": 456760 + }, + { + "epoch": 0.9227042990986478, + "grad_norm": 347.6989440917969, + "learning_rate": 2.259884421939562e-07, + "loss": 15.3209, + "step": 456770 + }, + { + "epoch": 0.9227244997313316, + "grad_norm": 708.950439453125, + "learning_rate": 2.2588469694960535e-07, + "loss": 19.6421, + "step": 456780 + }, + { + "epoch": 0.9227447003640153, + "grad_norm": 401.40875244140625, + "learning_rate": 2.2578097497371333e-07, + "loss": 13.2815, + "step": 456790 + }, + { + "epoch": 0.9227649009966992, + "grad_norm": 156.8737335205078, + "learning_rate": 2.2567727626678527e-07, + "loss": 15.3242, + "step": 456800 + }, + { + "epoch": 0.922785101629383, + "grad_norm": 314.9941101074219, + "learning_rate": 2.2557360082932745e-07, + "loss": 21.0692, + "step": 456810 + }, + { + "epoch": 0.9228053022620668, + "grad_norm": 317.0625, + "learning_rate": 2.2546994866184557e-07, + "loss": 13.397, + "step": 456820 + }, + { + "epoch": 0.9228255028947506, + "grad_norm": 192.15399169921875, + "learning_rate": 2.253663197648426e-07, + "loss": 20.4725, + "step": 456830 + }, + { + "epoch": 0.9228457035274344, + "grad_norm": 204.11090087890625, + "learning_rate": 2.2526271413882528e-07, + "loss": 11.4744, + "step": 456840 + }, + { + "epoch": 0.9228659041601183, + "grad_norm": 152.6605987548828, + "learning_rate": 2.2515913178429937e-07, + "loss": 15.3138, + "step": 456850 + }, + { + "epoch": 0.9228861047928021, + "grad_norm": 496.0419921875, + "learning_rate": 2.2505557270176837e-07, + "loss": 21.9908, + "step": 456860 + }, + { + "epoch": 0.9229063054254859, + "grad_norm": 282.78680419921875, + "learning_rate": 2.249520368917374e-07, + "loss": 16.0992, + "step": 456870 + }, + { + "epoch": 0.9229265060581697, + "grad_norm": 581.7693481445312, + "learning_rate": 2.2484852435471106e-07, + "loss": 23.6697, + "step": 456880 + }, + { + "epoch": 0.9229467066908535, + "grad_norm": 118.70586395263672, + "learning_rate": 2.2474503509119394e-07, + "loss": 16.9308, + "step": 456890 + }, + { + "epoch": 0.9229669073235374, + "grad_norm": 466.8150939941406, + "learning_rate": 2.2464156910168954e-07, + "loss": 20.8201, + "step": 456900 + }, + { + "epoch": 0.9229871079562212, + "grad_norm": 70.92750549316406, + "learning_rate": 2.2453812638670413e-07, + "loss": 15.4252, + "step": 456910 + }, + { + "epoch": 0.923007308588905, + "grad_norm": 290.128173828125, + "learning_rate": 2.2443470694673953e-07, + "loss": 12.7111, + "step": 456920 + }, + { + "epoch": 0.9230275092215888, + "grad_norm": 381.5926818847656, + "learning_rate": 2.2433131078230196e-07, + "loss": 28.5388, + "step": 456930 + }, + { + "epoch": 0.9230477098542726, + "grad_norm": 381.0793151855469, + "learning_rate": 2.242279378938944e-07, + "loss": 19.7802, + "step": 456940 + }, + { + "epoch": 0.9230679104869565, + "grad_norm": 345.0141296386719, + "learning_rate": 2.2412458828201977e-07, + "loss": 20.9494, + "step": 456950 + }, + { + "epoch": 0.9230881111196403, + "grad_norm": 44.365257263183594, + "learning_rate": 2.2402126194718322e-07, + "loss": 14.3114, + "step": 456960 + }, + { + "epoch": 0.9231083117523241, + "grad_norm": 308.3829345703125, + "learning_rate": 2.2391795888988822e-07, + "loss": 26.8005, + "step": 456970 + }, + { + "epoch": 0.9231285123850079, + "grad_norm": 0.0, + "learning_rate": 2.2381467911063658e-07, + "loss": 29.0832, + "step": 456980 + }, + { + "epoch": 0.9231487130176917, + "grad_norm": 233.3600311279297, + "learning_rate": 2.237114226099335e-07, + "loss": 39.1211, + "step": 456990 + }, + { + "epoch": 0.9231689136503756, + "grad_norm": 336.5529479980469, + "learning_rate": 2.2360818938828189e-07, + "loss": 21.1256, + "step": 457000 + }, + { + "epoch": 0.9231891142830594, + "grad_norm": 145.19363403320312, + "learning_rate": 2.2350497944618466e-07, + "loss": 15.1682, + "step": 457010 + }, + { + "epoch": 0.9232093149157432, + "grad_norm": 334.2428894042969, + "learning_rate": 2.234017927841442e-07, + "loss": 20.9393, + "step": 457020 + }, + { + "epoch": 0.923229515548427, + "grad_norm": 94.92315673828125, + "learning_rate": 2.2329862940266511e-07, + "loss": 19.3263, + "step": 457030 + }, + { + "epoch": 0.9232497161811108, + "grad_norm": 169.41757202148438, + "learning_rate": 2.2319548930224865e-07, + "loss": 8.1505, + "step": 457040 + }, + { + "epoch": 0.9232699168137946, + "grad_norm": 308.775390625, + "learning_rate": 2.2309237248339776e-07, + "loss": 17.2342, + "step": 457050 + }, + { + "epoch": 0.9232901174464784, + "grad_norm": 247.99966430664062, + "learning_rate": 2.2298927894661481e-07, + "loss": 17.2972, + "step": 457060 + }, + { + "epoch": 0.9233103180791622, + "grad_norm": 348.5246887207031, + "learning_rate": 2.2288620869240384e-07, + "loss": 22.9325, + "step": 457070 + }, + { + "epoch": 0.923330518711846, + "grad_norm": 245.3953094482422, + "learning_rate": 2.2278316172126612e-07, + "loss": 16.3009, + "step": 457080 + }, + { + "epoch": 0.9233507193445298, + "grad_norm": 1451.7257080078125, + "learning_rate": 2.2268013803370292e-07, + "loss": 29.4351, + "step": 457090 + }, + { + "epoch": 0.9233709199772137, + "grad_norm": 274.67156982421875, + "learning_rate": 2.2257713763021826e-07, + "loss": 22.262, + "step": 457100 + }, + { + "epoch": 0.9233911206098975, + "grad_norm": 184.2248992919922, + "learning_rate": 2.2247416051131288e-07, + "loss": 14.3081, + "step": 457110 + }, + { + "epoch": 0.9234113212425813, + "grad_norm": 124.99649810791016, + "learning_rate": 2.2237120667748856e-07, + "loss": 14.8649, + "step": 457120 + }, + { + "epoch": 0.9234315218752651, + "grad_norm": 208.64932250976562, + "learning_rate": 2.2226827612924774e-07, + "loss": 19.1672, + "step": 457130 + }, + { + "epoch": 0.9234517225079489, + "grad_norm": 72.52130889892578, + "learning_rate": 2.221653688670916e-07, + "loss": 17.0607, + "step": 457140 + }, + { + "epoch": 0.9234719231406328, + "grad_norm": 278.6165466308594, + "learning_rate": 2.220624848915226e-07, + "loss": 19.9385, + "step": 457150 + }, + { + "epoch": 0.9234921237733166, + "grad_norm": 160.53811645507812, + "learning_rate": 2.2195962420304083e-07, + "loss": 15.4356, + "step": 457160 + }, + { + "epoch": 0.9235123244060004, + "grad_norm": 363.5828857421875, + "learning_rate": 2.2185678680214927e-07, + "loss": 23.8777, + "step": 457170 + }, + { + "epoch": 0.9235325250386842, + "grad_norm": 270.5760498046875, + "learning_rate": 2.2175397268934807e-07, + "loss": 20.7764, + "step": 457180 + }, + { + "epoch": 0.923552725671368, + "grad_norm": 77.8431167602539, + "learning_rate": 2.216511818651379e-07, + "loss": 17.9534, + "step": 457190 + }, + { + "epoch": 0.9235729263040519, + "grad_norm": 148.47154235839844, + "learning_rate": 2.2154841433002062e-07, + "loss": 12.5748, + "step": 457200 + }, + { + "epoch": 0.9235931269367357, + "grad_norm": 401.5377197265625, + "learning_rate": 2.2144567008449636e-07, + "loss": 25.8941, + "step": 457210 + }, + { + "epoch": 0.9236133275694195, + "grad_norm": 362.03363037109375, + "learning_rate": 2.2134294912906696e-07, + "loss": 24.5876, + "step": 457220 + }, + { + "epoch": 0.9236335282021033, + "grad_norm": 390.0622253417969, + "learning_rate": 2.2124025146423255e-07, + "loss": 15.1685, + "step": 457230 + }, + { + "epoch": 0.9236537288347871, + "grad_norm": 0.0, + "learning_rate": 2.2113757709049277e-07, + "loss": 10.7024, + "step": 457240 + }, + { + "epoch": 0.923673929467471, + "grad_norm": 388.9189758300781, + "learning_rate": 2.210349260083494e-07, + "loss": 19.8318, + "step": 457250 + }, + { + "epoch": 0.9236941301001548, + "grad_norm": 453.5459899902344, + "learning_rate": 2.2093229821830263e-07, + "loss": 12.7006, + "step": 457260 + }, + { + "epoch": 0.9237143307328386, + "grad_norm": 196.50888061523438, + "learning_rate": 2.208296937208515e-07, + "loss": 8.2976, + "step": 457270 + }, + { + "epoch": 0.9237345313655224, + "grad_norm": 334.31744384765625, + "learning_rate": 2.2072711251649615e-07, + "loss": 8.1786, + "step": 457280 + }, + { + "epoch": 0.9237547319982062, + "grad_norm": 406.2618713378906, + "learning_rate": 2.2062455460573838e-07, + "loss": 20.883, + "step": 457290 + }, + { + "epoch": 0.9237749326308899, + "grad_norm": 364.6490173339844, + "learning_rate": 2.2052201998907673e-07, + "loss": 15.8866, + "step": 457300 + }, + { + "epoch": 0.9237951332635738, + "grad_norm": 363.32379150390625, + "learning_rate": 2.2041950866701078e-07, + "loss": 30.2554, + "step": 457310 + }, + { + "epoch": 0.9238153338962576, + "grad_norm": 110.7328872680664, + "learning_rate": 2.2031702064004067e-07, + "loss": 12.139, + "step": 457320 + }, + { + "epoch": 0.9238355345289414, + "grad_norm": 303.4820556640625, + "learning_rate": 2.2021455590866546e-07, + "loss": 26.7076, + "step": 457330 + }, + { + "epoch": 0.9238557351616252, + "grad_norm": 38.45009994506836, + "learning_rate": 2.2011211447338477e-07, + "loss": 19.7323, + "step": 457340 + }, + { + "epoch": 0.923875935794309, + "grad_norm": 771.2612915039062, + "learning_rate": 2.200096963346976e-07, + "loss": 23.089, + "step": 457350 + }, + { + "epoch": 0.9238961364269929, + "grad_norm": 284.0710754394531, + "learning_rate": 2.199073014931047e-07, + "loss": 15.2532, + "step": 457360 + }, + { + "epoch": 0.9239163370596767, + "grad_norm": 25.208005905151367, + "learning_rate": 2.198049299491023e-07, + "loss": 16.2739, + "step": 457370 + }, + { + "epoch": 0.9239365376923605, + "grad_norm": 164.5516357421875, + "learning_rate": 2.1970258170319114e-07, + "loss": 7.5858, + "step": 457380 + }, + { + "epoch": 0.9239567383250443, + "grad_norm": 163.2032012939453, + "learning_rate": 2.1960025675587082e-07, + "loss": 10.7046, + "step": 457390 + }, + { + "epoch": 0.9239769389577281, + "grad_norm": 203.40997314453125, + "learning_rate": 2.1949795510763872e-07, + "loss": 14.4185, + "step": 457400 + }, + { + "epoch": 0.923997139590412, + "grad_norm": 430.99896240234375, + "learning_rate": 2.1939567675899333e-07, + "loss": 25.7025, + "step": 457410 + }, + { + "epoch": 0.9240173402230958, + "grad_norm": 367.4797058105469, + "learning_rate": 2.1929342171043366e-07, + "loss": 32.4341, + "step": 457420 + }, + { + "epoch": 0.9240375408557796, + "grad_norm": 156.14569091796875, + "learning_rate": 2.191911899624588e-07, + "loss": 26.5189, + "step": 457430 + }, + { + "epoch": 0.9240577414884634, + "grad_norm": 296.7413330078125, + "learning_rate": 2.1908898151556502e-07, + "loss": 16.9909, + "step": 457440 + }, + { + "epoch": 0.9240779421211472, + "grad_norm": 399.5950012207031, + "learning_rate": 2.189867963702519e-07, + "loss": 17.4734, + "step": 457450 + }, + { + "epoch": 0.9240981427538311, + "grad_norm": 264.9706115722656, + "learning_rate": 2.188846345270179e-07, + "loss": 16.9213, + "step": 457460 + }, + { + "epoch": 0.9241183433865149, + "grad_norm": 265.282958984375, + "learning_rate": 2.1878249598636047e-07, + "loss": 13.9011, + "step": 457470 + }, + { + "epoch": 0.9241385440191987, + "grad_norm": 45.12477493286133, + "learning_rate": 2.186803807487764e-07, + "loss": 13.6294, + "step": 457480 + }, + { + "epoch": 0.9241587446518825, + "grad_norm": 412.4893798828125, + "learning_rate": 2.1857828881476472e-07, + "loss": 24.5977, + "step": 457490 + }, + { + "epoch": 0.9241789452845663, + "grad_norm": 245.3011474609375, + "learning_rate": 2.1847622018482283e-07, + "loss": 18.715, + "step": 457500 + }, + { + "epoch": 0.9241991459172502, + "grad_norm": 393.8460998535156, + "learning_rate": 2.1837417485944755e-07, + "loss": 20.525, + "step": 457510 + }, + { + "epoch": 0.924219346549934, + "grad_norm": 929.6036376953125, + "learning_rate": 2.1827215283913683e-07, + "loss": 34.2877, + "step": 457520 + }, + { + "epoch": 0.9242395471826178, + "grad_norm": 518.9688720703125, + "learning_rate": 2.1817015412438692e-07, + "loss": 30.2135, + "step": 457530 + }, + { + "epoch": 0.9242597478153016, + "grad_norm": 453.6609191894531, + "learning_rate": 2.1806817871569686e-07, + "loss": 26.5946, + "step": 457540 + }, + { + "epoch": 0.9242799484479854, + "grad_norm": 221.75369262695312, + "learning_rate": 2.1796622661356238e-07, + "loss": 25.0084, + "step": 457550 + }, + { + "epoch": 0.9243001490806692, + "grad_norm": 132.42124938964844, + "learning_rate": 2.1786429781847972e-07, + "loss": 22.873, + "step": 457560 + }, + { + "epoch": 0.924320349713353, + "grad_norm": 391.724365234375, + "learning_rate": 2.1776239233094687e-07, + "loss": 11.0618, + "step": 457570 + }, + { + "epoch": 0.9243405503460368, + "grad_norm": 198.1086883544922, + "learning_rate": 2.176605101514606e-07, + "loss": 25.2933, + "step": 457580 + }, + { + "epoch": 0.9243607509787206, + "grad_norm": 670.4555053710938, + "learning_rate": 2.175586512805161e-07, + "loss": 28.8527, + "step": 457590 + }, + { + "epoch": 0.9243809516114044, + "grad_norm": 295.0966491699219, + "learning_rate": 2.174568157186102e-07, + "loss": 18.4053, + "step": 457600 + }, + { + "epoch": 0.9244011522440883, + "grad_norm": 1169.2147216796875, + "learning_rate": 2.1735500346624083e-07, + "loss": 17.9884, + "step": 457610 + }, + { + "epoch": 0.9244213528767721, + "grad_norm": 340.5728759765625, + "learning_rate": 2.1725321452390314e-07, + "loss": 15.4388, + "step": 457620 + }, + { + "epoch": 0.9244415535094559, + "grad_norm": 235.09950256347656, + "learning_rate": 2.1715144889209284e-07, + "loss": 19.7492, + "step": 457630 + }, + { + "epoch": 0.9244617541421397, + "grad_norm": 302.98150634765625, + "learning_rate": 2.1704970657130675e-07, + "loss": 13.1334, + "step": 457640 + }, + { + "epoch": 0.9244819547748235, + "grad_norm": 224.8277587890625, + "learning_rate": 2.1694798756204005e-07, + "loss": 10.285, + "step": 457650 + }, + { + "epoch": 0.9245021554075074, + "grad_norm": 438.7237854003906, + "learning_rate": 2.1684629186478846e-07, + "loss": 13.7399, + "step": 457660 + }, + { + "epoch": 0.9245223560401912, + "grad_norm": 240.50230407714844, + "learning_rate": 2.1674461948004766e-07, + "loss": 15.1875, + "step": 457670 + }, + { + "epoch": 0.924542556672875, + "grad_norm": 237.82205200195312, + "learning_rate": 2.1664297040831394e-07, + "loss": 12.1434, + "step": 457680 + }, + { + "epoch": 0.9245627573055588, + "grad_norm": 8.642843246459961, + "learning_rate": 2.1654134465008247e-07, + "loss": 10.0281, + "step": 457690 + }, + { + "epoch": 0.9245829579382426, + "grad_norm": 431.3695068359375, + "learning_rate": 2.1643974220584729e-07, + "loss": 24.1967, + "step": 457700 + }, + { + "epoch": 0.9246031585709265, + "grad_norm": 150.09141540527344, + "learning_rate": 2.1633816307610577e-07, + "loss": 6.7439, + "step": 457710 + }, + { + "epoch": 0.9246233592036103, + "grad_norm": 293.7038269042969, + "learning_rate": 2.1623660726135197e-07, + "loss": 19.2351, + "step": 457720 + }, + { + "epoch": 0.9246435598362941, + "grad_norm": 79.47240447998047, + "learning_rate": 2.161350747620794e-07, + "loss": 13.3978, + "step": 457730 + }, + { + "epoch": 0.9246637604689779, + "grad_norm": 193.63145446777344, + "learning_rate": 2.1603356557878486e-07, + "loss": 11.6504, + "step": 457740 + }, + { + "epoch": 0.9246839611016617, + "grad_norm": 243.01817321777344, + "learning_rate": 2.1593207971196296e-07, + "loss": 15.138, + "step": 457750 + }, + { + "epoch": 0.9247041617343456, + "grad_norm": 393.2857666015625, + "learning_rate": 2.1583061716210774e-07, + "loss": 11.4301, + "step": 457760 + }, + { + "epoch": 0.9247243623670294, + "grad_norm": 105.41272735595703, + "learning_rate": 2.1572917792971326e-07, + "loss": 20.4093, + "step": 457770 + }, + { + "epoch": 0.9247445629997132, + "grad_norm": 122.74444580078125, + "learning_rate": 2.1562776201527525e-07, + "loss": 12.8214, + "step": 457780 + }, + { + "epoch": 0.924764763632397, + "grad_norm": 784.9725952148438, + "learning_rate": 2.1552636941928717e-07, + "loss": 17.4607, + "step": 457790 + }, + { + "epoch": 0.9247849642650808, + "grad_norm": 472.62603759765625, + "learning_rate": 2.154250001422431e-07, + "loss": 19.6794, + "step": 457800 + }, + { + "epoch": 0.9248051648977647, + "grad_norm": 208.9678192138672, + "learning_rate": 2.1532365418463708e-07, + "loss": 8.3243, + "step": 457810 + }, + { + "epoch": 0.9248253655304484, + "grad_norm": 255.95599365234375, + "learning_rate": 2.1522233154696314e-07, + "loss": 11.1677, + "step": 457820 + }, + { + "epoch": 0.9248455661631322, + "grad_norm": 196.7720489501953, + "learning_rate": 2.151210322297159e-07, + "loss": 20.5409, + "step": 457830 + }, + { + "epoch": 0.924865766795816, + "grad_norm": 61.60475158691406, + "learning_rate": 2.1501975623338833e-07, + "loss": 12.5555, + "step": 457840 + }, + { + "epoch": 0.9248859674284998, + "grad_norm": 143.92803955078125, + "learning_rate": 2.1491850355847332e-07, + "loss": 16.1914, + "step": 457850 + }, + { + "epoch": 0.9249061680611836, + "grad_norm": 90.24784088134766, + "learning_rate": 2.1481727420546605e-07, + "loss": 7.4153, + "step": 457860 + }, + { + "epoch": 0.9249263686938675, + "grad_norm": 307.8219909667969, + "learning_rate": 2.147160681748589e-07, + "loss": 20.5707, + "step": 457870 + }, + { + "epoch": 0.9249465693265513, + "grad_norm": 204.69508361816406, + "learning_rate": 2.1461488546714425e-07, + "loss": 18.323, + "step": 457880 + }, + { + "epoch": 0.9249667699592351, + "grad_norm": 204.1322021484375, + "learning_rate": 2.1451372608281674e-07, + "loss": 8.0543, + "step": 457890 + }, + { + "epoch": 0.9249869705919189, + "grad_norm": 258.7735900878906, + "learning_rate": 2.1441259002236924e-07, + "loss": 17.6502, + "step": 457900 + }, + { + "epoch": 0.9250071712246027, + "grad_norm": 178.43128967285156, + "learning_rate": 2.1431147728629476e-07, + "loss": 13.9585, + "step": 457910 + }, + { + "epoch": 0.9250273718572866, + "grad_norm": 264.40045166015625, + "learning_rate": 2.1421038787508508e-07, + "loss": 13.8546, + "step": 457920 + }, + { + "epoch": 0.9250475724899704, + "grad_norm": 193.4455108642578, + "learning_rate": 2.1410932178923372e-07, + "loss": 17.9401, + "step": 457930 + }, + { + "epoch": 0.9250677731226542, + "grad_norm": 277.23077392578125, + "learning_rate": 2.1400827902923304e-07, + "loss": 12.5947, + "step": 457940 + }, + { + "epoch": 0.925087973755338, + "grad_norm": 94.73963165283203, + "learning_rate": 2.1390725959557546e-07, + "loss": 15.1187, + "step": 457950 + }, + { + "epoch": 0.9251081743880218, + "grad_norm": 228.7657012939453, + "learning_rate": 2.1380626348875278e-07, + "loss": 16.4715, + "step": 457960 + }, + { + "epoch": 0.9251283750207057, + "grad_norm": 179.51947021484375, + "learning_rate": 2.137052907092596e-07, + "loss": 12.7447, + "step": 457970 + }, + { + "epoch": 0.9251485756533895, + "grad_norm": 287.54949951171875, + "learning_rate": 2.13604341257585e-07, + "loss": 22.2403, + "step": 457980 + }, + { + "epoch": 0.9251687762860733, + "grad_norm": 251.56422424316406, + "learning_rate": 2.135034151342219e-07, + "loss": 19.6641, + "step": 457990 + }, + { + "epoch": 0.9251889769187571, + "grad_norm": 266.3399353027344, + "learning_rate": 2.134025123396638e-07, + "loss": 18.5667, + "step": 458000 + }, + { + "epoch": 0.9252091775514409, + "grad_norm": 270.826904296875, + "learning_rate": 2.1330163287440087e-07, + "loss": 9.1103, + "step": 458010 + }, + { + "epoch": 0.9252293781841248, + "grad_norm": 2.8657827377319336, + "learning_rate": 2.1320077673892493e-07, + "loss": 17.3413, + "step": 458020 + }, + { + "epoch": 0.9252495788168086, + "grad_norm": 355.48699951171875, + "learning_rate": 2.1309994393372836e-07, + "loss": 19.1438, + "step": 458030 + }, + { + "epoch": 0.9252697794494924, + "grad_norm": 15.83651351928711, + "learning_rate": 2.1299913445930242e-07, + "loss": 17.0119, + "step": 458040 + }, + { + "epoch": 0.9252899800821762, + "grad_norm": 19.4829158782959, + "learning_rate": 2.1289834831613675e-07, + "loss": 13.3193, + "step": 458050 + }, + { + "epoch": 0.92531018071486, + "grad_norm": 285.29827880859375, + "learning_rate": 2.127975855047243e-07, + "loss": 14.4128, + "step": 458060 + }, + { + "epoch": 0.9253303813475438, + "grad_norm": 360.4769287109375, + "learning_rate": 2.126968460255563e-07, + "loss": 20.0909, + "step": 458070 + }, + { + "epoch": 0.9253505819802276, + "grad_norm": 115.21932983398438, + "learning_rate": 2.1259612987912348e-07, + "loss": 48.6393, + "step": 458080 + }, + { + "epoch": 0.9253707826129114, + "grad_norm": 185.64874267578125, + "learning_rate": 2.1249543706591602e-07, + "loss": 7.2848, + "step": 458090 + }, + { + "epoch": 0.9253909832455952, + "grad_norm": 167.89633178710938, + "learning_rate": 2.123947675864252e-07, + "loss": 10.263, + "step": 458100 + }, + { + "epoch": 0.925411183878279, + "grad_norm": 54.32759094238281, + "learning_rate": 2.1229412144114225e-07, + "loss": 12.2169, + "step": 458110 + }, + { + "epoch": 0.9254313845109629, + "grad_norm": 273.45697021484375, + "learning_rate": 2.121934986305557e-07, + "loss": 10.3457, + "step": 458120 + }, + { + "epoch": 0.9254515851436467, + "grad_norm": 154.4072265625, + "learning_rate": 2.120928991551585e-07, + "loss": 17.0332, + "step": 458130 + }, + { + "epoch": 0.9254717857763305, + "grad_norm": 183.60134887695312, + "learning_rate": 2.1199232301543915e-07, + "loss": 20.2376, + "step": 458140 + }, + { + "epoch": 0.9254919864090143, + "grad_norm": 152.31246948242188, + "learning_rate": 2.1189177021188888e-07, + "loss": 37.2988, + "step": 458150 + }, + { + "epoch": 0.9255121870416981, + "grad_norm": 110.86585235595703, + "learning_rate": 2.117912407449979e-07, + "loss": 13.2511, + "step": 458160 + }, + { + "epoch": 0.925532387674382, + "grad_norm": 131.7351531982422, + "learning_rate": 2.116907346152547e-07, + "loss": 13.2585, + "step": 458170 + }, + { + "epoch": 0.9255525883070658, + "grad_norm": 154.50274658203125, + "learning_rate": 2.1159025182315052e-07, + "loss": 15.4772, + "step": 458180 + }, + { + "epoch": 0.9255727889397496, + "grad_norm": 158.2553253173828, + "learning_rate": 2.11489792369175e-07, + "loss": 15.0562, + "step": 458190 + }, + { + "epoch": 0.9255929895724334, + "grad_norm": 134.0874786376953, + "learning_rate": 2.1138935625381663e-07, + "loss": 16.9204, + "step": 458200 + }, + { + "epoch": 0.9256131902051172, + "grad_norm": 400.1285400390625, + "learning_rate": 2.1128894347756613e-07, + "loss": 18.6318, + "step": 458210 + }, + { + "epoch": 0.925633390837801, + "grad_norm": 300.58599853515625, + "learning_rate": 2.1118855404091253e-07, + "loss": 26.1327, + "step": 458220 + }, + { + "epoch": 0.9256535914704849, + "grad_norm": 185.72238159179688, + "learning_rate": 2.110881879443455e-07, + "loss": 14.9031, + "step": 458230 + }, + { + "epoch": 0.9256737921031687, + "grad_norm": 129.05772399902344, + "learning_rate": 2.1098784518835292e-07, + "loss": 18.2543, + "step": 458240 + }, + { + "epoch": 0.9256939927358525, + "grad_norm": 520.874755859375, + "learning_rate": 2.1088752577342607e-07, + "loss": 28.8492, + "step": 458250 + }, + { + "epoch": 0.9257141933685363, + "grad_norm": 834.5855712890625, + "learning_rate": 2.1078722970005182e-07, + "loss": 16.1469, + "step": 458260 + }, + { + "epoch": 0.9257343940012202, + "grad_norm": 203.6182403564453, + "learning_rate": 2.1068695696871922e-07, + "loss": 13.5855, + "step": 458270 + }, + { + "epoch": 0.925754594633904, + "grad_norm": 276.94000244140625, + "learning_rate": 2.1058670757991783e-07, + "loss": 17.2852, + "step": 458280 + }, + { + "epoch": 0.9257747952665878, + "grad_norm": 299.1051025390625, + "learning_rate": 2.104864815341362e-07, + "loss": 17.8666, + "step": 458290 + }, + { + "epoch": 0.9257949958992716, + "grad_norm": 154.6279296875, + "learning_rate": 2.103862788318628e-07, + "loss": 15.1453, + "step": 458300 + }, + { + "epoch": 0.9258151965319554, + "grad_norm": 187.29559326171875, + "learning_rate": 2.102860994735856e-07, + "loss": 10.3892, + "step": 458310 + }, + { + "epoch": 0.9258353971646393, + "grad_norm": 368.73345947265625, + "learning_rate": 2.1018594345979305e-07, + "loss": 20.8475, + "step": 458320 + }, + { + "epoch": 0.925855597797323, + "grad_norm": 376.1015625, + "learning_rate": 2.1008581079097312e-07, + "loss": 21.4322, + "step": 458330 + }, + { + "epoch": 0.9258757984300068, + "grad_norm": 327.84954833984375, + "learning_rate": 2.0998570146761376e-07, + "loss": 18.1575, + "step": 458340 + }, + { + "epoch": 0.9258959990626906, + "grad_norm": 46.97186279296875, + "learning_rate": 2.098856154902029e-07, + "loss": 12.9281, + "step": 458350 + }, + { + "epoch": 0.9259161996953744, + "grad_norm": 190.0801239013672, + "learning_rate": 2.0978555285922963e-07, + "loss": 32.2267, + "step": 458360 + }, + { + "epoch": 0.9259364003280582, + "grad_norm": 76.44590759277344, + "learning_rate": 2.0968551357518018e-07, + "loss": 14.1983, + "step": 458370 + }, + { + "epoch": 0.9259566009607421, + "grad_norm": 655.9341430664062, + "learning_rate": 2.0958549763854196e-07, + "loss": 24.3508, + "step": 458380 + }, + { + "epoch": 0.9259768015934259, + "grad_norm": 199.93775939941406, + "learning_rate": 2.0948550504980403e-07, + "loss": 14.0939, + "step": 458390 + }, + { + "epoch": 0.9259970022261097, + "grad_norm": 129.3128204345703, + "learning_rate": 2.0938553580945208e-07, + "loss": 23.2577, + "step": 458400 + }, + { + "epoch": 0.9260172028587935, + "grad_norm": 328.13800048828125, + "learning_rate": 2.092855899179741e-07, + "loss": 9.1749, + "step": 458410 + }, + { + "epoch": 0.9260374034914773, + "grad_norm": 220.08375549316406, + "learning_rate": 2.0918566737585688e-07, + "loss": 21.2348, + "step": 458420 + }, + { + "epoch": 0.9260576041241612, + "grad_norm": 157.90724182128906, + "learning_rate": 2.0908576818358783e-07, + "loss": 13.8015, + "step": 458430 + }, + { + "epoch": 0.926077804756845, + "grad_norm": 161.1211700439453, + "learning_rate": 2.0898589234165378e-07, + "loss": 13.2482, + "step": 458440 + }, + { + "epoch": 0.9260980053895288, + "grad_norm": 401.2912902832031, + "learning_rate": 2.0888603985054156e-07, + "loss": 20.68, + "step": 458450 + }, + { + "epoch": 0.9261182060222126, + "grad_norm": 231.30166625976562, + "learning_rate": 2.0878621071073745e-07, + "loss": 10.3691, + "step": 458460 + }, + { + "epoch": 0.9261384066548964, + "grad_norm": 250.1029052734375, + "learning_rate": 2.086864049227283e-07, + "loss": 15.4583, + "step": 458470 + }, + { + "epoch": 0.9261586072875803, + "grad_norm": 475.3943786621094, + "learning_rate": 2.085866224870009e-07, + "loss": 11.1181, + "step": 458480 + }, + { + "epoch": 0.9261788079202641, + "grad_norm": 300.5265808105469, + "learning_rate": 2.0848686340404045e-07, + "loss": 32.7259, + "step": 458490 + }, + { + "epoch": 0.9261990085529479, + "grad_norm": 225.87728881835938, + "learning_rate": 2.083871276743338e-07, + "loss": 17.3624, + "step": 458500 + }, + { + "epoch": 0.9262192091856317, + "grad_norm": 289.4924621582031, + "learning_rate": 2.0828741529836771e-07, + "loss": 16.8729, + "step": 458510 + }, + { + "epoch": 0.9262394098183155, + "grad_norm": 9.950053215026855, + "learning_rate": 2.0818772627662743e-07, + "loss": 12.1771, + "step": 458520 + }, + { + "epoch": 0.9262596104509994, + "grad_norm": 7.057925701141357, + "learning_rate": 2.0808806060959864e-07, + "loss": 27.0788, + "step": 458530 + }, + { + "epoch": 0.9262798110836832, + "grad_norm": 281.7425842285156, + "learning_rate": 2.0798841829776816e-07, + "loss": 10.2757, + "step": 458540 + }, + { + "epoch": 0.926300011716367, + "grad_norm": 192.39663696289062, + "learning_rate": 2.0788879934162064e-07, + "loss": 17.1062, + "step": 458550 + }, + { + "epoch": 0.9263202123490508, + "grad_norm": 190.81417846679688, + "learning_rate": 2.077892037416418e-07, + "loss": 15.5512, + "step": 458560 + }, + { + "epoch": 0.9263404129817346, + "grad_norm": 244.1360626220703, + "learning_rate": 2.0768963149831678e-07, + "loss": 13.6383, + "step": 458570 + }, + { + "epoch": 0.9263606136144183, + "grad_norm": 275.1950378417969, + "learning_rate": 2.0759008261213242e-07, + "loss": 15.0651, + "step": 458580 + }, + { + "epoch": 0.9263808142471022, + "grad_norm": 158.13926696777344, + "learning_rate": 2.0749055708357168e-07, + "loss": 19.8864, + "step": 458590 + }, + { + "epoch": 0.926401014879786, + "grad_norm": 256.86767578125, + "learning_rate": 2.0739105491312028e-07, + "loss": 17.6625, + "step": 458600 + }, + { + "epoch": 0.9264212155124698, + "grad_norm": 722.1422119140625, + "learning_rate": 2.0729157610126448e-07, + "loss": 25.766, + "step": 458610 + }, + { + "epoch": 0.9264414161451536, + "grad_norm": 141.5935516357422, + "learning_rate": 2.0719212064848838e-07, + "loss": 19.278, + "step": 458620 + }, + { + "epoch": 0.9264616167778374, + "grad_norm": 353.7138671875, + "learning_rate": 2.07092688555276e-07, + "loss": 17.0386, + "step": 458630 + }, + { + "epoch": 0.9264818174105213, + "grad_norm": 118.1501235961914, + "learning_rate": 2.0699327982211304e-07, + "loss": 13.3947, + "step": 458640 + }, + { + "epoch": 0.9265020180432051, + "grad_norm": 270.81756591796875, + "learning_rate": 2.068938944494836e-07, + "loss": 21.4207, + "step": 458650 + }, + { + "epoch": 0.9265222186758889, + "grad_norm": 610.9850463867188, + "learning_rate": 2.0679453243787174e-07, + "loss": 12.7193, + "step": 458660 + }, + { + "epoch": 0.9265424193085727, + "grad_norm": 0.0, + "learning_rate": 2.0669519378776147e-07, + "loss": 15.17, + "step": 458670 + }, + { + "epoch": 0.9265626199412565, + "grad_norm": 285.80963134765625, + "learning_rate": 2.0659587849963801e-07, + "loss": 19.3955, + "step": 458680 + }, + { + "epoch": 0.9265828205739404, + "grad_norm": 300.5206298828125, + "learning_rate": 2.0649658657398487e-07, + "loss": 11.1576, + "step": 458690 + }, + { + "epoch": 0.9266030212066242, + "grad_norm": 416.0434265136719, + "learning_rate": 2.0639731801128603e-07, + "loss": 19.9857, + "step": 458700 + }, + { + "epoch": 0.926623221839308, + "grad_norm": 319.1828918457031, + "learning_rate": 2.0629807281202508e-07, + "loss": 20.9035, + "step": 458710 + }, + { + "epoch": 0.9266434224719918, + "grad_norm": 280.5700378417969, + "learning_rate": 2.0619885097668658e-07, + "loss": 10.4198, + "step": 458720 + }, + { + "epoch": 0.9266636231046756, + "grad_norm": 367.6854248046875, + "learning_rate": 2.0609965250575237e-07, + "loss": 20.3417, + "step": 458730 + }, + { + "epoch": 0.9266838237373595, + "grad_norm": 4.055483818054199, + "learning_rate": 2.0600047739970762e-07, + "loss": 10.6959, + "step": 458740 + }, + { + "epoch": 0.9267040243700433, + "grad_norm": 157.8720245361328, + "learning_rate": 2.0590132565903475e-07, + "loss": 11.8822, + "step": 458750 + }, + { + "epoch": 0.9267242250027271, + "grad_norm": 376.6069641113281, + "learning_rate": 2.058021972842178e-07, + "loss": 17.6613, + "step": 458760 + }, + { + "epoch": 0.9267444256354109, + "grad_norm": 472.8377685546875, + "learning_rate": 2.057030922757397e-07, + "loss": 15.236, + "step": 458770 + }, + { + "epoch": 0.9267646262680947, + "grad_norm": 222.2113494873047, + "learning_rate": 2.056040106340823e-07, + "loss": 10.7457, + "step": 458780 + }, + { + "epoch": 0.9267848269007786, + "grad_norm": 48.7750244140625, + "learning_rate": 2.0550495235973023e-07, + "loss": 11.8005, + "step": 458790 + }, + { + "epoch": 0.9268050275334624, + "grad_norm": 192.03140258789062, + "learning_rate": 2.054059174531653e-07, + "loss": 23.3923, + "step": 458800 + }, + { + "epoch": 0.9268252281661462, + "grad_norm": 143.8888702392578, + "learning_rate": 2.0530690591487047e-07, + "loss": 18.0165, + "step": 458810 + }, + { + "epoch": 0.92684542879883, + "grad_norm": 257.7686462402344, + "learning_rate": 2.0520791774532757e-07, + "loss": 18.7763, + "step": 458820 + }, + { + "epoch": 0.9268656294315138, + "grad_norm": 308.28472900390625, + "learning_rate": 2.0510895294502066e-07, + "loss": 14.1266, + "step": 458830 + }, + { + "epoch": 0.9268858300641976, + "grad_norm": 291.55487060546875, + "learning_rate": 2.0501001151443156e-07, + "loss": 16.9696, + "step": 458840 + }, + { + "epoch": 0.9269060306968814, + "grad_norm": 255.02993774414062, + "learning_rate": 2.0491109345404102e-07, + "loss": 14.6789, + "step": 458850 + }, + { + "epoch": 0.9269262313295652, + "grad_norm": 519.9100341796875, + "learning_rate": 2.0481219876433257e-07, + "loss": 19.5401, + "step": 458860 + }, + { + "epoch": 0.926946431962249, + "grad_norm": 301.23931884765625, + "learning_rate": 2.0471332744578853e-07, + "loss": 25.7286, + "step": 458870 + }, + { + "epoch": 0.9269666325949328, + "grad_norm": 442.61456298828125, + "learning_rate": 2.0461447949888912e-07, + "loss": 14.013, + "step": 458880 + }, + { + "epoch": 0.9269868332276167, + "grad_norm": 251.81700134277344, + "learning_rate": 2.0451565492411672e-07, + "loss": 20.6521, + "step": 458890 + }, + { + "epoch": 0.9270070338603005, + "grad_norm": 182.24066162109375, + "learning_rate": 2.0441685372195487e-07, + "loss": 16.012, + "step": 458900 + }, + { + "epoch": 0.9270272344929843, + "grad_norm": 296.1080627441406, + "learning_rate": 2.043180758928831e-07, + "loss": 26.0667, + "step": 458910 + }, + { + "epoch": 0.9270474351256681, + "grad_norm": 222.44517517089844, + "learning_rate": 2.0421932143738276e-07, + "loss": 13.0511, + "step": 458920 + }, + { + "epoch": 0.9270676357583519, + "grad_norm": 248.0188446044922, + "learning_rate": 2.041205903559368e-07, + "loss": 13.8143, + "step": 458930 + }, + { + "epoch": 0.9270878363910358, + "grad_norm": 108.15481567382812, + "learning_rate": 2.0402188264902533e-07, + "loss": 20.7155, + "step": 458940 + }, + { + "epoch": 0.9271080370237196, + "grad_norm": 377.7518615722656, + "learning_rate": 2.039231983171286e-07, + "loss": 16.3114, + "step": 458950 + }, + { + "epoch": 0.9271282376564034, + "grad_norm": 0.0, + "learning_rate": 2.0382453736072838e-07, + "loss": 17.6866, + "step": 458960 + }, + { + "epoch": 0.9271484382890872, + "grad_norm": 234.6780548095703, + "learning_rate": 2.0372589978030654e-07, + "loss": 12.0828, + "step": 458970 + }, + { + "epoch": 0.927168638921771, + "grad_norm": 617.6521606445312, + "learning_rate": 2.0362728557634327e-07, + "loss": 31.6445, + "step": 458980 + }, + { + "epoch": 0.9271888395544549, + "grad_norm": 113.47299194335938, + "learning_rate": 2.0352869474931758e-07, + "loss": 11.6212, + "step": 458990 + }, + { + "epoch": 0.9272090401871387, + "grad_norm": 197.9394989013672, + "learning_rate": 2.0343012729971244e-07, + "loss": 9.7091, + "step": 459000 + }, + { + "epoch": 0.9272292408198225, + "grad_norm": 355.50469970703125, + "learning_rate": 2.0333158322800696e-07, + "loss": 15.216, + "step": 459010 + }, + { + "epoch": 0.9272494414525063, + "grad_norm": 208.5948028564453, + "learning_rate": 2.0323306253468123e-07, + "loss": 11.2102, + "step": 459020 + }, + { + "epoch": 0.9272696420851901, + "grad_norm": 379.1370544433594, + "learning_rate": 2.0313456522021603e-07, + "loss": 15.8408, + "step": 459030 + }, + { + "epoch": 0.927289842717874, + "grad_norm": 278.2328186035156, + "learning_rate": 2.0303609128509038e-07, + "loss": 21.7229, + "step": 459040 + }, + { + "epoch": 0.9273100433505578, + "grad_norm": 376.0787048339844, + "learning_rate": 2.0293764072978618e-07, + "loss": 23.0545, + "step": 459050 + }, + { + "epoch": 0.9273302439832416, + "grad_norm": 256.8787841796875, + "learning_rate": 2.0283921355478187e-07, + "loss": 18.2476, + "step": 459060 + }, + { + "epoch": 0.9273504446159254, + "grad_norm": 347.57073974609375, + "learning_rate": 2.0274080976055655e-07, + "loss": 12.9471, + "step": 459070 + }, + { + "epoch": 0.9273706452486092, + "grad_norm": 344.43109130859375, + "learning_rate": 2.0264242934759147e-07, + "loss": 21.7921, + "step": 459080 + }, + { + "epoch": 0.927390845881293, + "grad_norm": 359.5399169921875, + "learning_rate": 2.025440723163652e-07, + "loss": 22.9477, + "step": 459090 + }, + { + "epoch": 0.9274110465139768, + "grad_norm": 123.47303009033203, + "learning_rate": 2.0244573866735673e-07, + "loss": 19.4989, + "step": 459100 + }, + { + "epoch": 0.9274312471466606, + "grad_norm": 194.1509552001953, + "learning_rate": 2.0234742840104627e-07, + "loss": 16.2509, + "step": 459110 + }, + { + "epoch": 0.9274514477793444, + "grad_norm": 173.22320556640625, + "learning_rate": 2.0224914151791285e-07, + "loss": 10.2063, + "step": 459120 + }, + { + "epoch": 0.9274716484120282, + "grad_norm": 308.11761474609375, + "learning_rate": 2.0215087801843504e-07, + "loss": 15.6687, + "step": 459130 + }, + { + "epoch": 0.927491849044712, + "grad_norm": 392.9170837402344, + "learning_rate": 2.0205263790309125e-07, + "loss": 12.4466, + "step": 459140 + }, + { + "epoch": 0.9275120496773959, + "grad_norm": 669.3555297851562, + "learning_rate": 2.0195442117236176e-07, + "loss": 29.092, + "step": 459150 + }, + { + "epoch": 0.9275322503100797, + "grad_norm": 282.663330078125, + "learning_rate": 2.0185622782672497e-07, + "loss": 15.6815, + "step": 459160 + }, + { + "epoch": 0.9275524509427635, + "grad_norm": 255.64881896972656, + "learning_rate": 2.0175805786665782e-07, + "loss": 15.1651, + "step": 459170 + }, + { + "epoch": 0.9275726515754473, + "grad_norm": 313.9009094238281, + "learning_rate": 2.0165991129263984e-07, + "loss": 11.7104, + "step": 459180 + }, + { + "epoch": 0.9275928522081311, + "grad_norm": 254.2653350830078, + "learning_rate": 2.0156178810515127e-07, + "loss": 23.887, + "step": 459190 + }, + { + "epoch": 0.927613052840815, + "grad_norm": 262.0769958496094, + "learning_rate": 2.0146368830466668e-07, + "loss": 24.3206, + "step": 459200 + }, + { + "epoch": 0.9276332534734988, + "grad_norm": 402.42437744140625, + "learning_rate": 2.0136561189166682e-07, + "loss": 11.1628, + "step": 459210 + }, + { + "epoch": 0.9276534541061826, + "grad_norm": 177.794677734375, + "learning_rate": 2.0126755886662907e-07, + "loss": 16.3253, + "step": 459220 + }, + { + "epoch": 0.9276736547388664, + "grad_norm": 276.5149230957031, + "learning_rate": 2.0116952923003142e-07, + "loss": 14.3878, + "step": 459230 + }, + { + "epoch": 0.9276938553715502, + "grad_norm": 516.1245727539062, + "learning_rate": 2.0107152298235067e-07, + "loss": 19.8517, + "step": 459240 + }, + { + "epoch": 0.9277140560042341, + "grad_norm": 46.9970817565918, + "learning_rate": 2.0097354012406535e-07, + "loss": 24.6953, + "step": 459250 + }, + { + "epoch": 0.9277342566369179, + "grad_norm": 471.01531982421875, + "learning_rate": 2.0087558065565394e-07, + "loss": 25.1619, + "step": 459260 + }, + { + "epoch": 0.9277544572696017, + "grad_norm": 144.0623779296875, + "learning_rate": 2.007776445775922e-07, + "loss": 17.7077, + "step": 459270 + }, + { + "epoch": 0.9277746579022855, + "grad_norm": 280.4176940917969, + "learning_rate": 2.006797318903575e-07, + "loss": 17.9041, + "step": 459280 + }, + { + "epoch": 0.9277948585349693, + "grad_norm": 260.37921142578125, + "learning_rate": 2.0058184259442893e-07, + "loss": 16.1751, + "step": 459290 + }, + { + "epoch": 0.9278150591676532, + "grad_norm": 289.43194580078125, + "learning_rate": 2.0048397669028164e-07, + "loss": 20.1105, + "step": 459300 + }, + { + "epoch": 0.927835259800337, + "grad_norm": 294.12603759765625, + "learning_rate": 2.003861341783936e-07, + "loss": 16.9188, + "step": 459310 + }, + { + "epoch": 0.9278554604330208, + "grad_norm": 164.79388427734375, + "learning_rate": 2.0028831505924162e-07, + "loss": 20.3254, + "step": 459320 + }, + { + "epoch": 0.9278756610657046, + "grad_norm": 446.665283203125, + "learning_rate": 2.0019051933330204e-07, + "loss": 15.017, + "step": 459330 + }, + { + "epoch": 0.9278958616983884, + "grad_norm": 85.39521789550781, + "learning_rate": 2.000927470010511e-07, + "loss": 10.0591, + "step": 459340 + }, + { + "epoch": 0.9279160623310722, + "grad_norm": 9.294930458068848, + "learning_rate": 1.9999499806296674e-07, + "loss": 12.9359, + "step": 459350 + }, + { + "epoch": 0.927936262963756, + "grad_norm": 286.6814270019531, + "learning_rate": 1.9989727251952418e-07, + "loss": 25.4458, + "step": 459360 + }, + { + "epoch": 0.9279564635964398, + "grad_norm": 530.6024780273438, + "learning_rate": 1.9979957037120078e-07, + "loss": 20.9247, + "step": 459370 + }, + { + "epoch": 0.9279766642291236, + "grad_norm": 336.87677001953125, + "learning_rate": 1.9970189161847175e-07, + "loss": 16.0934, + "step": 459380 + }, + { + "epoch": 0.9279968648618074, + "grad_norm": 95.1301040649414, + "learning_rate": 1.996042362618128e-07, + "loss": 17.521, + "step": 459390 + }, + { + "epoch": 0.9280170654944913, + "grad_norm": 198.83937072753906, + "learning_rate": 1.995066043017013e-07, + "loss": 39.8352, + "step": 459400 + }, + { + "epoch": 0.9280372661271751, + "grad_norm": 310.2725524902344, + "learning_rate": 1.9940899573861195e-07, + "loss": 13.0402, + "step": 459410 + }, + { + "epoch": 0.9280574667598589, + "grad_norm": 279.5280456542969, + "learning_rate": 1.993114105730215e-07, + "loss": 20.8009, + "step": 459420 + }, + { + "epoch": 0.9280776673925427, + "grad_norm": 207.68809509277344, + "learning_rate": 1.9921384880540406e-07, + "loss": 16.6055, + "step": 459430 + }, + { + "epoch": 0.9280978680252265, + "grad_norm": 276.0572814941406, + "learning_rate": 1.9911631043623704e-07, + "loss": 16.8136, + "step": 459440 + }, + { + "epoch": 0.9281180686579104, + "grad_norm": 249.64663696289062, + "learning_rate": 1.99018795465995e-07, + "loss": 16.5146, + "step": 459450 + }, + { + "epoch": 0.9281382692905942, + "grad_norm": 247.5889129638672, + "learning_rate": 1.9892130389515207e-07, + "loss": 12.4523, + "step": 459460 + }, + { + "epoch": 0.928158469923278, + "grad_norm": 97.71456909179688, + "learning_rate": 1.9882383572418508e-07, + "loss": 8.6723, + "step": 459470 + }, + { + "epoch": 0.9281786705559618, + "grad_norm": 687.3977661132812, + "learning_rate": 1.987263909535686e-07, + "loss": 24.0275, + "step": 459480 + }, + { + "epoch": 0.9281988711886456, + "grad_norm": 395.609375, + "learning_rate": 1.986289695837762e-07, + "loss": 25.2735, + "step": 459490 + }, + { + "epoch": 0.9282190718213295, + "grad_norm": 325.3480529785156, + "learning_rate": 1.9853157161528468e-07, + "loss": 15.8198, + "step": 459500 + }, + { + "epoch": 0.9282392724540133, + "grad_norm": 106.27111053466797, + "learning_rate": 1.984341970485687e-07, + "loss": 10.0832, + "step": 459510 + }, + { + "epoch": 0.9282594730866971, + "grad_norm": 172.36770629882812, + "learning_rate": 1.9833684588410062e-07, + "loss": 16.7191, + "step": 459520 + }, + { + "epoch": 0.9282796737193809, + "grad_norm": 913.9033203125, + "learning_rate": 1.9823951812235675e-07, + "loss": 33.0155, + "step": 459530 + }, + { + "epoch": 0.9282998743520647, + "grad_norm": 512.4244995117188, + "learning_rate": 1.981422137638117e-07, + "loss": 25.0951, + "step": 459540 + }, + { + "epoch": 0.9283200749847486, + "grad_norm": 178.0298614501953, + "learning_rate": 1.98044932808939e-07, + "loss": 18.1507, + "step": 459550 + }, + { + "epoch": 0.9283402756174324, + "grad_norm": 0.9513123035430908, + "learning_rate": 1.9794767525821212e-07, + "loss": 13.7926, + "step": 459560 + }, + { + "epoch": 0.9283604762501162, + "grad_norm": 322.00299072265625, + "learning_rate": 1.9785044111210627e-07, + "loss": 13.1647, + "step": 459570 + }, + { + "epoch": 0.9283806768828, + "grad_norm": 391.02349853515625, + "learning_rate": 1.977532303710955e-07, + "loss": 18.097, + "step": 459580 + }, + { + "epoch": 0.9284008775154838, + "grad_norm": 600.1901245117188, + "learning_rate": 1.9765604303565223e-07, + "loss": 22.4555, + "step": 459590 + }, + { + "epoch": 0.9284210781481677, + "grad_norm": 70.98042297363281, + "learning_rate": 1.9755887910625103e-07, + "loss": 17.0909, + "step": 459600 + }, + { + "epoch": 0.9284412787808514, + "grad_norm": 367.9173583984375, + "learning_rate": 1.9746173858336604e-07, + "loss": 17.8391, + "step": 459610 + }, + { + "epoch": 0.9284614794135352, + "grad_norm": 32.95124053955078, + "learning_rate": 1.9736462146747015e-07, + "loss": 15.9649, + "step": 459620 + }, + { + "epoch": 0.928481680046219, + "grad_norm": 267.5936584472656, + "learning_rate": 1.972675277590358e-07, + "loss": 15.8187, + "step": 459630 + }, + { + "epoch": 0.9285018806789028, + "grad_norm": 231.7164764404297, + "learning_rate": 1.9717045745853758e-07, + "loss": 13.5195, + "step": 459640 + }, + { + "epoch": 0.9285220813115866, + "grad_norm": 308.99993896484375, + "learning_rate": 1.9707341056644737e-07, + "loss": 22.4319, + "step": 459650 + }, + { + "epoch": 0.9285422819442705, + "grad_norm": 252.5726318359375, + "learning_rate": 1.9697638708323918e-07, + "loss": 15.8626, + "step": 459660 + }, + { + "epoch": 0.9285624825769543, + "grad_norm": 285.3663330078125, + "learning_rate": 1.9687938700938602e-07, + "loss": 39.4787, + "step": 459670 + }, + { + "epoch": 0.9285826832096381, + "grad_norm": 192.2904052734375, + "learning_rate": 1.967824103453597e-07, + "loss": 29.4373, + "step": 459680 + }, + { + "epoch": 0.9286028838423219, + "grad_norm": 340.7438659667969, + "learning_rate": 1.9668545709163378e-07, + "loss": 14.9998, + "step": 459690 + }, + { + "epoch": 0.9286230844750057, + "grad_norm": 413.1592712402344, + "learning_rate": 1.9658852724868005e-07, + "loss": 23.5365, + "step": 459700 + }, + { + "epoch": 0.9286432851076896, + "grad_norm": 412.8314208984375, + "learning_rate": 1.9649162081697094e-07, + "loss": 26.8419, + "step": 459710 + }, + { + "epoch": 0.9286634857403734, + "grad_norm": 320.90557861328125, + "learning_rate": 1.963947377969788e-07, + "loss": 12.5075, + "step": 459720 + }, + { + "epoch": 0.9286836863730572, + "grad_norm": 271.4992370605469, + "learning_rate": 1.9629787818917722e-07, + "loss": 17.1874, + "step": 459730 + }, + { + "epoch": 0.928703887005741, + "grad_norm": 39.38309097290039, + "learning_rate": 1.9620104199403688e-07, + "loss": 16.7725, + "step": 459740 + }, + { + "epoch": 0.9287240876384248, + "grad_norm": 208.1249542236328, + "learning_rate": 1.961042292120291e-07, + "loss": 13.5535, + "step": 459750 + }, + { + "epoch": 0.9287442882711087, + "grad_norm": 178.13792419433594, + "learning_rate": 1.9600743984362792e-07, + "loss": 14.1426, + "step": 459760 + }, + { + "epoch": 0.9287644889037925, + "grad_norm": 226.3883056640625, + "learning_rate": 1.959106738893035e-07, + "loss": 11.4676, + "step": 459770 + }, + { + "epoch": 0.9287846895364763, + "grad_norm": 127.52801513671875, + "learning_rate": 1.958139313495272e-07, + "loss": 16.1323, + "step": 459780 + }, + { + "epoch": 0.9288048901691601, + "grad_norm": 0.7948424816131592, + "learning_rate": 1.957172122247708e-07, + "loss": 31.0293, + "step": 459790 + }, + { + "epoch": 0.9288250908018439, + "grad_norm": 191.9970703125, + "learning_rate": 1.9562051651550784e-07, + "loss": 13.3462, + "step": 459800 + }, + { + "epoch": 0.9288452914345278, + "grad_norm": 117.13938903808594, + "learning_rate": 1.9552384422220627e-07, + "loss": 13.4455, + "step": 459810 + }, + { + "epoch": 0.9288654920672116, + "grad_norm": 305.8866882324219, + "learning_rate": 1.954271953453385e-07, + "loss": 19.3293, + "step": 459820 + }, + { + "epoch": 0.9288856926998954, + "grad_norm": 302.30975341796875, + "learning_rate": 1.953305698853769e-07, + "loss": 26.1453, + "step": 459830 + }, + { + "epoch": 0.9289058933325792, + "grad_norm": 486.2826232910156, + "learning_rate": 1.9523396784279114e-07, + "loss": 24.521, + "step": 459840 + }, + { + "epoch": 0.928926093965263, + "grad_norm": 443.88958740234375, + "learning_rate": 1.9513738921805192e-07, + "loss": 16.7113, + "step": 459850 + }, + { + "epoch": 0.9289462945979468, + "grad_norm": 228.5526580810547, + "learning_rate": 1.9504083401162999e-07, + "loss": 21.6349, + "step": 459860 + }, + { + "epoch": 0.9289664952306306, + "grad_norm": 313.4875183105469, + "learning_rate": 1.9494430222399774e-07, + "loss": 18.5073, + "step": 459870 + }, + { + "epoch": 0.9289866958633144, + "grad_norm": 124.33599090576172, + "learning_rate": 1.948477938556226e-07, + "loss": 14.3797, + "step": 459880 + }, + { + "epoch": 0.9290068964959982, + "grad_norm": 159.2218017578125, + "learning_rate": 1.9475130890697691e-07, + "loss": 31.771, + "step": 459890 + }, + { + "epoch": 0.929027097128682, + "grad_norm": 254.6146697998047, + "learning_rate": 1.9465484737853092e-07, + "loss": 21.2994, + "step": 459900 + }, + { + "epoch": 0.9290472977613659, + "grad_norm": 213.3077392578125, + "learning_rate": 1.945584092707542e-07, + "loss": 12.993, + "step": 459910 + }, + { + "epoch": 0.9290674983940497, + "grad_norm": 505.9075622558594, + "learning_rate": 1.944619945841164e-07, + "loss": 11.2377, + "step": 459920 + }, + { + "epoch": 0.9290876990267335, + "grad_norm": 266.4447326660156, + "learning_rate": 1.9436560331908882e-07, + "loss": 9.0777, + "step": 459930 + }, + { + "epoch": 0.9291078996594173, + "grad_norm": 251.5564727783203, + "learning_rate": 1.9426923547614052e-07, + "loss": 10.4074, + "step": 459940 + }, + { + "epoch": 0.9291281002921011, + "grad_norm": 643.8529663085938, + "learning_rate": 1.9417289105574054e-07, + "loss": 29.6219, + "step": 459950 + }, + { + "epoch": 0.929148300924785, + "grad_norm": 226.2017364501953, + "learning_rate": 1.9407657005835967e-07, + "loss": 20.2664, + "step": 459960 + }, + { + "epoch": 0.9291685015574688, + "grad_norm": 149.39573669433594, + "learning_rate": 1.9398027248446582e-07, + "loss": 9.7155, + "step": 459970 + }, + { + "epoch": 0.9291887021901526, + "grad_norm": 318.2593078613281, + "learning_rate": 1.9388399833452974e-07, + "loss": 19.1509, + "step": 459980 + }, + { + "epoch": 0.9292089028228364, + "grad_norm": 283.86065673828125, + "learning_rate": 1.9378774760902052e-07, + "loss": 12.8305, + "step": 459990 + }, + { + "epoch": 0.9292291034555202, + "grad_norm": 132.25704956054688, + "learning_rate": 1.9369152030840553e-07, + "loss": 16.9925, + "step": 460000 + }, + { + "epoch": 0.929249304088204, + "grad_norm": 290.5444030761719, + "learning_rate": 1.9359531643315665e-07, + "loss": 16.6173, + "step": 460010 + }, + { + "epoch": 0.9292695047208879, + "grad_norm": 552.2373657226562, + "learning_rate": 1.9349913598374014e-07, + "loss": 16.1302, + "step": 460020 + }, + { + "epoch": 0.9292897053535717, + "grad_norm": 467.9726867675781, + "learning_rate": 1.9340297896062676e-07, + "loss": 14.7202, + "step": 460030 + }, + { + "epoch": 0.9293099059862555, + "grad_norm": 533.1699829101562, + "learning_rate": 1.9330684536428335e-07, + "loss": 17.7331, + "step": 460040 + }, + { + "epoch": 0.9293301066189393, + "grad_norm": 470.14697265625, + "learning_rate": 1.9321073519518007e-07, + "loss": 19.9507, + "step": 460050 + }, + { + "epoch": 0.9293503072516232, + "grad_norm": 25.874284744262695, + "learning_rate": 1.9311464845378492e-07, + "loss": 12.7159, + "step": 460060 + }, + { + "epoch": 0.929370507884307, + "grad_norm": 529.6058959960938, + "learning_rate": 1.9301858514056527e-07, + "loss": 9.6371, + "step": 460070 + }, + { + "epoch": 0.9293907085169908, + "grad_norm": 342.6182861328125, + "learning_rate": 1.9292254525599075e-07, + "loss": 18.3621, + "step": 460080 + }, + { + "epoch": 0.9294109091496746, + "grad_norm": 304.01837158203125, + "learning_rate": 1.928265288005282e-07, + "loss": 19.4114, + "step": 460090 + }, + { + "epoch": 0.9294311097823584, + "grad_norm": 290.5, + "learning_rate": 1.927305357746462e-07, + "loss": 15.1211, + "step": 460100 + }, + { + "epoch": 0.9294513104150423, + "grad_norm": 215.43443298339844, + "learning_rate": 1.9263456617881203e-07, + "loss": 10.2721, + "step": 460110 + }, + { + "epoch": 0.929471511047726, + "grad_norm": 286.6866149902344, + "learning_rate": 1.9253862001349543e-07, + "loss": 13.1009, + "step": 460120 + }, + { + "epoch": 0.9294917116804098, + "grad_norm": 289.4507751464844, + "learning_rate": 1.9244269727916097e-07, + "loss": 10.1294, + "step": 460130 + }, + { + "epoch": 0.9295119123130936, + "grad_norm": 162.1569061279297, + "learning_rate": 1.9234679797627832e-07, + "loss": 13.3979, + "step": 460140 + }, + { + "epoch": 0.9295321129457774, + "grad_norm": 197.26498413085938, + "learning_rate": 1.9225092210531425e-07, + "loss": 20.3173, + "step": 460150 + }, + { + "epoch": 0.9295523135784612, + "grad_norm": 428.6112976074219, + "learning_rate": 1.9215506966673624e-07, + "loss": 23.4347, + "step": 460160 + }, + { + "epoch": 0.9295725142111451, + "grad_norm": 280.0367736816406, + "learning_rate": 1.9205924066101057e-07, + "loss": 10.5209, + "step": 460170 + }, + { + "epoch": 0.9295927148438289, + "grad_norm": 409.23406982421875, + "learning_rate": 1.9196343508860515e-07, + "loss": 20.4145, + "step": 460180 + }, + { + "epoch": 0.9296129154765127, + "grad_norm": 360.54339599609375, + "learning_rate": 1.9186765294998855e-07, + "loss": 17.5262, + "step": 460190 + }, + { + "epoch": 0.9296331161091965, + "grad_norm": 259.7884826660156, + "learning_rate": 1.917718942456237e-07, + "loss": 15.4444, + "step": 460200 + }, + { + "epoch": 0.9296533167418803, + "grad_norm": 273.5634460449219, + "learning_rate": 1.9167615897598023e-07, + "loss": 13.764, + "step": 460210 + }, + { + "epoch": 0.9296735173745642, + "grad_norm": 379.6659851074219, + "learning_rate": 1.9158044714152447e-07, + "loss": 23.9401, + "step": 460220 + }, + { + "epoch": 0.929693718007248, + "grad_norm": 0.0, + "learning_rate": 1.914847587427221e-07, + "loss": 13.5406, + "step": 460230 + }, + { + "epoch": 0.9297139186399318, + "grad_norm": 224.84014892578125, + "learning_rate": 1.9138909378003946e-07, + "loss": 16.305, + "step": 460240 + }, + { + "epoch": 0.9297341192726156, + "grad_norm": 384.63299560546875, + "learning_rate": 1.9129345225394335e-07, + "loss": 18.553, + "step": 460250 + }, + { + "epoch": 0.9297543199052994, + "grad_norm": 501.1186218261719, + "learning_rate": 1.9119783416490013e-07, + "loss": 26.2674, + "step": 460260 + }, + { + "epoch": 0.9297745205379833, + "grad_norm": 20.195629119873047, + "learning_rate": 1.9110223951337492e-07, + "loss": 17.3952, + "step": 460270 + }, + { + "epoch": 0.9297947211706671, + "grad_norm": 159.2061309814453, + "learning_rate": 1.910066682998346e-07, + "loss": 20.4762, + "step": 460280 + }, + { + "epoch": 0.9298149218033509, + "grad_norm": 201.14389038085938, + "learning_rate": 1.909111205247438e-07, + "loss": 22.9978, + "step": 460290 + }, + { + "epoch": 0.9298351224360347, + "grad_norm": 169.78372192382812, + "learning_rate": 1.9081559618856938e-07, + "loss": 27.0752, + "step": 460300 + }, + { + "epoch": 0.9298553230687185, + "grad_norm": 322.01409912109375, + "learning_rate": 1.907200952917765e-07, + "loss": 23.7841, + "step": 460310 + }, + { + "epoch": 0.9298755237014024, + "grad_norm": 245.37893676757812, + "learning_rate": 1.9062461783483034e-07, + "loss": 12.6225, + "step": 460320 + }, + { + "epoch": 0.9298957243340862, + "grad_norm": 620.7093505859375, + "learning_rate": 1.9052916381819664e-07, + "loss": 20.2819, + "step": 460330 + }, + { + "epoch": 0.92991592496677, + "grad_norm": 0.0, + "learning_rate": 1.904337332423406e-07, + "loss": 25.589, + "step": 460340 + }, + { + "epoch": 0.9299361255994538, + "grad_norm": 251.79603576660156, + "learning_rate": 1.903383261077274e-07, + "loss": 22.6557, + "step": 460350 + }, + { + "epoch": 0.9299563262321376, + "grad_norm": 5.498881816864014, + "learning_rate": 1.9024294241482112e-07, + "loss": 15.9323, + "step": 460360 + }, + { + "epoch": 0.9299765268648214, + "grad_norm": 31.50300407409668, + "learning_rate": 1.9014758216408803e-07, + "loss": 19.4993, + "step": 460370 + }, + { + "epoch": 0.9299967274975052, + "grad_norm": 26.68342399597168, + "learning_rate": 1.900522453559922e-07, + "loss": 16.4608, + "step": 460380 + }, + { + "epoch": 0.930016928130189, + "grad_norm": 1415.607666015625, + "learning_rate": 1.899569319909983e-07, + "loss": 35.6689, + "step": 460390 + }, + { + "epoch": 0.9300371287628728, + "grad_norm": 118.35054779052734, + "learning_rate": 1.8986164206957037e-07, + "loss": 15.2918, + "step": 460400 + }, + { + "epoch": 0.9300573293955566, + "grad_norm": 537.1720581054688, + "learning_rate": 1.897663755921747e-07, + "loss": 24.4098, + "step": 460410 + }, + { + "epoch": 0.9300775300282405, + "grad_norm": 219.6941375732422, + "learning_rate": 1.8967113255927315e-07, + "loss": 17.4501, + "step": 460420 + }, + { + "epoch": 0.9300977306609243, + "grad_norm": 265.08026123046875, + "learning_rate": 1.8957591297133093e-07, + "loss": 9.4662, + "step": 460430 + }, + { + "epoch": 0.9301179312936081, + "grad_norm": 327.1904296875, + "learning_rate": 1.894807168288132e-07, + "loss": 16.3948, + "step": 460440 + }, + { + "epoch": 0.9301381319262919, + "grad_norm": 98.9393081665039, + "learning_rate": 1.8938554413218292e-07, + "loss": 13.5331, + "step": 460450 + }, + { + "epoch": 0.9301583325589757, + "grad_norm": 140.4616241455078, + "learning_rate": 1.8929039488190304e-07, + "loss": 8.2329, + "step": 460460 + }, + { + "epoch": 0.9301785331916596, + "grad_norm": 0.0, + "learning_rate": 1.8919526907843876e-07, + "loss": 39.6257, + "step": 460470 + }, + { + "epoch": 0.9301987338243434, + "grad_norm": 42.32804870605469, + "learning_rate": 1.8910016672225418e-07, + "loss": 14.0256, + "step": 460480 + }, + { + "epoch": 0.9302189344570272, + "grad_norm": 269.3990478515625, + "learning_rate": 1.8900508781381056e-07, + "loss": 13.5953, + "step": 460490 + }, + { + "epoch": 0.930239135089711, + "grad_norm": 116.81532287597656, + "learning_rate": 1.8891003235357307e-07, + "loss": 18.2773, + "step": 460500 + }, + { + "epoch": 0.9302593357223948, + "grad_norm": 340.3694152832031, + "learning_rate": 1.8881500034200473e-07, + "loss": 15.3587, + "step": 460510 + }, + { + "epoch": 0.9302795363550787, + "grad_norm": 326.3700866699219, + "learning_rate": 1.88719991779569e-07, + "loss": 41.7643, + "step": 460520 + }, + { + "epoch": 0.9302997369877625, + "grad_norm": 15.594130516052246, + "learning_rate": 1.8862500666672778e-07, + "loss": 9.7799, + "step": 460530 + }, + { + "epoch": 0.9303199376204463, + "grad_norm": 443.0193176269531, + "learning_rate": 1.8853004500394512e-07, + "loss": 19.7495, + "step": 460540 + }, + { + "epoch": 0.9303401382531301, + "grad_norm": 11.327974319458008, + "learning_rate": 1.8843510679168341e-07, + "loss": 12.4272, + "step": 460550 + }, + { + "epoch": 0.9303603388858139, + "grad_norm": 5.238090515136719, + "learning_rate": 1.883401920304051e-07, + "loss": 10.2333, + "step": 460560 + }, + { + "epoch": 0.9303805395184978, + "grad_norm": 352.72735595703125, + "learning_rate": 1.8824530072057369e-07, + "loss": 24.7441, + "step": 460570 + }, + { + "epoch": 0.9304007401511816, + "grad_norm": 215.92539978027344, + "learning_rate": 1.8815043286265044e-07, + "loss": 18.3635, + "step": 460580 + }, + { + "epoch": 0.9304209407838654, + "grad_norm": 146.20742797851562, + "learning_rate": 1.8805558845709894e-07, + "loss": 10.1945, + "step": 460590 + }, + { + "epoch": 0.9304411414165492, + "grad_norm": 772.099609375, + "learning_rate": 1.8796076750438096e-07, + "loss": 27.2361, + "step": 460600 + }, + { + "epoch": 0.930461342049233, + "grad_norm": 34.058345794677734, + "learning_rate": 1.878659700049579e-07, + "loss": 19.6448, + "step": 460610 + }, + { + "epoch": 0.9304815426819169, + "grad_norm": 573.7548217773438, + "learning_rate": 1.8777119595929315e-07, + "loss": 14.8042, + "step": 460620 + }, + { + "epoch": 0.9305017433146006, + "grad_norm": 367.4371032714844, + "learning_rate": 1.8767644536784703e-07, + "loss": 16.0438, + "step": 460630 + }, + { + "epoch": 0.9305219439472844, + "grad_norm": 359.34515380859375, + "learning_rate": 1.8758171823108295e-07, + "loss": 12.4917, + "step": 460640 + }, + { + "epoch": 0.9305421445799682, + "grad_norm": 221.7831573486328, + "learning_rate": 1.874870145494617e-07, + "loss": 24.6353, + "step": 460650 + }, + { + "epoch": 0.930562345212652, + "grad_norm": 277.5025329589844, + "learning_rate": 1.8739233432344518e-07, + "loss": 17.8756, + "step": 460660 + }, + { + "epoch": 0.9305825458453358, + "grad_norm": 67.49771118164062, + "learning_rate": 1.8729767755349514e-07, + "loss": 17.9026, + "step": 460670 + }, + { + "epoch": 0.9306027464780197, + "grad_norm": 676.9361572265625, + "learning_rate": 1.872030442400713e-07, + "loss": 30.2501, + "step": 460680 + }, + { + "epoch": 0.9306229471107035, + "grad_norm": 407.73931884765625, + "learning_rate": 1.8710843438363713e-07, + "loss": 35.4802, + "step": 460690 + }, + { + "epoch": 0.9306431477433873, + "grad_norm": 0.0, + "learning_rate": 1.8701384798465284e-07, + "loss": 13.346, + "step": 460700 + }, + { + "epoch": 0.9306633483760711, + "grad_norm": 160.38052368164062, + "learning_rate": 1.8691928504357858e-07, + "loss": 23.4906, + "step": 460710 + }, + { + "epoch": 0.9306835490087549, + "grad_norm": 201.43923950195312, + "learning_rate": 1.868247455608757e-07, + "loss": 10.0047, + "step": 460720 + }, + { + "epoch": 0.9307037496414388, + "grad_norm": 235.8616180419922, + "learning_rate": 1.867302295370066e-07, + "loss": 14.4985, + "step": 460730 + }, + { + "epoch": 0.9307239502741226, + "grad_norm": 232.42117309570312, + "learning_rate": 1.8663573697242977e-07, + "loss": 16.334, + "step": 460740 + }, + { + "epoch": 0.9307441509068064, + "grad_norm": 206.00790405273438, + "learning_rate": 1.8654126786760597e-07, + "loss": 33.1476, + "step": 460750 + }, + { + "epoch": 0.9307643515394902, + "grad_norm": 0.0, + "learning_rate": 1.8644682222299703e-07, + "loss": 10.1404, + "step": 460760 + }, + { + "epoch": 0.930784552172174, + "grad_norm": 74.71009063720703, + "learning_rate": 1.8635240003906264e-07, + "loss": 13.2874, + "step": 460770 + }, + { + "epoch": 0.9308047528048579, + "grad_norm": 175.77464294433594, + "learning_rate": 1.8625800131626236e-07, + "loss": 31.2075, + "step": 460780 + }, + { + "epoch": 0.9308249534375417, + "grad_norm": 94.30030822753906, + "learning_rate": 1.8616362605505645e-07, + "loss": 7.6412, + "step": 460790 + }, + { + "epoch": 0.9308451540702255, + "grad_norm": 395.0694580078125, + "learning_rate": 1.8606927425590616e-07, + "loss": 21.654, + "step": 460800 + }, + { + "epoch": 0.9308653547029093, + "grad_norm": 105.0181655883789, + "learning_rate": 1.8597494591926946e-07, + "loss": 18.7361, + "step": 460810 + }, + { + "epoch": 0.9308855553355931, + "grad_norm": 482.18182373046875, + "learning_rate": 1.858806410456071e-07, + "loss": 16.3657, + "step": 460820 + }, + { + "epoch": 0.930905755968277, + "grad_norm": 189.46099853515625, + "learning_rate": 1.8578635963537926e-07, + "loss": 11.8485, + "step": 460830 + }, + { + "epoch": 0.9309259566009608, + "grad_norm": 264.17681884765625, + "learning_rate": 1.856921016890445e-07, + "loss": 18.5469, + "step": 460840 + }, + { + "epoch": 0.9309461572336446, + "grad_norm": 228.68739318847656, + "learning_rate": 1.8559786720706185e-07, + "loss": 15.871, + "step": 460850 + }, + { + "epoch": 0.9309663578663284, + "grad_norm": 527.9981079101562, + "learning_rate": 1.8550365618989207e-07, + "loss": 17.0557, + "step": 460860 + }, + { + "epoch": 0.9309865584990122, + "grad_norm": 479.7044372558594, + "learning_rate": 1.854094686379937e-07, + "loss": 21.1745, + "step": 460870 + }, + { + "epoch": 0.9310067591316961, + "grad_norm": 446.8087158203125, + "learning_rate": 1.8531530455182522e-07, + "loss": 13.7563, + "step": 460880 + }, + { + "epoch": 0.9310269597643798, + "grad_norm": 10.297074317932129, + "learning_rate": 1.852211639318463e-07, + "loss": 7.3226, + "step": 460890 + }, + { + "epoch": 0.9310471603970636, + "grad_norm": 135.11688232421875, + "learning_rate": 1.8512704677851489e-07, + "loss": 7.8177, + "step": 460900 + }, + { + "epoch": 0.9310673610297474, + "grad_norm": 119.65900421142578, + "learning_rate": 1.8503295309229065e-07, + "loss": 9.6999, + "step": 460910 + }, + { + "epoch": 0.9310875616624312, + "grad_norm": 344.15252685546875, + "learning_rate": 1.8493888287363148e-07, + "loss": 17.9117, + "step": 460920 + }, + { + "epoch": 0.931107762295115, + "grad_norm": 69.01956939697266, + "learning_rate": 1.8484483612299654e-07, + "loss": 24.0062, + "step": 460930 + }, + { + "epoch": 0.9311279629277989, + "grad_norm": 87.6671142578125, + "learning_rate": 1.8475081284084428e-07, + "loss": 15.6079, + "step": 460940 + }, + { + "epoch": 0.9311481635604827, + "grad_norm": 133.2266082763672, + "learning_rate": 1.846568130276316e-07, + "loss": 18.3529, + "step": 460950 + }, + { + "epoch": 0.9311683641931665, + "grad_norm": 206.53697204589844, + "learning_rate": 1.8456283668381814e-07, + "loss": 13.5257, + "step": 460960 + }, + { + "epoch": 0.9311885648258503, + "grad_norm": 147.25564575195312, + "learning_rate": 1.8446888380986128e-07, + "loss": 24.4171, + "step": 460970 + }, + { + "epoch": 0.9312087654585341, + "grad_norm": 197.21304321289062, + "learning_rate": 1.84374954406219e-07, + "loss": 19.9227, + "step": 460980 + }, + { + "epoch": 0.931228966091218, + "grad_norm": 378.8609313964844, + "learning_rate": 1.8428104847334927e-07, + "loss": 8.0765, + "step": 460990 + }, + { + "epoch": 0.9312491667239018, + "grad_norm": 363.6342468261719, + "learning_rate": 1.841871660117095e-07, + "loss": 15.0018, + "step": 461000 + }, + { + "epoch": 0.9312693673565856, + "grad_norm": 473.4636535644531, + "learning_rate": 1.8409330702175764e-07, + "loss": 19.226, + "step": 461010 + }, + { + "epoch": 0.9312895679892694, + "grad_norm": 82.45626068115234, + "learning_rate": 1.8399947150395058e-07, + "loss": 25.1138, + "step": 461020 + }, + { + "epoch": 0.9313097686219532, + "grad_norm": 257.804931640625, + "learning_rate": 1.8390565945874572e-07, + "loss": 18.8037, + "step": 461030 + }, + { + "epoch": 0.9313299692546371, + "grad_norm": 229.63601684570312, + "learning_rate": 1.8381187088660046e-07, + "loss": 19.9147, + "step": 461040 + }, + { + "epoch": 0.9313501698873209, + "grad_norm": 125.4647445678711, + "learning_rate": 1.8371810578797277e-07, + "loss": 13.1107, + "step": 461050 + }, + { + "epoch": 0.9313703705200047, + "grad_norm": 153.53598022460938, + "learning_rate": 1.8362436416331896e-07, + "loss": 13.5738, + "step": 461060 + }, + { + "epoch": 0.9313905711526885, + "grad_norm": 239.79486083984375, + "learning_rate": 1.8353064601309533e-07, + "loss": 12.1175, + "step": 461070 + }, + { + "epoch": 0.9314107717853723, + "grad_norm": 54.68346405029297, + "learning_rate": 1.8343695133775874e-07, + "loss": 26.0745, + "step": 461080 + }, + { + "epoch": 0.9314309724180562, + "grad_norm": 222.83460998535156, + "learning_rate": 1.833432801377677e-07, + "loss": 17.1467, + "step": 461090 + }, + { + "epoch": 0.93145117305074, + "grad_norm": 157.41082763671875, + "learning_rate": 1.832496324135763e-07, + "loss": 12.3532, + "step": 461100 + }, + { + "epoch": 0.9314713736834238, + "grad_norm": 325.1773376464844, + "learning_rate": 1.8315600816564137e-07, + "loss": 20.4668, + "step": 461110 + }, + { + "epoch": 0.9314915743161076, + "grad_norm": 166.11477661132812, + "learning_rate": 1.8306240739442094e-07, + "loss": 17.5134, + "step": 461120 + }, + { + "epoch": 0.9315117749487914, + "grad_norm": 294.5464172363281, + "learning_rate": 1.8296883010037014e-07, + "loss": 17.9774, + "step": 461130 + }, + { + "epoch": 0.9315319755814752, + "grad_norm": 73.10298919677734, + "learning_rate": 1.8287527628394418e-07, + "loss": 9.8741, + "step": 461140 + }, + { + "epoch": 0.931552176214159, + "grad_norm": 131.22254943847656, + "learning_rate": 1.827817459456005e-07, + "loss": 11.0736, + "step": 461150 + }, + { + "epoch": 0.9315723768468428, + "grad_norm": 190.13491821289062, + "learning_rate": 1.826882390857948e-07, + "loss": 18.4156, + "step": 461160 + }, + { + "epoch": 0.9315925774795266, + "grad_norm": 157.9766082763672, + "learning_rate": 1.825947557049812e-07, + "loss": 22.2857, + "step": 461170 + }, + { + "epoch": 0.9316127781122104, + "grad_norm": 460.1831359863281, + "learning_rate": 1.825012958036171e-07, + "loss": 34.7807, + "step": 461180 + }, + { + "epoch": 0.9316329787448943, + "grad_norm": 365.3998107910156, + "learning_rate": 1.824078593821571e-07, + "loss": 14.7511, + "step": 461190 + }, + { + "epoch": 0.9316531793775781, + "grad_norm": 48.21858596801758, + "learning_rate": 1.8231444644105755e-07, + "loss": 16.885, + "step": 461200 + }, + { + "epoch": 0.9316733800102619, + "grad_norm": 498.6594543457031, + "learning_rate": 1.8222105698077253e-07, + "loss": 26.9817, + "step": 461210 + }, + { + "epoch": 0.9316935806429457, + "grad_norm": 207.6343994140625, + "learning_rate": 1.8212769100175774e-07, + "loss": 22.9423, + "step": 461220 + }, + { + "epoch": 0.9317137812756295, + "grad_norm": 387.0116882324219, + "learning_rate": 1.8203434850446844e-07, + "loss": 21.9838, + "step": 461230 + }, + { + "epoch": 0.9317339819083134, + "grad_norm": 267.20263671875, + "learning_rate": 1.819410294893592e-07, + "loss": 26.3953, + "step": 461240 + }, + { + "epoch": 0.9317541825409972, + "grad_norm": 5.025315761566162, + "learning_rate": 1.8184773395688527e-07, + "loss": 11.4558, + "step": 461250 + }, + { + "epoch": 0.931774383173681, + "grad_norm": 388.8743896484375, + "learning_rate": 1.8175446190750068e-07, + "loss": 18.0609, + "step": 461260 + }, + { + "epoch": 0.9317945838063648, + "grad_norm": 442.39404296875, + "learning_rate": 1.816612133416612e-07, + "loss": 14.8389, + "step": 461270 + }, + { + "epoch": 0.9318147844390486, + "grad_norm": 177.66726684570312, + "learning_rate": 1.8156798825982035e-07, + "loss": 9.7187, + "step": 461280 + }, + { + "epoch": 0.9318349850717325, + "grad_norm": 203.19876098632812, + "learning_rate": 1.814747866624317e-07, + "loss": 17.6691, + "step": 461290 + }, + { + "epoch": 0.9318551857044163, + "grad_norm": 280.1035461425781, + "learning_rate": 1.8138160854995145e-07, + "loss": 23.1074, + "step": 461300 + }, + { + "epoch": 0.9318753863371001, + "grad_norm": 302.52130126953125, + "learning_rate": 1.8128845392283324e-07, + "loss": 14.5102, + "step": 461310 + }, + { + "epoch": 0.9318955869697839, + "grad_norm": 468.0241394042969, + "learning_rate": 1.8119532278152996e-07, + "loss": 28.7969, + "step": 461320 + }, + { + "epoch": 0.9319157876024677, + "grad_norm": 332.97210693359375, + "learning_rate": 1.8110221512649573e-07, + "loss": 14.2433, + "step": 461330 + }, + { + "epoch": 0.9319359882351516, + "grad_norm": 398.02874755859375, + "learning_rate": 1.8100913095818627e-07, + "loss": 21.8113, + "step": 461340 + }, + { + "epoch": 0.9319561888678354, + "grad_norm": 149.2668914794922, + "learning_rate": 1.8091607027705293e-07, + "loss": 11.9831, + "step": 461350 + }, + { + "epoch": 0.9319763895005192, + "grad_norm": 362.22430419921875, + "learning_rate": 1.8082303308354976e-07, + "loss": 19.275, + "step": 461360 + }, + { + "epoch": 0.931996590133203, + "grad_norm": 129.23387145996094, + "learning_rate": 1.8073001937813138e-07, + "loss": 16.4116, + "step": 461370 + }, + { + "epoch": 0.9320167907658868, + "grad_norm": 243.48883056640625, + "learning_rate": 1.8063702916125025e-07, + "loss": 9.4262, + "step": 461380 + }, + { + "epoch": 0.9320369913985707, + "grad_norm": 206.71240234375, + "learning_rate": 1.805440624333593e-07, + "loss": 25.4547, + "step": 461390 + }, + { + "epoch": 0.9320571920312544, + "grad_norm": 371.13433837890625, + "learning_rate": 1.804511191949121e-07, + "loss": 18.8169, + "step": 461400 + }, + { + "epoch": 0.9320773926639382, + "grad_norm": 100.96916198730469, + "learning_rate": 1.8035819944636269e-07, + "loss": 25.9949, + "step": 461410 + }, + { + "epoch": 0.932097593296622, + "grad_norm": 169.36326599121094, + "learning_rate": 1.8026530318816183e-07, + "loss": 15.5662, + "step": 461420 + }, + { + "epoch": 0.9321177939293058, + "grad_norm": 107.69503021240234, + "learning_rate": 1.8017243042076304e-07, + "loss": 17.789, + "step": 461430 + }, + { + "epoch": 0.9321379945619896, + "grad_norm": 230.31922912597656, + "learning_rate": 1.8007958114462042e-07, + "loss": 12.5262, + "step": 461440 + }, + { + "epoch": 0.9321581951946735, + "grad_norm": 690.5305786132812, + "learning_rate": 1.7998675536018474e-07, + "loss": 21.3882, + "step": 461450 + }, + { + "epoch": 0.9321783958273573, + "grad_norm": 76.3077163696289, + "learning_rate": 1.7989395306790835e-07, + "loss": 47.8428, + "step": 461460 + }, + { + "epoch": 0.9321985964600411, + "grad_norm": 311.6148376464844, + "learning_rate": 1.798011742682454e-07, + "loss": 14.5941, + "step": 461470 + }, + { + "epoch": 0.9322187970927249, + "grad_norm": 69.86370849609375, + "learning_rate": 1.7970841896164658e-07, + "loss": 28.1239, + "step": 461480 + }, + { + "epoch": 0.9322389977254087, + "grad_norm": 124.4713134765625, + "learning_rate": 1.7961568714856382e-07, + "loss": 11.2964, + "step": 461490 + }, + { + "epoch": 0.9322591983580926, + "grad_norm": 197.35536193847656, + "learning_rate": 1.7952297882945e-07, + "loss": 38.8373, + "step": 461500 + }, + { + "epoch": 0.9322793989907764, + "grad_norm": 0.0, + "learning_rate": 1.7943029400475598e-07, + "loss": 14.5593, + "step": 461510 + }, + { + "epoch": 0.9322995996234602, + "grad_norm": 179.98277282714844, + "learning_rate": 1.7933763267493465e-07, + "loss": 12.0409, + "step": 461520 + }, + { + "epoch": 0.932319800256144, + "grad_norm": 1.978187084197998, + "learning_rate": 1.7924499484043622e-07, + "loss": 15.9934, + "step": 461530 + }, + { + "epoch": 0.9323400008888278, + "grad_norm": 224.9529266357422, + "learning_rate": 1.7915238050171367e-07, + "loss": 18.9983, + "step": 461540 + }, + { + "epoch": 0.9323602015215117, + "grad_norm": 129.3295135498047, + "learning_rate": 1.7905978965921778e-07, + "loss": 17.885, + "step": 461550 + }, + { + "epoch": 0.9323804021541955, + "grad_norm": 401.3939514160156, + "learning_rate": 1.7896722231339925e-07, + "loss": 23.0999, + "step": 461560 + }, + { + "epoch": 0.9324006027868793, + "grad_norm": 139.5451202392578, + "learning_rate": 1.788746784647105e-07, + "loss": 13.566, + "step": 461570 + }, + { + "epoch": 0.9324208034195631, + "grad_norm": 381.82012939453125, + "learning_rate": 1.7878215811360068e-07, + "loss": 18.4742, + "step": 461580 + }, + { + "epoch": 0.932441004052247, + "grad_norm": 375.12969970703125, + "learning_rate": 1.7868966126052323e-07, + "loss": 10.8111, + "step": 461590 + }, + { + "epoch": 0.9324612046849308, + "grad_norm": 171.27349853515625, + "learning_rate": 1.785971879059273e-07, + "loss": 19.8354, + "step": 461600 + }, + { + "epoch": 0.9324814053176146, + "grad_norm": 240.85580444335938, + "learning_rate": 1.7850473805026304e-07, + "loss": 22.4678, + "step": 461610 + }, + { + "epoch": 0.9325016059502984, + "grad_norm": 94.5416030883789, + "learning_rate": 1.7841231169398287e-07, + "loss": 11.4463, + "step": 461620 + }, + { + "epoch": 0.9325218065829822, + "grad_norm": 307.89361572265625, + "learning_rate": 1.7831990883753592e-07, + "loss": 14.7547, + "step": 461630 + }, + { + "epoch": 0.932542007215666, + "grad_norm": 310.5614013671875, + "learning_rate": 1.7822752948137289e-07, + "loss": 13.9369, + "step": 461640 + }, + { + "epoch": 0.9325622078483498, + "grad_norm": 909.2883911132812, + "learning_rate": 1.7813517362594347e-07, + "loss": 25.6955, + "step": 461650 + }, + { + "epoch": 0.9325824084810336, + "grad_norm": 229.96792602539062, + "learning_rate": 1.7804284127169946e-07, + "loss": 7.7572, + "step": 461660 + }, + { + "epoch": 0.9326026091137174, + "grad_norm": 233.24856567382812, + "learning_rate": 1.7795053241908943e-07, + "loss": 10.6594, + "step": 461670 + }, + { + "epoch": 0.9326228097464012, + "grad_norm": 357.04248046875, + "learning_rate": 1.7785824706856303e-07, + "loss": 12.6461, + "step": 461680 + }, + { + "epoch": 0.932643010379085, + "grad_norm": 458.421875, + "learning_rate": 1.7776598522057154e-07, + "loss": 22.4475, + "step": 461690 + }, + { + "epoch": 0.9326632110117689, + "grad_norm": 232.36062622070312, + "learning_rate": 1.7767374687556405e-07, + "loss": 30.5857, + "step": 461700 + }, + { + "epoch": 0.9326834116444527, + "grad_norm": 179.06101989746094, + "learning_rate": 1.7758153203398853e-07, + "loss": 25.9762, + "step": 461710 + }, + { + "epoch": 0.9327036122771365, + "grad_norm": 167.18104553222656, + "learning_rate": 1.774893406962963e-07, + "loss": 22.3963, + "step": 461720 + }, + { + "epoch": 0.9327238129098203, + "grad_norm": 506.8542785644531, + "learning_rate": 1.7739717286293644e-07, + "loss": 13.8257, + "step": 461730 + }, + { + "epoch": 0.9327440135425041, + "grad_norm": 85.00739288330078, + "learning_rate": 1.7730502853435805e-07, + "loss": 13.1455, + "step": 461740 + }, + { + "epoch": 0.932764214175188, + "grad_norm": 122.48938751220703, + "learning_rate": 1.7721290771100964e-07, + "loss": 12.0569, + "step": 461750 + }, + { + "epoch": 0.9327844148078718, + "grad_norm": 143.9090576171875, + "learning_rate": 1.7712081039334083e-07, + "loss": 9.0365, + "step": 461760 + }, + { + "epoch": 0.9328046154405556, + "grad_norm": 34.36774444580078, + "learning_rate": 1.770287365818002e-07, + "loss": 8.9793, + "step": 461770 + }, + { + "epoch": 0.9328248160732394, + "grad_norm": 174.63685607910156, + "learning_rate": 1.7693668627683625e-07, + "loss": 9.9798, + "step": 461780 + }, + { + "epoch": 0.9328450167059232, + "grad_norm": 250.67218017578125, + "learning_rate": 1.7684465947889806e-07, + "loss": 13.5474, + "step": 461790 + }, + { + "epoch": 0.9328652173386071, + "grad_norm": 200.02870178222656, + "learning_rate": 1.7675265618843361e-07, + "loss": 10.7043, + "step": 461800 + }, + { + "epoch": 0.9328854179712909, + "grad_norm": 215.54002380371094, + "learning_rate": 1.7666067640589256e-07, + "loss": 29.935, + "step": 461810 + }, + { + "epoch": 0.9329056186039747, + "grad_norm": 446.4798278808594, + "learning_rate": 1.7656872013172176e-07, + "loss": 19.1518, + "step": 461820 + }, + { + "epoch": 0.9329258192366585, + "grad_norm": 220.8884735107422, + "learning_rate": 1.764767873663703e-07, + "loss": 37.2522, + "step": 461830 + }, + { + "epoch": 0.9329460198693423, + "grad_norm": 381.53521728515625, + "learning_rate": 1.7638487811028616e-07, + "loss": 16.741, + "step": 461840 + }, + { + "epoch": 0.9329662205020262, + "grad_norm": 184.9522705078125, + "learning_rate": 1.7629299236391616e-07, + "loss": 13.5561, + "step": 461850 + }, + { + "epoch": 0.93298642113471, + "grad_norm": 1035.3656005859375, + "learning_rate": 1.7620113012771002e-07, + "loss": 19.5629, + "step": 461860 + }, + { + "epoch": 0.9330066217673938, + "grad_norm": 511.546630859375, + "learning_rate": 1.7610929140211397e-07, + "loss": 24.5877, + "step": 461870 + }, + { + "epoch": 0.9330268224000776, + "grad_norm": 335.9839782714844, + "learning_rate": 1.760174761875766e-07, + "loss": 10.984, + "step": 461880 + }, + { + "epoch": 0.9330470230327614, + "grad_norm": 194.3610382080078, + "learning_rate": 1.7592568448454528e-07, + "loss": 21.0758, + "step": 461890 + }, + { + "epoch": 0.9330672236654453, + "grad_norm": 119.42951202392578, + "learning_rate": 1.758339162934658e-07, + "loss": 20.2449, + "step": 461900 + }, + { + "epoch": 0.933087424298129, + "grad_norm": 446.78759765625, + "learning_rate": 1.757421716147878e-07, + "loss": 20.569, + "step": 461910 + }, + { + "epoch": 0.9331076249308128, + "grad_norm": 545.8987426757812, + "learning_rate": 1.7565045044895756e-07, + "loss": 20.1485, + "step": 461920 + }, + { + "epoch": 0.9331278255634966, + "grad_norm": 142.15664672851562, + "learning_rate": 1.7555875279642087e-07, + "loss": 16.4516, + "step": 461930 + }, + { + "epoch": 0.9331480261961804, + "grad_norm": 79.61673736572266, + "learning_rate": 1.754670786576257e-07, + "loss": 11.931, + "step": 461940 + }, + { + "epoch": 0.9331682268288642, + "grad_norm": 111.73258209228516, + "learning_rate": 1.7537542803302e-07, + "loss": 7.779, + "step": 461950 + }, + { + "epoch": 0.9331884274615481, + "grad_norm": 283.0420227050781, + "learning_rate": 1.7528380092304842e-07, + "loss": 17.6648, + "step": 461960 + }, + { + "epoch": 0.9332086280942319, + "grad_norm": 609.4569702148438, + "learning_rate": 1.751921973281584e-07, + "loss": 30.5705, + "step": 461970 + }, + { + "epoch": 0.9332288287269157, + "grad_norm": 329.05462646484375, + "learning_rate": 1.7510061724879678e-07, + "loss": 16.6133, + "step": 461980 + }, + { + "epoch": 0.9332490293595995, + "grad_norm": 8.676403045654297, + "learning_rate": 1.750090606854099e-07, + "loss": 20.2779, + "step": 461990 + }, + { + "epoch": 0.9332692299922833, + "grad_norm": 217.2760009765625, + "learning_rate": 1.7491752763844294e-07, + "loss": 14.0126, + "step": 462000 + }, + { + "epoch": 0.9332894306249672, + "grad_norm": 218.53541564941406, + "learning_rate": 1.7482601810834276e-07, + "loss": 13.3233, + "step": 462010 + }, + { + "epoch": 0.933309631257651, + "grad_norm": 100.9002685546875, + "learning_rate": 1.7473453209555625e-07, + "loss": 15.2209, + "step": 462020 + }, + { + "epoch": 0.9333298318903348, + "grad_norm": 363.990478515625, + "learning_rate": 1.7464306960052746e-07, + "loss": 16.0504, + "step": 462030 + }, + { + "epoch": 0.9333500325230186, + "grad_norm": 572.1642456054688, + "learning_rate": 1.7455163062370273e-07, + "loss": 10.6288, + "step": 462040 + }, + { + "epoch": 0.9333702331557024, + "grad_norm": 286.1855773925781, + "learning_rate": 1.744602151655289e-07, + "loss": 17.9434, + "step": 462050 + }, + { + "epoch": 0.9333904337883863, + "grad_norm": 139.72940063476562, + "learning_rate": 1.743688232264512e-07, + "loss": 6.4435, + "step": 462060 + }, + { + "epoch": 0.9334106344210701, + "grad_norm": 238.74295043945312, + "learning_rate": 1.742774548069137e-07, + "loss": 11.5949, + "step": 462070 + }, + { + "epoch": 0.9334308350537539, + "grad_norm": 282.2811279296875, + "learning_rate": 1.7418610990736273e-07, + "loss": 15.9587, + "step": 462080 + }, + { + "epoch": 0.9334510356864377, + "grad_norm": 108.57559967041016, + "learning_rate": 1.7409478852824402e-07, + "loss": 28.7614, + "step": 462090 + }, + { + "epoch": 0.9334712363191215, + "grad_norm": 255.90158081054688, + "learning_rate": 1.740034906700011e-07, + "loss": 9.9074, + "step": 462100 + }, + { + "epoch": 0.9334914369518054, + "grad_norm": 274.3307800292969, + "learning_rate": 1.7391221633308032e-07, + "loss": 16.6796, + "step": 462110 + }, + { + "epoch": 0.9335116375844892, + "grad_norm": 268.1937255859375, + "learning_rate": 1.7382096551792572e-07, + "loss": 14.801, + "step": 462120 + }, + { + "epoch": 0.933531838217173, + "grad_norm": 352.5740051269531, + "learning_rate": 1.7372973822498252e-07, + "loss": 16.7663, + "step": 462130 + }, + { + "epoch": 0.9335520388498568, + "grad_norm": 19.405569076538086, + "learning_rate": 1.7363853445469482e-07, + "loss": 15.1377, + "step": 462140 + }, + { + "epoch": 0.9335722394825406, + "grad_norm": 323.9486999511719, + "learning_rate": 1.7354735420750835e-07, + "loss": 14.7211, + "step": 462150 + }, + { + "epoch": 0.9335924401152244, + "grad_norm": 163.8055419921875, + "learning_rate": 1.7345619748386666e-07, + "loss": 14.3039, + "step": 462160 + }, + { + "epoch": 0.9336126407479082, + "grad_norm": 201.86553955078125, + "learning_rate": 1.733650642842133e-07, + "loss": 15.7717, + "step": 462170 + }, + { + "epoch": 0.933632841380592, + "grad_norm": 454.3749084472656, + "learning_rate": 1.73273954608994e-07, + "loss": 27.3315, + "step": 462180 + }, + { + "epoch": 0.9336530420132758, + "grad_norm": 30.84465789794922, + "learning_rate": 1.7318286845865174e-07, + "loss": 25.938, + "step": 462190 + }, + { + "epoch": 0.9336732426459596, + "grad_norm": 88.91412353515625, + "learning_rate": 1.7309180583363062e-07, + "loss": 21.5223, + "step": 462200 + }, + { + "epoch": 0.9336934432786435, + "grad_norm": 265.9754638671875, + "learning_rate": 1.7300076673437526e-07, + "loss": 9.4895, + "step": 462210 + }, + { + "epoch": 0.9337136439113273, + "grad_norm": 270.78021240234375, + "learning_rate": 1.7290975116132756e-07, + "loss": 20.5894, + "step": 462220 + }, + { + "epoch": 0.9337338445440111, + "grad_norm": 141.9483184814453, + "learning_rate": 1.728187591149333e-07, + "loss": 11.7209, + "step": 462230 + }, + { + "epoch": 0.9337540451766949, + "grad_norm": 102.59892272949219, + "learning_rate": 1.7272779059563483e-07, + "loss": 22.9712, + "step": 462240 + }, + { + "epoch": 0.9337742458093787, + "grad_norm": 242.95530700683594, + "learning_rate": 1.7263684560387518e-07, + "loss": 20.0876, + "step": 462250 + }, + { + "epoch": 0.9337944464420626, + "grad_norm": 206.11166381835938, + "learning_rate": 1.7254592414009785e-07, + "loss": 16.8085, + "step": 462260 + }, + { + "epoch": 0.9338146470747464, + "grad_norm": 704.259521484375, + "learning_rate": 1.7245502620474643e-07, + "loss": 20.4897, + "step": 462270 + }, + { + "epoch": 0.9338348477074302, + "grad_norm": 281.055419921875, + "learning_rate": 1.7236415179826438e-07, + "loss": 21.912, + "step": 462280 + }, + { + "epoch": 0.933855048340114, + "grad_norm": 85.06211853027344, + "learning_rate": 1.7227330092109306e-07, + "loss": 14.7004, + "step": 462290 + }, + { + "epoch": 0.9338752489727978, + "grad_norm": 128.4312286376953, + "learning_rate": 1.7218247357367656e-07, + "loss": 14.4685, + "step": 462300 + }, + { + "epoch": 0.9338954496054817, + "grad_norm": 8.975204467773438, + "learning_rate": 1.720916697564573e-07, + "loss": 8.9357, + "step": 462310 + }, + { + "epoch": 0.9339156502381655, + "grad_norm": 378.54376220703125, + "learning_rate": 1.7200088946987713e-07, + "loss": 26.6003, + "step": 462320 + }, + { + "epoch": 0.9339358508708493, + "grad_norm": 809.9996948242188, + "learning_rate": 1.7191013271437908e-07, + "loss": 22.5336, + "step": 462330 + }, + { + "epoch": 0.9339560515035331, + "grad_norm": 446.6749267578125, + "learning_rate": 1.7181939949040606e-07, + "loss": 15.6763, + "step": 462340 + }, + { + "epoch": 0.9339762521362169, + "grad_norm": 674.1312255859375, + "learning_rate": 1.717286897983994e-07, + "loss": 28.7681, + "step": 462350 + }, + { + "epoch": 0.9339964527689008, + "grad_norm": 89.6597900390625, + "learning_rate": 1.7163800363880102e-07, + "loss": 8.1562, + "step": 462360 + }, + { + "epoch": 0.9340166534015846, + "grad_norm": 273.99957275390625, + "learning_rate": 1.715473410120544e-07, + "loss": 27.0604, + "step": 462370 + }, + { + "epoch": 0.9340368540342684, + "grad_norm": 324.4134521484375, + "learning_rate": 1.7145670191859977e-07, + "loss": 13.3288, + "step": 462380 + }, + { + "epoch": 0.9340570546669522, + "grad_norm": 130.2768096923828, + "learning_rate": 1.7136608635887952e-07, + "loss": 17.8747, + "step": 462390 + }, + { + "epoch": 0.934077255299636, + "grad_norm": 0.0, + "learning_rate": 1.7127549433333557e-07, + "loss": 13.5814, + "step": 462400 + }, + { + "epoch": 0.9340974559323199, + "grad_norm": 108.8055191040039, + "learning_rate": 1.7118492584240865e-07, + "loss": 15.4526, + "step": 462410 + }, + { + "epoch": 0.9341176565650036, + "grad_norm": 303.342529296875, + "learning_rate": 1.7109438088654173e-07, + "loss": 15.3623, + "step": 462420 + }, + { + "epoch": 0.9341378571976874, + "grad_norm": 418.23321533203125, + "learning_rate": 1.7100385946617393e-07, + "loss": 22.4594, + "step": 462430 + }, + { + "epoch": 0.9341580578303712, + "grad_norm": 633.9379272460938, + "learning_rate": 1.7091336158174877e-07, + "loss": 22.1655, + "step": 462440 + }, + { + "epoch": 0.934178258463055, + "grad_norm": 247.4800567626953, + "learning_rate": 1.7082288723370587e-07, + "loss": 5.665, + "step": 462450 + }, + { + "epoch": 0.9341984590957388, + "grad_norm": 177.9915771484375, + "learning_rate": 1.7073243642248605e-07, + "loss": 14.5838, + "step": 462460 + }, + { + "epoch": 0.9342186597284227, + "grad_norm": 425.112548828125, + "learning_rate": 1.7064200914853112e-07, + "loss": 23.317, + "step": 462470 + }, + { + "epoch": 0.9342388603611065, + "grad_norm": 157.11764526367188, + "learning_rate": 1.7055160541228077e-07, + "loss": 6.9621, + "step": 462480 + }, + { + "epoch": 0.9342590609937903, + "grad_norm": 530.9124145507812, + "learning_rate": 1.7046122521417686e-07, + "loss": 11.7681, + "step": 462490 + }, + { + "epoch": 0.9342792616264741, + "grad_norm": 187.06556701660156, + "learning_rate": 1.7037086855465902e-07, + "loss": 22.7605, + "step": 462500 + }, + { + "epoch": 0.9342994622591579, + "grad_norm": 191.40975952148438, + "learning_rate": 1.702805354341669e-07, + "loss": 24.8928, + "step": 462510 + }, + { + "epoch": 0.9343196628918418, + "grad_norm": 0.0, + "learning_rate": 1.7019022585314293e-07, + "loss": 9.0658, + "step": 462520 + }, + { + "epoch": 0.9343398635245256, + "grad_norm": 588.7440795898438, + "learning_rate": 1.7009993981202567e-07, + "loss": 25.3733, + "step": 462530 + }, + { + "epoch": 0.9343600641572094, + "grad_norm": 128.56907653808594, + "learning_rate": 1.7000967731125472e-07, + "loss": 21.2674, + "step": 462540 + }, + { + "epoch": 0.9343802647898932, + "grad_norm": 204.19056701660156, + "learning_rate": 1.699194383512709e-07, + "loss": 7.1781, + "step": 462550 + }, + { + "epoch": 0.934400465422577, + "grad_norm": 238.27174377441406, + "learning_rate": 1.6982922293251548e-07, + "loss": 44.0774, + "step": 462560 + }, + { + "epoch": 0.9344206660552609, + "grad_norm": 180.11070251464844, + "learning_rate": 1.6973903105542533e-07, + "loss": 32.1863, + "step": 462570 + }, + { + "epoch": 0.9344408666879447, + "grad_norm": 584.6224975585938, + "learning_rate": 1.6964886272044069e-07, + "loss": 23.1239, + "step": 462580 + }, + { + "epoch": 0.9344610673206285, + "grad_norm": 245.20762634277344, + "learning_rate": 1.6955871792800283e-07, + "loss": 13.2816, + "step": 462590 + }, + { + "epoch": 0.9344812679533123, + "grad_norm": 366.8085021972656, + "learning_rate": 1.6946859667854977e-07, + "loss": 31.749, + "step": 462600 + }, + { + "epoch": 0.9345014685859961, + "grad_norm": 99.18296813964844, + "learning_rate": 1.6937849897252056e-07, + "loss": 8.3537, + "step": 462610 + }, + { + "epoch": 0.93452166921868, + "grad_norm": 131.9552764892578, + "learning_rate": 1.6928842481035436e-07, + "loss": 9.0818, + "step": 462620 + }, + { + "epoch": 0.9345418698513638, + "grad_norm": 371.1967468261719, + "learning_rate": 1.691983741924913e-07, + "loss": 16.3709, + "step": 462630 + }, + { + "epoch": 0.9345620704840476, + "grad_norm": 271.56060791015625, + "learning_rate": 1.6910834711936886e-07, + "loss": 15.2731, + "step": 462640 + }, + { + "epoch": 0.9345822711167314, + "grad_norm": 382.3261413574219, + "learning_rate": 1.690183435914261e-07, + "loss": 11.5249, + "step": 462650 + }, + { + "epoch": 0.9346024717494152, + "grad_norm": 413.75128173828125, + "learning_rate": 1.689283636091027e-07, + "loss": 13.1042, + "step": 462660 + }, + { + "epoch": 0.9346226723820991, + "grad_norm": 237.03350830078125, + "learning_rate": 1.688384071728366e-07, + "loss": 21.1822, + "step": 462670 + }, + { + "epoch": 0.9346428730147828, + "grad_norm": 107.84660339355469, + "learning_rate": 1.6874847428306583e-07, + "loss": 20.7907, + "step": 462680 + }, + { + "epoch": 0.9346630736474666, + "grad_norm": 206.15914916992188, + "learning_rate": 1.6865856494022892e-07, + "loss": 13.9638, + "step": 462690 + }, + { + "epoch": 0.9346832742801504, + "grad_norm": 269.52392578125, + "learning_rate": 1.6856867914476492e-07, + "loss": 7.6573, + "step": 462700 + }, + { + "epoch": 0.9347034749128342, + "grad_norm": 4.615231513977051, + "learning_rate": 1.684788168971102e-07, + "loss": 12.817, + "step": 462710 + }, + { + "epoch": 0.934723675545518, + "grad_norm": 468.2229309082031, + "learning_rate": 1.6838897819770438e-07, + "loss": 38.626, + "step": 462720 + }, + { + "epoch": 0.9347438761782019, + "grad_norm": 2374.771240234375, + "learning_rate": 1.682991630469838e-07, + "loss": 27.894, + "step": 462730 + }, + { + "epoch": 0.9347640768108857, + "grad_norm": 432.9613037109375, + "learning_rate": 1.6820937144538807e-07, + "loss": 16.0758, + "step": 462740 + }, + { + "epoch": 0.9347842774435695, + "grad_norm": 485.05889892578125, + "learning_rate": 1.6811960339335298e-07, + "loss": 18.6951, + "step": 462750 + }, + { + "epoch": 0.9348044780762533, + "grad_norm": 0.0, + "learning_rate": 1.6802985889131762e-07, + "loss": 18.6691, + "step": 462760 + }, + { + "epoch": 0.9348246787089372, + "grad_norm": 175.54074096679688, + "learning_rate": 1.6794013793971887e-07, + "loss": 26.6739, + "step": 462770 + }, + { + "epoch": 0.934844879341621, + "grad_norm": 392.8362731933594, + "learning_rate": 1.6785044053899302e-07, + "loss": 14.6655, + "step": 462780 + }, + { + "epoch": 0.9348650799743048, + "grad_norm": 141.31777954101562, + "learning_rate": 1.6776076668957864e-07, + "loss": 16.925, + "step": 462790 + }, + { + "epoch": 0.9348852806069886, + "grad_norm": 176.33457946777344, + "learning_rate": 1.6767111639191202e-07, + "loss": 25.0126, + "step": 462800 + }, + { + "epoch": 0.9349054812396724, + "grad_norm": 0.0, + "learning_rate": 1.675814896464306e-07, + "loss": 17.1646, + "step": 462810 + }, + { + "epoch": 0.9349256818723563, + "grad_norm": 182.71763610839844, + "learning_rate": 1.6749188645357072e-07, + "loss": 12.5836, + "step": 462820 + }, + { + "epoch": 0.9349458825050401, + "grad_norm": 312.7193603515625, + "learning_rate": 1.6740230681376867e-07, + "loss": 36.3983, + "step": 462830 + }, + { + "epoch": 0.9349660831377239, + "grad_norm": 57.26655197143555, + "learning_rate": 1.6731275072746244e-07, + "loss": 19.5918, + "step": 462840 + }, + { + "epoch": 0.9349862837704077, + "grad_norm": 362.644775390625, + "learning_rate": 1.672232181950878e-07, + "loss": 12.4153, + "step": 462850 + }, + { + "epoch": 0.9350064844030915, + "grad_norm": 90.16925811767578, + "learning_rate": 1.6713370921708049e-07, + "loss": 10.4072, + "step": 462860 + }, + { + "epoch": 0.9350266850357754, + "grad_norm": 241.1505584716797, + "learning_rate": 1.6704422379387685e-07, + "loss": 15.8052, + "step": 462870 + }, + { + "epoch": 0.9350468856684592, + "grad_norm": 4.763140678405762, + "learning_rate": 1.669547619259143e-07, + "loss": 11.5775, + "step": 462880 + }, + { + "epoch": 0.935067086301143, + "grad_norm": 322.7345275878906, + "learning_rate": 1.6686532361362805e-07, + "loss": 16.5688, + "step": 462890 + }, + { + "epoch": 0.9350872869338268, + "grad_norm": 107.84266662597656, + "learning_rate": 1.6677590885745388e-07, + "loss": 30.3047, + "step": 462900 + }, + { + "epoch": 0.9351074875665106, + "grad_norm": 134.6808624267578, + "learning_rate": 1.6668651765782806e-07, + "loss": 27.6451, + "step": 462910 + }, + { + "epoch": 0.9351276881991945, + "grad_norm": 402.5738525390625, + "learning_rate": 1.6659715001518583e-07, + "loss": 19.3808, + "step": 462920 + }, + { + "epoch": 0.9351478888318782, + "grad_norm": 406.24359130859375, + "learning_rate": 1.665078059299624e-07, + "loss": 11.4116, + "step": 462930 + }, + { + "epoch": 0.935168089464562, + "grad_norm": 272.2979431152344, + "learning_rate": 1.6641848540259353e-07, + "loss": 29.7682, + "step": 462940 + }, + { + "epoch": 0.9351882900972458, + "grad_norm": 55.207969665527344, + "learning_rate": 1.6632918843351554e-07, + "loss": 8.7309, + "step": 462950 + }, + { + "epoch": 0.9352084907299296, + "grad_norm": 83.4108657836914, + "learning_rate": 1.662399150231625e-07, + "loss": 17.1847, + "step": 462960 + }, + { + "epoch": 0.9352286913626134, + "grad_norm": 271.63861083984375, + "learning_rate": 1.6615066517196965e-07, + "loss": 22.5939, + "step": 462970 + }, + { + "epoch": 0.9352488919952973, + "grad_norm": 364.66839599609375, + "learning_rate": 1.6606143888037219e-07, + "loss": 11.5765, + "step": 462980 + }, + { + "epoch": 0.9352690926279811, + "grad_norm": 261.2265625, + "learning_rate": 1.659722361488053e-07, + "loss": 22.7304, + "step": 462990 + }, + { + "epoch": 0.9352892932606649, + "grad_norm": 395.2714538574219, + "learning_rate": 1.6588305697770313e-07, + "loss": 23.3989, + "step": 463000 + }, + { + "epoch": 0.9353094938933487, + "grad_norm": 162.39208984375, + "learning_rate": 1.6579390136750086e-07, + "loss": 29.618, + "step": 463010 + }, + { + "epoch": 0.9353296945260325, + "grad_norm": 233.4529266357422, + "learning_rate": 1.6570476931863256e-07, + "loss": 14.8123, + "step": 463020 + }, + { + "epoch": 0.9353498951587164, + "grad_norm": 370.2455749511719, + "learning_rate": 1.656156608315329e-07, + "loss": 22.7996, + "step": 463030 + }, + { + "epoch": 0.9353700957914002, + "grad_norm": 196.27635192871094, + "learning_rate": 1.65526575906636e-07, + "loss": 14.9946, + "step": 463040 + }, + { + "epoch": 0.935390296424084, + "grad_norm": 129.41957092285156, + "learning_rate": 1.6543751454437708e-07, + "loss": 14.2775, + "step": 463050 + }, + { + "epoch": 0.9354104970567678, + "grad_norm": 165.82347106933594, + "learning_rate": 1.6534847674518905e-07, + "loss": 13.1837, + "step": 463060 + }, + { + "epoch": 0.9354306976894516, + "grad_norm": 227.88143920898438, + "learning_rate": 1.6525946250950553e-07, + "loss": 12.6661, + "step": 463070 + }, + { + "epoch": 0.9354508983221355, + "grad_norm": 144.63101196289062, + "learning_rate": 1.651704718377617e-07, + "loss": 12.573, + "step": 463080 + }, + { + "epoch": 0.9354710989548193, + "grad_norm": 512.3611450195312, + "learning_rate": 1.650815047303894e-07, + "loss": 21.9159, + "step": 463090 + }, + { + "epoch": 0.9354912995875031, + "grad_norm": 8.180095672607422, + "learning_rate": 1.6499256118782503e-07, + "loss": 24.0838, + "step": 463100 + }, + { + "epoch": 0.9355115002201869, + "grad_norm": 1209.033447265625, + "learning_rate": 1.6490364121049984e-07, + "loss": 24.2327, + "step": 463110 + }, + { + "epoch": 0.9355317008528707, + "grad_norm": 215.40579223632812, + "learning_rate": 1.648147447988474e-07, + "loss": 19.8343, + "step": 463120 + }, + { + "epoch": 0.9355519014855546, + "grad_norm": 451.74371337890625, + "learning_rate": 1.6472587195330236e-07, + "loss": 28.199, + "step": 463130 + }, + { + "epoch": 0.9355721021182384, + "grad_norm": 294.3414001464844, + "learning_rate": 1.6463702267429659e-07, + "loss": 16.5919, + "step": 463140 + }, + { + "epoch": 0.9355923027509222, + "grad_norm": 301.0167541503906, + "learning_rate": 1.645481969622631e-07, + "loss": 19.5308, + "step": 463150 + }, + { + "epoch": 0.935612503383606, + "grad_norm": 256.2607421875, + "learning_rate": 1.644593948176354e-07, + "loss": 23.8855, + "step": 463160 + }, + { + "epoch": 0.9356327040162898, + "grad_norm": 201.0640869140625, + "learning_rate": 1.6437061624084704e-07, + "loss": 13.9896, + "step": 463170 + }, + { + "epoch": 0.9356529046489737, + "grad_norm": 256.10980224609375, + "learning_rate": 1.6428186123232826e-07, + "loss": 13.7225, + "step": 463180 + }, + { + "epoch": 0.9356731052816574, + "grad_norm": 207.07473754882812, + "learning_rate": 1.6419312979251368e-07, + "loss": 26.9117, + "step": 463190 + }, + { + "epoch": 0.9356933059143412, + "grad_norm": 401.6343078613281, + "learning_rate": 1.6410442192183574e-07, + "loss": 13.2821, + "step": 463200 + }, + { + "epoch": 0.935713506547025, + "grad_norm": 161.51248168945312, + "learning_rate": 1.6401573762072631e-07, + "loss": 9.5149, + "step": 463210 + }, + { + "epoch": 0.9357337071797088, + "grad_norm": 327.02825927734375, + "learning_rate": 1.6392707688961728e-07, + "loss": 14.089, + "step": 463220 + }, + { + "epoch": 0.9357539078123926, + "grad_norm": 150.39974975585938, + "learning_rate": 1.638384397289411e-07, + "loss": 24.2577, + "step": 463230 + }, + { + "epoch": 0.9357741084450765, + "grad_norm": 220.7972412109375, + "learning_rate": 1.6374982613913072e-07, + "loss": 18.1346, + "step": 463240 + }, + { + "epoch": 0.9357943090777603, + "grad_norm": 336.61572265625, + "learning_rate": 1.6366123612061636e-07, + "loss": 18.3377, + "step": 463250 + }, + { + "epoch": 0.9358145097104441, + "grad_norm": 174.1511993408203, + "learning_rate": 1.635726696738299e-07, + "loss": 21.0204, + "step": 463260 + }, + { + "epoch": 0.9358347103431279, + "grad_norm": 340.135498046875, + "learning_rate": 1.6348412679920488e-07, + "loss": 9.5584, + "step": 463270 + }, + { + "epoch": 0.9358549109758117, + "grad_norm": 119.71471405029297, + "learning_rate": 1.6339560749717154e-07, + "loss": 9.1923, + "step": 463280 + }, + { + "epoch": 0.9358751116084956, + "grad_norm": 237.67774963378906, + "learning_rate": 1.633071117681606e-07, + "loss": 14.2949, + "step": 463290 + }, + { + "epoch": 0.9358953122411794, + "grad_norm": 500.4530944824219, + "learning_rate": 1.6321863961260452e-07, + "loss": 17.3124, + "step": 463300 + }, + { + "epoch": 0.9359155128738632, + "grad_norm": 416.07958984375, + "learning_rate": 1.6313019103093463e-07, + "loss": 16.5199, + "step": 463310 + }, + { + "epoch": 0.935935713506547, + "grad_norm": 35.018951416015625, + "learning_rate": 1.6304176602358056e-07, + "loss": 14.4992, + "step": 463320 + }, + { + "epoch": 0.9359559141392308, + "grad_norm": 271.4516296386719, + "learning_rate": 1.6295336459097532e-07, + "loss": 13.3977, + "step": 463330 + }, + { + "epoch": 0.9359761147719147, + "grad_norm": 294.55303955078125, + "learning_rate": 1.62864986733548e-07, + "loss": 14.6723, + "step": 463340 + }, + { + "epoch": 0.9359963154045985, + "grad_norm": 200.52882385253906, + "learning_rate": 1.6277663245173047e-07, + "loss": 10.7413, + "step": 463350 + }, + { + "epoch": 0.9360165160372823, + "grad_norm": 212.34140014648438, + "learning_rate": 1.6268830174595242e-07, + "loss": 11.5137, + "step": 463360 + }, + { + "epoch": 0.9360367166699661, + "grad_norm": 298.3582763671875, + "learning_rate": 1.6259999461664567e-07, + "loss": 24.2277, + "step": 463370 + }, + { + "epoch": 0.93605691730265, + "grad_norm": 15.46964168548584, + "learning_rate": 1.6251171106423935e-07, + "loss": 13.355, + "step": 463380 + }, + { + "epoch": 0.9360771179353338, + "grad_norm": 249.83140563964844, + "learning_rate": 1.6242345108916424e-07, + "loss": 13.0832, + "step": 463390 + }, + { + "epoch": 0.9360973185680176, + "grad_norm": 260.1106872558594, + "learning_rate": 1.6233521469185054e-07, + "loss": 21.597, + "step": 463400 + }, + { + "epoch": 0.9361175192007014, + "grad_norm": 219.79693603515625, + "learning_rate": 1.6224700187272792e-07, + "loss": 17.4026, + "step": 463410 + }, + { + "epoch": 0.9361377198333852, + "grad_norm": 194.88259887695312, + "learning_rate": 1.621588126322271e-07, + "loss": 11.5941, + "step": 463420 + }, + { + "epoch": 0.936157920466069, + "grad_norm": 818.1163330078125, + "learning_rate": 1.620706469707778e-07, + "loss": 39.9242, + "step": 463430 + }, + { + "epoch": 0.9361781210987528, + "grad_norm": 262.7649230957031, + "learning_rate": 1.619825048888085e-07, + "loss": 11.7672, + "step": 463440 + }, + { + "epoch": 0.9361983217314366, + "grad_norm": 271.2383117675781, + "learning_rate": 1.618943863867506e-07, + "loss": 21.945, + "step": 463450 + }, + { + "epoch": 0.9362185223641204, + "grad_norm": 10.14715576171875, + "learning_rate": 1.6180629146503256e-07, + "loss": 9.3233, + "step": 463460 + }, + { + "epoch": 0.9362387229968042, + "grad_norm": 377.5271301269531, + "learning_rate": 1.61718220124083e-07, + "loss": 11.135, + "step": 463470 + }, + { + "epoch": 0.936258923629488, + "grad_norm": 5.2541985511779785, + "learning_rate": 1.6163017236433265e-07, + "loss": 14.5229, + "step": 463480 + }, + { + "epoch": 0.9362791242621719, + "grad_norm": 511.776123046875, + "learning_rate": 1.6154214818621007e-07, + "loss": 21.0427, + "step": 463490 + }, + { + "epoch": 0.9362993248948557, + "grad_norm": 163.4555206298828, + "learning_rate": 1.6145414759014433e-07, + "loss": 13.9459, + "step": 463500 + }, + { + "epoch": 0.9363195255275395, + "grad_norm": 158.45411682128906, + "learning_rate": 1.6136617057656344e-07, + "loss": 13.8923, + "step": 463510 + }, + { + "epoch": 0.9363397261602233, + "grad_norm": 96.13873291015625, + "learning_rate": 1.6127821714589763e-07, + "loss": 14.0296, + "step": 463520 + }, + { + "epoch": 0.9363599267929071, + "grad_norm": 518.1529541015625, + "learning_rate": 1.6119028729857545e-07, + "loss": 15.4953, + "step": 463530 + }, + { + "epoch": 0.936380127425591, + "grad_norm": 210.54432678222656, + "learning_rate": 1.6110238103502374e-07, + "loss": 19.9439, + "step": 463540 + }, + { + "epoch": 0.9364003280582748, + "grad_norm": 175.22804260253906, + "learning_rate": 1.6101449835567273e-07, + "loss": 11.3401, + "step": 463550 + }, + { + "epoch": 0.9364205286909586, + "grad_norm": 559.84228515625, + "learning_rate": 1.6092663926094987e-07, + "loss": 21.6868, + "step": 463560 + }, + { + "epoch": 0.9364407293236424, + "grad_norm": 321.38812255859375, + "learning_rate": 1.6083880375128424e-07, + "loss": 16.2458, + "step": 463570 + }, + { + "epoch": 0.9364609299563262, + "grad_norm": 445.6077575683594, + "learning_rate": 1.6075099182710274e-07, + "loss": 33.7, + "step": 463580 + }, + { + "epoch": 0.9364811305890101, + "grad_norm": 188.43954467773438, + "learning_rate": 1.6066320348883448e-07, + "loss": 21.0742, + "step": 463590 + }, + { + "epoch": 0.9365013312216939, + "grad_norm": 136.55584716796875, + "learning_rate": 1.6057543873690685e-07, + "loss": 11.6383, + "step": 463600 + }, + { + "epoch": 0.9365215318543777, + "grad_norm": 906.4600830078125, + "learning_rate": 1.604876975717473e-07, + "loss": 15.5584, + "step": 463610 + }, + { + "epoch": 0.9365417324870615, + "grad_norm": 173.13731384277344, + "learning_rate": 1.6039997999378388e-07, + "loss": 9.4284, + "step": 463620 + }, + { + "epoch": 0.9365619331197453, + "grad_norm": 169.66293334960938, + "learning_rate": 1.603122860034434e-07, + "loss": 17.2174, + "step": 463630 + }, + { + "epoch": 0.9365821337524292, + "grad_norm": 116.82272338867188, + "learning_rate": 1.6022461560115498e-07, + "loss": 21.4119, + "step": 463640 + }, + { + "epoch": 0.936602334385113, + "grad_norm": 421.8213806152344, + "learning_rate": 1.6013696878734385e-07, + "loss": 10.5557, + "step": 463650 + }, + { + "epoch": 0.9366225350177968, + "grad_norm": 174.93968200683594, + "learning_rate": 1.6004934556243857e-07, + "loss": 8.0367, + "step": 463660 + }, + { + "epoch": 0.9366427356504806, + "grad_norm": 351.3277587890625, + "learning_rate": 1.5996174592686598e-07, + "loss": 30.9874, + "step": 463670 + }, + { + "epoch": 0.9366629362831644, + "grad_norm": 325.6501770019531, + "learning_rate": 1.5987416988105188e-07, + "loss": 27.6566, + "step": 463680 + }, + { + "epoch": 0.9366831369158483, + "grad_norm": 802.6322021484375, + "learning_rate": 1.5978661742542477e-07, + "loss": 21.7901, + "step": 463690 + }, + { + "epoch": 0.936703337548532, + "grad_norm": 363.5493469238281, + "learning_rate": 1.596990885604105e-07, + "loss": 13.6323, + "step": 463700 + }, + { + "epoch": 0.9367235381812158, + "grad_norm": 299.0626220703125, + "learning_rate": 1.596115832864359e-07, + "loss": 28.5307, + "step": 463710 + }, + { + "epoch": 0.9367437388138996, + "grad_norm": 273.3241882324219, + "learning_rate": 1.5952410160392784e-07, + "loss": 22.5999, + "step": 463720 + }, + { + "epoch": 0.9367639394465834, + "grad_norm": 369.0726623535156, + "learning_rate": 1.59436643513311e-07, + "loss": 31.4092, + "step": 463730 + }, + { + "epoch": 0.9367841400792672, + "grad_norm": 84.29007720947266, + "learning_rate": 1.5934920901501395e-07, + "loss": 13.0794, + "step": 463740 + }, + { + "epoch": 0.9368043407119511, + "grad_norm": 335.17510986328125, + "learning_rate": 1.5926179810946185e-07, + "loss": 14.829, + "step": 463750 + }, + { + "epoch": 0.9368245413446349, + "grad_norm": 62.5571403503418, + "learning_rate": 1.5917441079707942e-07, + "loss": 16.8753, + "step": 463760 + }, + { + "epoch": 0.9368447419773187, + "grad_norm": 242.92271423339844, + "learning_rate": 1.5908704707829458e-07, + "loss": 12.437, + "step": 463770 + }, + { + "epoch": 0.9368649426100025, + "grad_norm": 508.7433776855469, + "learning_rate": 1.5899970695353262e-07, + "loss": 19.0374, + "step": 463780 + }, + { + "epoch": 0.9368851432426863, + "grad_norm": 277.3528747558594, + "learning_rate": 1.5891239042321871e-07, + "loss": 10.7622, + "step": 463790 + }, + { + "epoch": 0.9369053438753702, + "grad_norm": 293.6339416503906, + "learning_rate": 1.5882509748777809e-07, + "loss": 23.3743, + "step": 463800 + }, + { + "epoch": 0.936925544508054, + "grad_norm": 152.61813354492188, + "learning_rate": 1.5873782814763762e-07, + "loss": 39.8537, + "step": 463810 + }, + { + "epoch": 0.9369457451407378, + "grad_norm": 100.76495361328125, + "learning_rate": 1.586505824032214e-07, + "loss": 14.3764, + "step": 463820 + }, + { + "epoch": 0.9369659457734216, + "grad_norm": 400.2121887207031, + "learning_rate": 1.5856336025495466e-07, + "loss": 10.9528, + "step": 463830 + }, + { + "epoch": 0.9369861464061054, + "grad_norm": 458.34588623046875, + "learning_rate": 1.5847616170326318e-07, + "loss": 17.7317, + "step": 463840 + }, + { + "epoch": 0.9370063470387893, + "grad_norm": 370.8608093261719, + "learning_rate": 1.5838898674857273e-07, + "loss": 14.1121, + "step": 463850 + }, + { + "epoch": 0.9370265476714731, + "grad_norm": 177.37461853027344, + "learning_rate": 1.5830183539130574e-07, + "loss": 10.8087, + "step": 463860 + }, + { + "epoch": 0.9370467483041569, + "grad_norm": 310.3232116699219, + "learning_rate": 1.582147076318885e-07, + "loss": 13.7634, + "step": 463870 + }, + { + "epoch": 0.9370669489368407, + "grad_norm": 125.4625244140625, + "learning_rate": 1.581276034707463e-07, + "loss": 18.1908, + "step": 463880 + }, + { + "epoch": 0.9370871495695245, + "grad_norm": 279.0887756347656, + "learning_rate": 1.5804052290830262e-07, + "loss": 18.8361, + "step": 463890 + }, + { + "epoch": 0.9371073502022084, + "grad_norm": 486.1850280761719, + "learning_rate": 1.5795346594498162e-07, + "loss": 15.2239, + "step": 463900 + }, + { + "epoch": 0.9371275508348922, + "grad_norm": 183.5675811767578, + "learning_rate": 1.5786643258120905e-07, + "loss": 18.1985, + "step": 463910 + }, + { + "epoch": 0.937147751467576, + "grad_norm": 20.34307861328125, + "learning_rate": 1.5777942281740789e-07, + "loss": 12.9008, + "step": 463920 + }, + { + "epoch": 0.9371679521002598, + "grad_norm": 268.869384765625, + "learning_rate": 1.5769243665400224e-07, + "loss": 26.5671, + "step": 463930 + }, + { + "epoch": 0.9371881527329436, + "grad_norm": 328.96832275390625, + "learning_rate": 1.5760547409141626e-07, + "loss": 17.6477, + "step": 463940 + }, + { + "epoch": 0.9372083533656275, + "grad_norm": 154.60748291015625, + "learning_rate": 1.5751853513007454e-07, + "loss": 10.0067, + "step": 463950 + }, + { + "epoch": 0.9372285539983112, + "grad_norm": 42.942100524902344, + "learning_rate": 1.5743161977039954e-07, + "loss": 16.669, + "step": 463960 + }, + { + "epoch": 0.937248754630995, + "grad_norm": 225.9125213623047, + "learning_rate": 1.5734472801281543e-07, + "loss": 14.7296, + "step": 463970 + }, + { + "epoch": 0.9372689552636788, + "grad_norm": 284.292236328125, + "learning_rate": 1.5725785985774623e-07, + "loss": 14.482, + "step": 463980 + }, + { + "epoch": 0.9372891558963626, + "grad_norm": 266.5537109375, + "learning_rate": 1.5717101530561497e-07, + "loss": 10.6148, + "step": 463990 + }, + { + "epoch": 0.9373093565290465, + "grad_norm": 174.10211181640625, + "learning_rate": 1.5708419435684463e-07, + "loss": 20.1838, + "step": 464000 + }, + { + "epoch": 0.9373295571617303, + "grad_norm": 275.5843505859375, + "learning_rate": 1.5699739701185878e-07, + "loss": 42.5351, + "step": 464010 + }, + { + "epoch": 0.9373497577944141, + "grad_norm": 432.7255554199219, + "learning_rate": 1.5691062327107932e-07, + "loss": 14.5928, + "step": 464020 + }, + { + "epoch": 0.9373699584270979, + "grad_norm": 181.0866241455078, + "learning_rate": 1.5682387313493086e-07, + "loss": 16.4988, + "step": 464030 + }, + { + "epoch": 0.9373901590597817, + "grad_norm": 326.6819152832031, + "learning_rate": 1.5673714660383532e-07, + "loss": 16.7856, + "step": 464040 + }, + { + "epoch": 0.9374103596924656, + "grad_norm": 359.0847473144531, + "learning_rate": 1.5665044367821513e-07, + "loss": 17.2553, + "step": 464050 + }, + { + "epoch": 0.9374305603251494, + "grad_norm": 386.4946594238281, + "learning_rate": 1.5656376435849385e-07, + "loss": 25.8802, + "step": 464060 + }, + { + "epoch": 0.9374507609578332, + "grad_norm": 278.6641540527344, + "learning_rate": 1.5647710864509336e-07, + "loss": 18.7478, + "step": 464070 + }, + { + "epoch": 0.937470961590517, + "grad_norm": 293.5640563964844, + "learning_rate": 1.5639047653843554e-07, + "loss": 14.4219, + "step": 464080 + }, + { + "epoch": 0.9374911622232008, + "grad_norm": 114.81095123291016, + "learning_rate": 1.563038680389428e-07, + "loss": 9.2082, + "step": 464090 + }, + { + "epoch": 0.9375113628558847, + "grad_norm": 50.59746551513672, + "learning_rate": 1.5621728314703822e-07, + "loss": 15.5744, + "step": 464100 + }, + { + "epoch": 0.9375315634885685, + "grad_norm": 222.39903259277344, + "learning_rate": 1.5613072186314304e-07, + "loss": 14.7094, + "step": 464110 + }, + { + "epoch": 0.9375517641212523, + "grad_norm": 506.1026916503906, + "learning_rate": 1.560441841876792e-07, + "loss": 24.3718, + "step": 464120 + }, + { + "epoch": 0.9375719647539361, + "grad_norm": 248.27789306640625, + "learning_rate": 1.5595767012106856e-07, + "loss": 18.9075, + "step": 464130 + }, + { + "epoch": 0.9375921653866199, + "grad_norm": 289.97283935546875, + "learning_rate": 1.5587117966373244e-07, + "loss": 9.895, + "step": 464140 + }, + { + "epoch": 0.9376123660193038, + "grad_norm": 337.35882568359375, + "learning_rate": 1.5578471281609274e-07, + "loss": 18.4055, + "step": 464150 + }, + { + "epoch": 0.9376325666519876, + "grad_norm": 252.82054138183594, + "learning_rate": 1.5569826957857027e-07, + "loss": 19.524, + "step": 464160 + }, + { + "epoch": 0.9376527672846714, + "grad_norm": 144.07119750976562, + "learning_rate": 1.556118499515885e-07, + "loss": 18.7757, + "step": 464170 + }, + { + "epoch": 0.9376729679173552, + "grad_norm": 427.6900634765625, + "learning_rate": 1.555254539355655e-07, + "loss": 11.2591, + "step": 464180 + }, + { + "epoch": 0.937693168550039, + "grad_norm": 444.0274658203125, + "learning_rate": 1.5543908153092424e-07, + "loss": 13.1624, + "step": 464190 + }, + { + "epoch": 0.9377133691827229, + "grad_norm": 394.1073913574219, + "learning_rate": 1.553527327380855e-07, + "loss": 27.6414, + "step": 464200 + }, + { + "epoch": 0.9377335698154066, + "grad_norm": 256.9996337890625, + "learning_rate": 1.5526640755747003e-07, + "loss": 20.7622, + "step": 464210 + }, + { + "epoch": 0.9377537704480904, + "grad_norm": 403.0283508300781, + "learning_rate": 1.5518010598949807e-07, + "loss": 16.7295, + "step": 464220 + }, + { + "epoch": 0.9377739710807742, + "grad_norm": 288.77215576171875, + "learning_rate": 1.5509382803459149e-07, + "loss": 18.791, + "step": 464230 + }, + { + "epoch": 0.937794171713458, + "grad_norm": 261.7998352050781, + "learning_rate": 1.5500757369316888e-07, + "loss": 24.8994, + "step": 464240 + }, + { + "epoch": 0.9378143723461418, + "grad_norm": 293.237548828125, + "learning_rate": 1.5492134296565264e-07, + "loss": 57.5289, + "step": 464250 + }, + { + "epoch": 0.9378345729788257, + "grad_norm": 161.0277557373047, + "learning_rate": 1.5483513585246135e-07, + "loss": 18.1571, + "step": 464260 + }, + { + "epoch": 0.9378547736115095, + "grad_norm": 505.86834716796875, + "learning_rate": 1.5474895235401688e-07, + "loss": 21.8036, + "step": 464270 + }, + { + "epoch": 0.9378749742441933, + "grad_norm": 372.587646484375, + "learning_rate": 1.546627924707378e-07, + "loss": 19.0715, + "step": 464280 + }, + { + "epoch": 0.9378951748768771, + "grad_norm": 167.16751098632812, + "learning_rate": 1.545766562030443e-07, + "loss": 19.0516, + "step": 464290 + }, + { + "epoch": 0.937915375509561, + "grad_norm": 272.3799743652344, + "learning_rate": 1.5449054355135718e-07, + "loss": 18.8585, + "step": 464300 + }, + { + "epoch": 0.9379355761422448, + "grad_norm": 185.17149353027344, + "learning_rate": 1.54404454516095e-07, + "loss": 21.4619, + "step": 464310 + }, + { + "epoch": 0.9379557767749286, + "grad_norm": 423.4834289550781, + "learning_rate": 1.5431838909767793e-07, + "loss": 18.7611, + "step": 464320 + }, + { + "epoch": 0.9379759774076124, + "grad_norm": 219.85989379882812, + "learning_rate": 1.542323472965257e-07, + "loss": 15.396, + "step": 464330 + }, + { + "epoch": 0.9379961780402962, + "grad_norm": 275.00982666015625, + "learning_rate": 1.5414632911305683e-07, + "loss": 13.2177, + "step": 464340 + }, + { + "epoch": 0.93801637867298, + "grad_norm": 301.32568359375, + "learning_rate": 1.5406033454769154e-07, + "loss": 10.5237, + "step": 464350 + }, + { + "epoch": 0.9380365793056639, + "grad_norm": 237.79751586914062, + "learning_rate": 1.5397436360084784e-07, + "loss": 17.7063, + "step": 464360 + }, + { + "epoch": 0.9380567799383477, + "grad_norm": 236.28114318847656, + "learning_rate": 1.5388841627294536e-07, + "loss": 20.8824, + "step": 464370 + }, + { + "epoch": 0.9380769805710315, + "grad_norm": 563.0267944335938, + "learning_rate": 1.5380249256440272e-07, + "loss": 22.0628, + "step": 464380 + }, + { + "epoch": 0.9380971812037153, + "grad_norm": 365.0470886230469, + "learning_rate": 1.5371659247564063e-07, + "loss": 19.0374, + "step": 464390 + }, + { + "epoch": 0.9381173818363991, + "grad_norm": 373.4736328125, + "learning_rate": 1.5363071600707435e-07, + "loss": 15.6116, + "step": 464400 + }, + { + "epoch": 0.938137582469083, + "grad_norm": 231.653076171875, + "learning_rate": 1.5354486315912408e-07, + "loss": 18.9384, + "step": 464410 + }, + { + "epoch": 0.9381577831017668, + "grad_norm": 477.3868713378906, + "learning_rate": 1.534590339322095e-07, + "loss": 12.3416, + "step": 464420 + }, + { + "epoch": 0.9381779837344506, + "grad_norm": 266.5515441894531, + "learning_rate": 1.533732283267475e-07, + "loss": 24.4451, + "step": 464430 + }, + { + "epoch": 0.9381981843671344, + "grad_norm": 221.8384246826172, + "learning_rate": 1.532874463431555e-07, + "loss": 9.7771, + "step": 464440 + }, + { + "epoch": 0.9382183849998182, + "grad_norm": 225.82688903808594, + "learning_rate": 1.532016879818532e-07, + "loss": 19.1567, + "step": 464450 + }, + { + "epoch": 0.9382385856325021, + "grad_norm": 46.60588455200195, + "learning_rate": 1.5311595324325912e-07, + "loss": 13.5228, + "step": 464460 + }, + { + "epoch": 0.9382587862651858, + "grad_norm": 2069.479248046875, + "learning_rate": 1.5303024212778905e-07, + "loss": 27.4372, + "step": 464470 + }, + { + "epoch": 0.9382789868978696, + "grad_norm": 208.14450073242188, + "learning_rate": 1.5294455463586157e-07, + "loss": 12.1534, + "step": 464480 + }, + { + "epoch": 0.9382991875305534, + "grad_norm": 301.5461120605469, + "learning_rate": 1.528588907678946e-07, + "loss": 21.3321, + "step": 464490 + }, + { + "epoch": 0.9383193881632372, + "grad_norm": 233.73643493652344, + "learning_rate": 1.5277325052430569e-07, + "loss": 8.8772, + "step": 464500 + }, + { + "epoch": 0.938339588795921, + "grad_norm": 179.8396453857422, + "learning_rate": 1.5268763390551167e-07, + "loss": 8.8738, + "step": 464510 + }, + { + "epoch": 0.9383597894286049, + "grad_norm": 608.1994018554688, + "learning_rate": 1.526020409119311e-07, + "loss": 15.8383, + "step": 464520 + }, + { + "epoch": 0.9383799900612887, + "grad_norm": 483.1649169921875, + "learning_rate": 1.5251647154397975e-07, + "loss": 18.2113, + "step": 464530 + }, + { + "epoch": 0.9384001906939725, + "grad_norm": 331.9686279296875, + "learning_rate": 1.5243092580207507e-07, + "loss": 21.4486, + "step": 464540 + }, + { + "epoch": 0.9384203913266563, + "grad_norm": 156.11338806152344, + "learning_rate": 1.5234540368663343e-07, + "loss": 13.561, + "step": 464550 + }, + { + "epoch": 0.9384405919593402, + "grad_norm": 255.95281982421875, + "learning_rate": 1.5225990519807332e-07, + "loss": 12.507, + "step": 464560 + }, + { + "epoch": 0.938460792592024, + "grad_norm": 436.13861083984375, + "learning_rate": 1.5217443033681058e-07, + "loss": 12.8422, + "step": 464570 + }, + { + "epoch": 0.9384809932247078, + "grad_norm": 261.2200927734375, + "learning_rate": 1.5208897910326092e-07, + "loss": 14.746, + "step": 464580 + }, + { + "epoch": 0.9385011938573916, + "grad_norm": 217.70619201660156, + "learning_rate": 1.520035514978424e-07, + "loss": 15.9952, + "step": 464590 + }, + { + "epoch": 0.9385213944900754, + "grad_norm": 377.078369140625, + "learning_rate": 1.5191814752097024e-07, + "loss": 10.0508, + "step": 464600 + }, + { + "epoch": 0.9385415951227593, + "grad_norm": 389.1170654296875, + "learning_rate": 1.5183276717306072e-07, + "loss": 15.1369, + "step": 464610 + }, + { + "epoch": 0.9385617957554431, + "grad_norm": 370.8136291503906, + "learning_rate": 1.517474104545308e-07, + "loss": 11.9117, + "step": 464620 + }, + { + "epoch": 0.9385819963881269, + "grad_norm": 481.1498718261719, + "learning_rate": 1.5166207736579564e-07, + "loss": 22.6878, + "step": 464630 + }, + { + "epoch": 0.9386021970208107, + "grad_norm": 468.8928527832031, + "learning_rate": 1.515767679072716e-07, + "loss": 12.4588, + "step": 464640 + }, + { + "epoch": 0.9386223976534945, + "grad_norm": 378.5009460449219, + "learning_rate": 1.5149148207937447e-07, + "loss": 20.1811, + "step": 464650 + }, + { + "epoch": 0.9386425982861784, + "grad_norm": 292.85101318359375, + "learning_rate": 1.5140621988251947e-07, + "loss": 5.6071, + "step": 464660 + }, + { + "epoch": 0.9386627989188622, + "grad_norm": 363.9794006347656, + "learning_rate": 1.513209813171229e-07, + "loss": 16.2339, + "step": 464670 + }, + { + "epoch": 0.938682999551546, + "grad_norm": 300.9176025390625, + "learning_rate": 1.5123576638360004e-07, + "loss": 15.4974, + "step": 464680 + }, + { + "epoch": 0.9387032001842298, + "grad_norm": 146.10968017578125, + "learning_rate": 1.5115057508236498e-07, + "loss": 14.3807, + "step": 464690 + }, + { + "epoch": 0.9387234008169136, + "grad_norm": 776.8226318359375, + "learning_rate": 1.5106540741383402e-07, + "loss": 25.4167, + "step": 464700 + }, + { + "epoch": 0.9387436014495975, + "grad_norm": 110.0594482421875, + "learning_rate": 1.5098026337842297e-07, + "loss": 24.875, + "step": 464710 + }, + { + "epoch": 0.9387638020822812, + "grad_norm": 293.3554382324219, + "learning_rate": 1.5089514297654594e-07, + "loss": 20.56, + "step": 464720 + }, + { + "epoch": 0.938784002714965, + "grad_norm": 233.24440002441406, + "learning_rate": 1.5081004620861706e-07, + "loss": 18.238, + "step": 464730 + }, + { + "epoch": 0.9388042033476488, + "grad_norm": 336.6822204589844, + "learning_rate": 1.5072497307505263e-07, + "loss": 16.5808, + "step": 464740 + }, + { + "epoch": 0.9388244039803326, + "grad_norm": 293.6535339355469, + "learning_rate": 1.5063992357626623e-07, + "loss": 34.2862, + "step": 464750 + }, + { + "epoch": 0.9388446046130164, + "grad_norm": 440.262939453125, + "learning_rate": 1.5055489771267252e-07, + "loss": 23.0098, + "step": 464760 + }, + { + "epoch": 0.9388648052457003, + "grad_norm": 347.2552185058594, + "learning_rate": 1.5046989548468616e-07, + "loss": 15.1431, + "step": 464770 + }, + { + "epoch": 0.9388850058783841, + "grad_norm": 226.5079803466797, + "learning_rate": 1.503849168927224e-07, + "loss": 24.5541, + "step": 464780 + }, + { + "epoch": 0.9389052065110679, + "grad_norm": 405.8617248535156, + "learning_rate": 1.502999619371931e-07, + "loss": 20.6261, + "step": 464790 + }, + { + "epoch": 0.9389254071437517, + "grad_norm": 268.3782958984375, + "learning_rate": 1.502150306185135e-07, + "loss": 17.145, + "step": 464800 + }, + { + "epoch": 0.9389456077764355, + "grad_norm": 188.48545837402344, + "learning_rate": 1.5013012293709828e-07, + "loss": 10.6227, + "step": 464810 + }, + { + "epoch": 0.9389658084091194, + "grad_norm": 139.31396484375, + "learning_rate": 1.5004523889336042e-07, + "loss": 20.7047, + "step": 464820 + }, + { + "epoch": 0.9389860090418032, + "grad_norm": 144.83143615722656, + "learning_rate": 1.499603784877135e-07, + "loss": 11.3791, + "step": 464830 + }, + { + "epoch": 0.939006209674487, + "grad_norm": 421.0755920410156, + "learning_rate": 1.4987554172057216e-07, + "loss": 14.0488, + "step": 464840 + }, + { + "epoch": 0.9390264103071708, + "grad_norm": 437.9329528808594, + "learning_rate": 1.497907285923489e-07, + "loss": 16.8727, + "step": 464850 + }, + { + "epoch": 0.9390466109398546, + "grad_norm": 100.489501953125, + "learning_rate": 1.4970593910345665e-07, + "loss": 9.158, + "step": 464860 + }, + { + "epoch": 0.9390668115725385, + "grad_norm": 272.7707824707031, + "learning_rate": 1.4962117325431013e-07, + "loss": 23.773, + "step": 464870 + }, + { + "epoch": 0.9390870122052223, + "grad_norm": 102.82606506347656, + "learning_rate": 1.495364310453218e-07, + "loss": 16.2965, + "step": 464880 + }, + { + "epoch": 0.9391072128379061, + "grad_norm": 338.75128173828125, + "learning_rate": 1.494517124769046e-07, + "loss": 10.7134, + "step": 464890 + }, + { + "epoch": 0.9391274134705899, + "grad_norm": 203.58189392089844, + "learning_rate": 1.4936701754947104e-07, + "loss": 11.8365, + "step": 464900 + }, + { + "epoch": 0.9391476141032737, + "grad_norm": 251.21974182128906, + "learning_rate": 1.4928234626343464e-07, + "loss": 8.4952, + "step": 464910 + }, + { + "epoch": 0.9391678147359576, + "grad_norm": 242.14511108398438, + "learning_rate": 1.4919769861920785e-07, + "loss": 13.1516, + "step": 464920 + }, + { + "epoch": 0.9391880153686414, + "grad_norm": 219.99034118652344, + "learning_rate": 1.491130746172026e-07, + "loss": 18.0917, + "step": 464930 + }, + { + "epoch": 0.9392082160013252, + "grad_norm": 391.0244445800781, + "learning_rate": 1.490284742578324e-07, + "loss": 32.7682, + "step": 464940 + }, + { + "epoch": 0.939228416634009, + "grad_norm": 263.4881896972656, + "learning_rate": 1.4894389754150862e-07, + "loss": 30.4799, + "step": 464950 + }, + { + "epoch": 0.9392486172666928, + "grad_norm": 242.61471557617188, + "learning_rate": 1.4885934446864425e-07, + "loss": 17.89, + "step": 464960 + }, + { + "epoch": 0.9392688178993767, + "grad_norm": 133.09295654296875, + "learning_rate": 1.487748150396512e-07, + "loss": 14.5193, + "step": 464970 + }, + { + "epoch": 0.9392890185320604, + "grad_norm": 109.19881439208984, + "learning_rate": 1.4869030925494077e-07, + "loss": 22.463, + "step": 464980 + }, + { + "epoch": 0.9393092191647442, + "grad_norm": 387.658203125, + "learning_rate": 1.4860582711492544e-07, + "loss": 23.5541, + "step": 464990 + }, + { + "epoch": 0.939329419797428, + "grad_norm": 127.01421356201172, + "learning_rate": 1.4852136862001766e-07, + "loss": 26.0863, + "step": 465000 + }, + { + "epoch": 0.9393496204301118, + "grad_norm": 248.5615234375, + "learning_rate": 1.4843693377062818e-07, + "loss": 17.2345, + "step": 465010 + }, + { + "epoch": 0.9393698210627957, + "grad_norm": 329.6812744140625, + "learning_rate": 1.483525225671678e-07, + "loss": 21.7594, + "step": 465020 + }, + { + "epoch": 0.9393900216954795, + "grad_norm": 66.26323699951172, + "learning_rate": 1.4826813501004954e-07, + "loss": 15.4191, + "step": 465030 + }, + { + "epoch": 0.9394102223281633, + "grad_norm": 622.2844848632812, + "learning_rate": 1.4818377109968417e-07, + "loss": 19.3583, + "step": 465040 + }, + { + "epoch": 0.9394304229608471, + "grad_norm": 368.30633544921875, + "learning_rate": 1.4809943083648194e-07, + "loss": 16.1753, + "step": 465050 + }, + { + "epoch": 0.9394506235935309, + "grad_norm": 373.9842529296875, + "learning_rate": 1.480151142208547e-07, + "loss": 13.5912, + "step": 465060 + }, + { + "epoch": 0.9394708242262148, + "grad_norm": 161.87060546875, + "learning_rate": 1.4793082125321435e-07, + "loss": 21.0955, + "step": 465070 + }, + { + "epoch": 0.9394910248588986, + "grad_norm": 178.7263946533203, + "learning_rate": 1.4784655193396947e-07, + "loss": 15.2693, + "step": 465080 + }, + { + "epoch": 0.9395112254915824, + "grad_norm": 140.8055419921875, + "learning_rate": 1.4776230626353193e-07, + "loss": 16.8984, + "step": 465090 + }, + { + "epoch": 0.9395314261242662, + "grad_norm": 252.18603515625, + "learning_rate": 1.4767808424231312e-07, + "loss": 12.0256, + "step": 465100 + }, + { + "epoch": 0.93955162675695, + "grad_norm": 61.71898651123047, + "learning_rate": 1.4759388587072266e-07, + "loss": 19.4285, + "step": 465110 + }, + { + "epoch": 0.9395718273896339, + "grad_norm": 159.76754760742188, + "learning_rate": 1.475097111491708e-07, + "loss": 12.8262, + "step": 465120 + }, + { + "epoch": 0.9395920280223177, + "grad_norm": 176.18861389160156, + "learning_rate": 1.474255600780683e-07, + "loss": 13.4194, + "step": 465130 + }, + { + "epoch": 0.9396122286550015, + "grad_norm": 276.20440673828125, + "learning_rate": 1.473414326578254e-07, + "loss": 9.6049, + "step": 465140 + }, + { + "epoch": 0.9396324292876853, + "grad_norm": 652.6522216796875, + "learning_rate": 1.4725732888885126e-07, + "loss": 20.6648, + "step": 465150 + }, + { + "epoch": 0.9396526299203691, + "grad_norm": 245.8262939453125, + "learning_rate": 1.4717324877155603e-07, + "loss": 7.9968, + "step": 465160 + }, + { + "epoch": 0.939672830553053, + "grad_norm": 169.50160217285156, + "learning_rate": 1.4708919230635054e-07, + "loss": 13.6993, + "step": 465170 + }, + { + "epoch": 0.9396930311857368, + "grad_norm": 307.2168273925781, + "learning_rate": 1.4700515949364337e-07, + "loss": 29.3474, + "step": 465180 + }, + { + "epoch": 0.9397132318184206, + "grad_norm": 309.11541748046875, + "learning_rate": 1.4692115033384468e-07, + "loss": 24.8049, + "step": 465190 + }, + { + "epoch": 0.9397334324511044, + "grad_norm": 299.8684997558594, + "learning_rate": 1.4683716482736364e-07, + "loss": 20.7849, + "step": 465200 + }, + { + "epoch": 0.9397536330837882, + "grad_norm": 343.4499816894531, + "learning_rate": 1.4675320297460994e-07, + "loss": 22.834, + "step": 465210 + }, + { + "epoch": 0.939773833716472, + "grad_norm": 0.3346022665500641, + "learning_rate": 1.4666926477599153e-07, + "loss": 8.4834, + "step": 465220 + }, + { + "epoch": 0.9397940343491558, + "grad_norm": 1.9308102130889893, + "learning_rate": 1.4658535023191922e-07, + "loss": 20.9597, + "step": 465230 + }, + { + "epoch": 0.9398142349818396, + "grad_norm": 197.47598266601562, + "learning_rate": 1.4650145934280103e-07, + "loss": 27.1396, + "step": 465240 + }, + { + "epoch": 0.9398344356145234, + "grad_norm": 179.12158203125, + "learning_rate": 1.4641759210904605e-07, + "loss": 15.4021, + "step": 465250 + }, + { + "epoch": 0.9398546362472072, + "grad_norm": 383.4201354980469, + "learning_rate": 1.463337485310634e-07, + "loss": 18.0885, + "step": 465260 + }, + { + "epoch": 0.939874836879891, + "grad_norm": 3.454732656478882, + "learning_rate": 1.4624992860926112e-07, + "loss": 11.9672, + "step": 465270 + }, + { + "epoch": 0.9398950375125749, + "grad_norm": 1308.201904296875, + "learning_rate": 1.461661323440483e-07, + "loss": 35.694, + "step": 465280 + }, + { + "epoch": 0.9399152381452587, + "grad_norm": 179.21511840820312, + "learning_rate": 1.4608235973583296e-07, + "loss": 14.4515, + "step": 465290 + }, + { + "epoch": 0.9399354387779425, + "grad_norm": 166.96446228027344, + "learning_rate": 1.459986107850231e-07, + "loss": 35.6755, + "step": 465300 + }, + { + "epoch": 0.9399556394106263, + "grad_norm": 2.7305474281311035, + "learning_rate": 1.4591488549202725e-07, + "loss": 10.5408, + "step": 465310 + }, + { + "epoch": 0.9399758400433101, + "grad_norm": 326.416015625, + "learning_rate": 1.4583118385725402e-07, + "loss": 10.2123, + "step": 465320 + }, + { + "epoch": 0.939996040675994, + "grad_norm": 90.38726043701172, + "learning_rate": 1.4574750588111085e-07, + "loss": 10.5752, + "step": 465330 + }, + { + "epoch": 0.9400162413086778, + "grad_norm": 33.91945266723633, + "learning_rate": 1.4566385156400463e-07, + "loss": 18.0592, + "step": 465340 + }, + { + "epoch": 0.9400364419413616, + "grad_norm": 62.38507843017578, + "learning_rate": 1.4558022090634504e-07, + "loss": 9.2965, + "step": 465350 + }, + { + "epoch": 0.9400566425740454, + "grad_norm": 201.84622192382812, + "learning_rate": 1.4549661390853897e-07, + "loss": 25.4784, + "step": 465360 + }, + { + "epoch": 0.9400768432067292, + "grad_norm": 207.21717834472656, + "learning_rate": 1.4541303057099275e-07, + "loss": 11.7948, + "step": 465370 + }, + { + "epoch": 0.9400970438394131, + "grad_norm": 310.4132995605469, + "learning_rate": 1.4532947089411443e-07, + "loss": 12.5109, + "step": 465380 + }, + { + "epoch": 0.9401172444720969, + "grad_norm": 71.95929718017578, + "learning_rate": 1.452459348783125e-07, + "loss": 16.5754, + "step": 465390 + }, + { + "epoch": 0.9401374451047807, + "grad_norm": 178.69338989257812, + "learning_rate": 1.4516242252399227e-07, + "loss": 21.9241, + "step": 465400 + }, + { + "epoch": 0.9401576457374645, + "grad_norm": 635.33935546875, + "learning_rate": 1.450789338315617e-07, + "loss": 9.9826, + "step": 465410 + }, + { + "epoch": 0.9401778463701483, + "grad_norm": 265.636962890625, + "learning_rate": 1.4499546880142823e-07, + "loss": 19.409, + "step": 465420 + }, + { + "epoch": 0.9401980470028322, + "grad_norm": 250.55796813964844, + "learning_rate": 1.4491202743399767e-07, + "loss": 17.1007, + "step": 465430 + }, + { + "epoch": 0.940218247635516, + "grad_norm": 404.17333984375, + "learning_rate": 1.448286097296764e-07, + "loss": 22.9604, + "step": 465440 + }, + { + "epoch": 0.9402384482681998, + "grad_norm": 410.077392578125, + "learning_rate": 1.4474521568887178e-07, + "loss": 20.4779, + "step": 465450 + }, + { + "epoch": 0.9402586489008836, + "grad_norm": 302.5479736328125, + "learning_rate": 1.4466184531199135e-07, + "loss": 10.2103, + "step": 465460 + }, + { + "epoch": 0.9402788495335674, + "grad_norm": 696.5477905273438, + "learning_rate": 1.4457849859943862e-07, + "loss": 20.1254, + "step": 465470 + }, + { + "epoch": 0.9402990501662513, + "grad_norm": 176.4188232421875, + "learning_rate": 1.4449517555162163e-07, + "loss": 11.1525, + "step": 465480 + }, + { + "epoch": 0.940319250798935, + "grad_norm": 269.1247253417969, + "learning_rate": 1.4441187616894724e-07, + "loss": 17.4083, + "step": 465490 + }, + { + "epoch": 0.9403394514316188, + "grad_norm": 181.580322265625, + "learning_rate": 1.4432860045182019e-07, + "loss": 20.2364, + "step": 465500 + }, + { + "epoch": 0.9403596520643026, + "grad_norm": 139.49365234375, + "learning_rate": 1.4424534840064563e-07, + "loss": 16.2173, + "step": 465510 + }, + { + "epoch": 0.9403798526969864, + "grad_norm": 154.74514770507812, + "learning_rate": 1.4416212001583163e-07, + "loss": 13.2751, + "step": 465520 + }, + { + "epoch": 0.9404000533296702, + "grad_norm": 177.03106689453125, + "learning_rate": 1.4407891529778172e-07, + "loss": 11.1822, + "step": 465530 + }, + { + "epoch": 0.9404202539623541, + "grad_norm": 308.2560729980469, + "learning_rate": 1.4399573424690227e-07, + "loss": 21.6448, + "step": 465540 + }, + { + "epoch": 0.9404404545950379, + "grad_norm": 285.0311279296875, + "learning_rate": 1.4391257686359906e-07, + "loss": 21.1814, + "step": 465550 + }, + { + "epoch": 0.9404606552277217, + "grad_norm": 153.18069458007812, + "learning_rate": 1.438294431482762e-07, + "loss": 29.6529, + "step": 465560 + }, + { + "epoch": 0.9404808558604055, + "grad_norm": 357.177978515625, + "learning_rate": 1.4374633310134057e-07, + "loss": 16.2107, + "step": 465570 + }, + { + "epoch": 0.9405010564930893, + "grad_norm": 263.331787109375, + "learning_rate": 1.4366324672319575e-07, + "loss": 27.4075, + "step": 465580 + }, + { + "epoch": 0.9405212571257732, + "grad_norm": 163.2997283935547, + "learning_rate": 1.43580184014247e-07, + "loss": 17.678, + "step": 465590 + }, + { + "epoch": 0.940541457758457, + "grad_norm": 160.16209411621094, + "learning_rate": 1.4349714497490009e-07, + "loss": 12.6306, + "step": 465600 + }, + { + "epoch": 0.9405616583911408, + "grad_norm": 162.60272216796875, + "learning_rate": 1.4341412960555855e-07, + "loss": 13.1878, + "step": 465610 + }, + { + "epoch": 0.9405818590238246, + "grad_norm": 389.0608825683594, + "learning_rate": 1.4333113790662822e-07, + "loss": 19.1214, + "step": 465620 + }, + { + "epoch": 0.9406020596565084, + "grad_norm": 160.79275512695312, + "learning_rate": 1.432481698785121e-07, + "loss": 4.4156, + "step": 465630 + }, + { + "epoch": 0.9406222602891923, + "grad_norm": 19.751644134521484, + "learning_rate": 1.4316522552161593e-07, + "loss": 17.5609, + "step": 465640 + }, + { + "epoch": 0.9406424609218761, + "grad_norm": 6.37514591217041, + "learning_rate": 1.4308230483634334e-07, + "loss": 27.6134, + "step": 465650 + }, + { + "epoch": 0.9406626615545599, + "grad_norm": 81.94923400878906, + "learning_rate": 1.4299940782309785e-07, + "loss": 11.7039, + "step": 465660 + }, + { + "epoch": 0.9406828621872437, + "grad_norm": 514.7180786132812, + "learning_rate": 1.4291653448228416e-07, + "loss": 13.1153, + "step": 465670 + }, + { + "epoch": 0.9407030628199275, + "grad_norm": 116.28053283691406, + "learning_rate": 1.4283368481430747e-07, + "loss": 20.3335, + "step": 465680 + }, + { + "epoch": 0.9407232634526114, + "grad_norm": 87.40438079833984, + "learning_rate": 1.427508588195692e-07, + "loss": 15.2348, + "step": 465690 + }, + { + "epoch": 0.9407434640852952, + "grad_norm": 460.5, + "learning_rate": 1.4266805649847392e-07, + "loss": 15.3604, + "step": 465700 + }, + { + "epoch": 0.940763664717979, + "grad_norm": 351.30767822265625, + "learning_rate": 1.425852778514264e-07, + "loss": 12.1725, + "step": 465710 + }, + { + "epoch": 0.9407838653506628, + "grad_norm": 393.6469421386719, + "learning_rate": 1.4250252287882848e-07, + "loss": 16.5886, + "step": 465720 + }, + { + "epoch": 0.9408040659833466, + "grad_norm": 252.6640167236328, + "learning_rate": 1.4241979158108433e-07, + "loss": 16.6276, + "step": 465730 + }, + { + "epoch": 0.9408242666160305, + "grad_norm": 15.118282318115234, + "learning_rate": 1.4233708395859692e-07, + "loss": 25.3619, + "step": 465740 + }, + { + "epoch": 0.9408444672487142, + "grad_norm": 140.9523162841797, + "learning_rate": 1.4225440001176983e-07, + "loss": 15.3279, + "step": 465750 + }, + { + "epoch": 0.940864667881398, + "grad_norm": 321.87371826171875, + "learning_rate": 1.421717397410044e-07, + "loss": 25.7802, + "step": 465760 + }, + { + "epoch": 0.9408848685140818, + "grad_norm": 424.4866027832031, + "learning_rate": 1.420891031467053e-07, + "loss": 16.5313, + "step": 465770 + }, + { + "epoch": 0.9409050691467656, + "grad_norm": 162.17381286621094, + "learning_rate": 1.4200649022927505e-07, + "loss": 12.1579, + "step": 465780 + }, + { + "epoch": 0.9409252697794495, + "grad_norm": 91.25312805175781, + "learning_rate": 1.41923900989116e-07, + "loss": 16.8099, + "step": 465790 + }, + { + "epoch": 0.9409454704121333, + "grad_norm": 355.78717041015625, + "learning_rate": 1.4184133542663014e-07, + "loss": 16.601, + "step": 465800 + }, + { + "epoch": 0.9409656710448171, + "grad_norm": 181.00601196289062, + "learning_rate": 1.41758793542221e-07, + "loss": 11.673, + "step": 465810 + }, + { + "epoch": 0.9409858716775009, + "grad_norm": 336.8885498046875, + "learning_rate": 1.4167627533628992e-07, + "loss": 18.4881, + "step": 465820 + }, + { + "epoch": 0.9410060723101847, + "grad_norm": 1.1595476865768433, + "learning_rate": 1.4159378080923936e-07, + "loss": 23.6533, + "step": 465830 + }, + { + "epoch": 0.9410262729428686, + "grad_norm": 322.81280517578125, + "learning_rate": 1.4151130996147177e-07, + "loss": 20.7972, + "step": 465840 + }, + { + "epoch": 0.9410464735755524, + "grad_norm": 353.97314453125, + "learning_rate": 1.4142886279338852e-07, + "loss": 27.6655, + "step": 465850 + }, + { + "epoch": 0.9410666742082362, + "grad_norm": 183.62677001953125, + "learning_rate": 1.4134643930539204e-07, + "loss": 13.2374, + "step": 465860 + }, + { + "epoch": 0.94108687484092, + "grad_norm": 163.1032257080078, + "learning_rate": 1.4126403949788369e-07, + "loss": 14.9478, + "step": 465870 + }, + { + "epoch": 0.9411070754736038, + "grad_norm": 50.128055572509766, + "learning_rate": 1.4118166337126428e-07, + "loss": 22.1623, + "step": 465880 + }, + { + "epoch": 0.9411272761062877, + "grad_norm": 89.6222915649414, + "learning_rate": 1.4109931092593732e-07, + "loss": 23.4234, + "step": 465890 + }, + { + "epoch": 0.9411474767389715, + "grad_norm": 417.8365478515625, + "learning_rate": 1.4101698216230254e-07, + "loss": 17.3314, + "step": 465900 + }, + { + "epoch": 0.9411676773716553, + "grad_norm": 163.18458557128906, + "learning_rate": 1.4093467708076126e-07, + "loss": 15.2798, + "step": 465910 + }, + { + "epoch": 0.9411878780043391, + "grad_norm": 460.31256103515625, + "learning_rate": 1.4085239568171483e-07, + "loss": 13.5431, + "step": 465920 + }, + { + "epoch": 0.9412080786370229, + "grad_norm": 4.70221471786499, + "learning_rate": 1.4077013796556515e-07, + "loss": 26.0239, + "step": 465930 + }, + { + "epoch": 0.9412282792697068, + "grad_norm": 166.77098083496094, + "learning_rate": 1.406879039327125e-07, + "loss": 19.1998, + "step": 465940 + }, + { + "epoch": 0.9412484799023906, + "grad_norm": 72.78886413574219, + "learning_rate": 1.4060569358355703e-07, + "loss": 27.4905, + "step": 465950 + }, + { + "epoch": 0.9412686805350744, + "grad_norm": 225.41270446777344, + "learning_rate": 1.405235069185007e-07, + "loss": 14.072, + "step": 465960 + }, + { + "epoch": 0.9412888811677582, + "grad_norm": 309.4897155761719, + "learning_rate": 1.4044134393794373e-07, + "loss": 16.9801, + "step": 465970 + }, + { + "epoch": 0.941309081800442, + "grad_norm": 248.46646118164062, + "learning_rate": 1.4035920464228525e-07, + "loss": 9.1789, + "step": 465980 + }, + { + "epoch": 0.9413292824331259, + "grad_norm": 129.87884521484375, + "learning_rate": 1.4027708903192662e-07, + "loss": 24.2261, + "step": 465990 + }, + { + "epoch": 0.9413494830658096, + "grad_norm": 291.889404296875, + "learning_rate": 1.4019499710726913e-07, + "loss": 16.4706, + "step": 466000 + }, + { + "epoch": 0.9413696836984934, + "grad_norm": 137.46664428710938, + "learning_rate": 1.4011292886871086e-07, + "loss": 13.0814, + "step": 466010 + }, + { + "epoch": 0.9413898843311772, + "grad_norm": 441.6543884277344, + "learning_rate": 1.4003088431665312e-07, + "loss": 10.4695, + "step": 466020 + }, + { + "epoch": 0.941410084963861, + "grad_norm": 383.4626770019531, + "learning_rate": 1.3994886345149504e-07, + "loss": 16.5051, + "step": 466030 + }, + { + "epoch": 0.9414302855965448, + "grad_norm": 575.4952392578125, + "learning_rate": 1.3986686627363744e-07, + "loss": 31.3564, + "step": 466040 + }, + { + "epoch": 0.9414504862292287, + "grad_norm": 36.06890869140625, + "learning_rate": 1.3978489278347883e-07, + "loss": 26.1608, + "step": 466050 + }, + { + "epoch": 0.9414706868619125, + "grad_norm": 257.4670104980469, + "learning_rate": 1.397029429814184e-07, + "loss": 22.7073, + "step": 466060 + }, + { + "epoch": 0.9414908874945963, + "grad_norm": 98.51122283935547, + "learning_rate": 1.39621016867858e-07, + "loss": 18.6808, + "step": 466070 + }, + { + "epoch": 0.9415110881272801, + "grad_norm": 287.9281921386719, + "learning_rate": 1.39539114443194e-07, + "loss": 24.4925, + "step": 466080 + }, + { + "epoch": 0.941531288759964, + "grad_norm": 357.3394775390625, + "learning_rate": 1.3945723570782722e-07, + "loss": 20.3994, + "step": 466090 + }, + { + "epoch": 0.9415514893926478, + "grad_norm": 101.79381561279297, + "learning_rate": 1.3937538066215672e-07, + "loss": 21.3881, + "step": 466100 + }, + { + "epoch": 0.9415716900253316, + "grad_norm": 207.9473114013672, + "learning_rate": 1.3929354930658112e-07, + "loss": 12.0363, + "step": 466110 + }, + { + "epoch": 0.9415918906580154, + "grad_norm": 275.6166687011719, + "learning_rate": 1.3921174164149842e-07, + "loss": 14.4197, + "step": 466120 + }, + { + "epoch": 0.9416120912906992, + "grad_norm": 367.0701599121094, + "learning_rate": 1.3912995766730887e-07, + "loss": 9.1168, + "step": 466130 + }, + { + "epoch": 0.941632291923383, + "grad_norm": 118.0570297241211, + "learning_rate": 1.3904819738441043e-07, + "loss": 26.8702, + "step": 466140 + }, + { + "epoch": 0.9416524925560669, + "grad_norm": 489.6506652832031, + "learning_rate": 1.3896646079320064e-07, + "loss": 19.6159, + "step": 466150 + }, + { + "epoch": 0.9416726931887507, + "grad_norm": 198.47056579589844, + "learning_rate": 1.388847478940797e-07, + "loss": 19.9269, + "step": 466160 + }, + { + "epoch": 0.9416928938214345, + "grad_norm": 172.43580627441406, + "learning_rate": 1.3880305868744392e-07, + "loss": 19.4838, + "step": 466170 + }, + { + "epoch": 0.9417130944541183, + "grad_norm": 269.263916015625, + "learning_rate": 1.3872139317369304e-07, + "loss": 9.9648, + "step": 466180 + }, + { + "epoch": 0.9417332950868021, + "grad_norm": 394.6880798339844, + "learning_rate": 1.3863975135322505e-07, + "loss": 13.4915, + "step": 466190 + }, + { + "epoch": 0.941753495719486, + "grad_norm": 137.1290740966797, + "learning_rate": 1.385581332264363e-07, + "loss": 15.7726, + "step": 466200 + }, + { + "epoch": 0.9417736963521698, + "grad_norm": 113.25942993164062, + "learning_rate": 1.3847653879372646e-07, + "loss": 19.0675, + "step": 466210 + }, + { + "epoch": 0.9417938969848536, + "grad_norm": 259.3558654785156, + "learning_rate": 1.3839496805549136e-07, + "loss": 14.4658, + "step": 466220 + }, + { + "epoch": 0.9418140976175374, + "grad_norm": 315.2320861816406, + "learning_rate": 1.383134210121301e-07, + "loss": 12.7709, + "step": 466230 + }, + { + "epoch": 0.9418342982502212, + "grad_norm": 163.65098571777344, + "learning_rate": 1.3823189766403954e-07, + "loss": 21.0497, + "step": 466240 + }, + { + "epoch": 0.9418544988829051, + "grad_norm": 364.7218322753906, + "learning_rate": 1.3815039801161723e-07, + "loss": 14.7508, + "step": 466250 + }, + { + "epoch": 0.9418746995155888, + "grad_norm": 420.7170715332031, + "learning_rate": 1.3806892205526e-07, + "loss": 12.4241, + "step": 466260 + }, + { + "epoch": 0.9418949001482726, + "grad_norm": 78.01525115966797, + "learning_rate": 1.3798746979536482e-07, + "loss": 20.511, + "step": 466270 + }, + { + "epoch": 0.9419151007809564, + "grad_norm": 251.70968627929688, + "learning_rate": 1.3790604123232966e-07, + "loss": 11.4409, + "step": 466280 + }, + { + "epoch": 0.9419353014136402, + "grad_norm": 242.22872924804688, + "learning_rate": 1.3782463636655087e-07, + "loss": 23.0007, + "step": 466290 + }, + { + "epoch": 0.9419555020463241, + "grad_norm": 31.46925163269043, + "learning_rate": 1.3774325519842423e-07, + "loss": 14.201, + "step": 466300 + }, + { + "epoch": 0.9419757026790079, + "grad_norm": 191.0092315673828, + "learning_rate": 1.376618977283478e-07, + "loss": 20.0056, + "step": 466310 + }, + { + "epoch": 0.9419959033116917, + "grad_norm": 411.2151184082031, + "learning_rate": 1.3758056395671738e-07, + "loss": 16.681, + "step": 466320 + }, + { + "epoch": 0.9420161039443755, + "grad_norm": 170.69198608398438, + "learning_rate": 1.374992538839298e-07, + "loss": 19.8808, + "step": 466330 + }, + { + "epoch": 0.9420363045770593, + "grad_norm": 111.5400619506836, + "learning_rate": 1.3741796751038095e-07, + "loss": 17.3456, + "step": 466340 + }, + { + "epoch": 0.9420565052097432, + "grad_norm": 300.8111267089844, + "learning_rate": 1.373367048364671e-07, + "loss": 13.2539, + "step": 466350 + }, + { + "epoch": 0.942076705842427, + "grad_norm": 1.120597243309021, + "learning_rate": 1.3725546586258464e-07, + "loss": 14.5331, + "step": 466360 + }, + { + "epoch": 0.9420969064751108, + "grad_norm": 130.71031188964844, + "learning_rate": 1.3717425058912882e-07, + "loss": 18.7702, + "step": 466370 + }, + { + "epoch": 0.9421171071077946, + "grad_norm": 9.499824523925781, + "learning_rate": 1.3709305901649594e-07, + "loss": 42.5031, + "step": 466380 + }, + { + "epoch": 0.9421373077404784, + "grad_norm": 342.7139587402344, + "learning_rate": 1.370118911450824e-07, + "loss": 16.3428, + "step": 466390 + }, + { + "epoch": 0.9421575083731623, + "grad_norm": 294.46417236328125, + "learning_rate": 1.3693074697528231e-07, + "loss": 20.2885, + "step": 466400 + }, + { + "epoch": 0.9421777090058461, + "grad_norm": 32.19709396362305, + "learning_rate": 1.36849626507492e-07, + "loss": 20.2099, + "step": 466410 + }, + { + "epoch": 0.9421979096385299, + "grad_norm": 221.39833068847656, + "learning_rate": 1.367685297421073e-07, + "loss": 22.3431, + "step": 466420 + }, + { + "epoch": 0.9422181102712137, + "grad_norm": 517.8855590820312, + "learning_rate": 1.366874566795229e-07, + "loss": 16.7649, + "step": 466430 + }, + { + "epoch": 0.9422383109038975, + "grad_norm": 349.9701232910156, + "learning_rate": 1.3660640732013342e-07, + "loss": 10.6436, + "step": 466440 + }, + { + "epoch": 0.9422585115365814, + "grad_norm": 148.7420196533203, + "learning_rate": 1.3652538166433527e-07, + "loss": 19.0194, + "step": 466450 + }, + { + "epoch": 0.9422787121692652, + "grad_norm": 109.53025817871094, + "learning_rate": 1.3644437971252144e-07, + "loss": 20.9439, + "step": 466460 + }, + { + "epoch": 0.942298912801949, + "grad_norm": 202.75306701660156, + "learning_rate": 1.3636340146508886e-07, + "loss": 10.2435, + "step": 466470 + }, + { + "epoch": 0.9423191134346328, + "grad_norm": 301.680908203125, + "learning_rate": 1.362824469224311e-07, + "loss": 25.3954, + "step": 466480 + }, + { + "epoch": 0.9423393140673166, + "grad_norm": 615.1964111328125, + "learning_rate": 1.362015160849417e-07, + "loss": 18.3119, + "step": 466490 + }, + { + "epoch": 0.9423595147000005, + "grad_norm": 123.60035705566406, + "learning_rate": 1.3612060895301759e-07, + "loss": 10.8776, + "step": 466500 + }, + { + "epoch": 0.9423797153326842, + "grad_norm": 81.8182601928711, + "learning_rate": 1.360397255270507e-07, + "loss": 12.123, + "step": 466510 + }, + { + "epoch": 0.942399915965368, + "grad_norm": 23.797916412353516, + "learning_rate": 1.3595886580743677e-07, + "loss": 16.8994, + "step": 466520 + }, + { + "epoch": 0.9424201165980518, + "grad_norm": 151.0086212158203, + "learning_rate": 1.3587802979456888e-07, + "loss": 19.425, + "step": 466530 + }, + { + "epoch": 0.9424403172307356, + "grad_norm": 393.822509765625, + "learning_rate": 1.3579721748884222e-07, + "loss": 16.2712, + "step": 466540 + }, + { + "epoch": 0.9424605178634194, + "grad_norm": 61.192020416259766, + "learning_rate": 1.3571642889064984e-07, + "loss": 13.6674, + "step": 466550 + }, + { + "epoch": 0.9424807184961033, + "grad_norm": 74.15399932861328, + "learning_rate": 1.356356640003853e-07, + "loss": 7.5289, + "step": 466560 + }, + { + "epoch": 0.9425009191287871, + "grad_norm": 174.9483184814453, + "learning_rate": 1.3555492281844273e-07, + "loss": 15.7946, + "step": 466570 + }, + { + "epoch": 0.9425211197614709, + "grad_norm": 242.9797821044922, + "learning_rate": 1.354742053452157e-07, + "loss": 25.624, + "step": 466580 + }, + { + "epoch": 0.9425413203941547, + "grad_norm": 426.18695068359375, + "learning_rate": 1.353935115810967e-07, + "loss": 16.0391, + "step": 466590 + }, + { + "epoch": 0.9425615210268385, + "grad_norm": 90.09152221679688, + "learning_rate": 1.3531284152647983e-07, + "loss": 25.8447, + "step": 466600 + }, + { + "epoch": 0.9425817216595224, + "grad_norm": 135.96788024902344, + "learning_rate": 1.3523219518175924e-07, + "loss": 13.1811, + "step": 466610 + }, + { + "epoch": 0.9426019222922062, + "grad_norm": 375.2832946777344, + "learning_rate": 1.351515725473257e-07, + "loss": 12.4075, + "step": 466620 + }, + { + "epoch": 0.94262212292489, + "grad_norm": 448.191650390625, + "learning_rate": 1.3507097362357392e-07, + "loss": 25.0984, + "step": 466630 + }, + { + "epoch": 0.9426423235575738, + "grad_norm": 420.7967834472656, + "learning_rate": 1.349903984108958e-07, + "loss": 19.3443, + "step": 466640 + }, + { + "epoch": 0.9426625241902576, + "grad_norm": 415.8902587890625, + "learning_rate": 1.3490984690968488e-07, + "loss": 20.8693, + "step": 466650 + }, + { + "epoch": 0.9426827248229415, + "grad_norm": 533.3202514648438, + "learning_rate": 1.3482931912033314e-07, + "loss": 17.6854, + "step": 466660 + }, + { + "epoch": 0.9427029254556253, + "grad_norm": 0.0, + "learning_rate": 1.3474881504323301e-07, + "loss": 9.6554, + "step": 466670 + }, + { + "epoch": 0.9427231260883091, + "grad_norm": 394.9208984375, + "learning_rate": 1.346683346787775e-07, + "loss": 11.8232, + "step": 466680 + }, + { + "epoch": 0.9427433267209929, + "grad_norm": 303.13507080078125, + "learning_rate": 1.3458787802735794e-07, + "loss": 16.0649, + "step": 466690 + }, + { + "epoch": 0.9427635273536767, + "grad_norm": 360.8521728515625, + "learning_rate": 1.3450744508936687e-07, + "loss": 16.0741, + "step": 466700 + }, + { + "epoch": 0.9427837279863606, + "grad_norm": 152.80377197265625, + "learning_rate": 1.3442703586519724e-07, + "loss": 10.3248, + "step": 466710 + }, + { + "epoch": 0.9428039286190444, + "grad_norm": 264.30511474609375, + "learning_rate": 1.3434665035523985e-07, + "loss": 30.071, + "step": 466720 + }, + { + "epoch": 0.9428241292517282, + "grad_norm": 385.9399719238281, + "learning_rate": 1.342662885598861e-07, + "loss": 6.8205, + "step": 466730 + }, + { + "epoch": 0.942844329884412, + "grad_norm": 341.55120849609375, + "learning_rate": 1.3418595047952897e-07, + "loss": 13.4045, + "step": 466740 + }, + { + "epoch": 0.9428645305170958, + "grad_norm": 493.4660949707031, + "learning_rate": 1.341056361145593e-07, + "loss": 16.0428, + "step": 466750 + }, + { + "epoch": 0.9428847311497797, + "grad_norm": 37.99992752075195, + "learning_rate": 1.3402534546536783e-07, + "loss": 17.0469, + "step": 466760 + }, + { + "epoch": 0.9429049317824634, + "grad_norm": 246.25341796875, + "learning_rate": 1.3394507853234763e-07, + "loss": 16.1532, + "step": 466770 + }, + { + "epoch": 0.9429251324151472, + "grad_norm": 328.9928283691406, + "learning_rate": 1.3386483531588834e-07, + "loss": 22.0632, + "step": 466780 + }, + { + "epoch": 0.942945333047831, + "grad_norm": 352.3476257324219, + "learning_rate": 1.337846158163819e-07, + "loss": 11.8354, + "step": 466790 + }, + { + "epoch": 0.9429655336805148, + "grad_norm": 449.89324951171875, + "learning_rate": 1.3370442003421913e-07, + "loss": 23.6962, + "step": 466800 + }, + { + "epoch": 0.9429857343131987, + "grad_norm": 168.8517608642578, + "learning_rate": 1.336242479697908e-07, + "loss": 33.493, + "step": 466810 + }, + { + "epoch": 0.9430059349458825, + "grad_norm": 482.6525573730469, + "learning_rate": 1.335440996234877e-07, + "loss": 10.6317, + "step": 466820 + }, + { + "epoch": 0.9430261355785663, + "grad_norm": 208.42434692382812, + "learning_rate": 1.334639749956995e-07, + "loss": 13.3247, + "step": 466830 + }, + { + "epoch": 0.9430463362112501, + "grad_norm": 401.0406494140625, + "learning_rate": 1.3338387408681875e-07, + "loss": 12.9286, + "step": 466840 + }, + { + "epoch": 0.9430665368439339, + "grad_norm": 369.0025329589844, + "learning_rate": 1.333037968972345e-07, + "loss": 17.5546, + "step": 466850 + }, + { + "epoch": 0.9430867374766178, + "grad_norm": 79.51467895507812, + "learning_rate": 1.33223743427337e-07, + "loss": 17.6554, + "step": 466860 + }, + { + "epoch": 0.9431069381093016, + "grad_norm": 274.3871154785156, + "learning_rate": 1.331437136775171e-07, + "loss": 18.3525, + "step": 466870 + }, + { + "epoch": 0.9431271387419854, + "grad_norm": 104.7078628540039, + "learning_rate": 1.330637076481639e-07, + "loss": 12.1618, + "step": 466880 + }, + { + "epoch": 0.9431473393746692, + "grad_norm": 87.01747131347656, + "learning_rate": 1.3298372533966874e-07, + "loss": 10.0266, + "step": 466890 + }, + { + "epoch": 0.943167540007353, + "grad_norm": 302.48150634765625, + "learning_rate": 1.3290376675242022e-07, + "loss": 26.9877, + "step": 466900 + }, + { + "epoch": 0.9431877406400369, + "grad_norm": 261.38824462890625, + "learning_rate": 1.3282383188680802e-07, + "loss": 21.9908, + "step": 466910 + }, + { + "epoch": 0.9432079412727207, + "grad_norm": 6.378824234008789, + "learning_rate": 1.327439207432224e-07, + "loss": 15.0628, + "step": 466920 + }, + { + "epoch": 0.9432281419054045, + "grad_norm": 223.4723358154297, + "learning_rate": 1.3266403332205248e-07, + "loss": 10.1727, + "step": 466930 + }, + { + "epoch": 0.9432483425380883, + "grad_norm": 162.22921752929688, + "learning_rate": 1.3258416962368849e-07, + "loss": 12.2772, + "step": 466940 + }, + { + "epoch": 0.9432685431707721, + "grad_norm": 203.04843139648438, + "learning_rate": 1.325043296485179e-07, + "loss": 17.2785, + "step": 466950 + }, + { + "epoch": 0.943288743803456, + "grad_norm": 403.38531494140625, + "learning_rate": 1.3242451339693153e-07, + "loss": 31.0572, + "step": 466960 + }, + { + "epoch": 0.9433089444361398, + "grad_norm": 51.905208587646484, + "learning_rate": 1.3234472086931738e-07, + "loss": 26.5818, + "step": 466970 + }, + { + "epoch": 0.9433291450688236, + "grad_norm": 6.7533159255981445, + "learning_rate": 1.322649520660646e-07, + "loss": 13.6709, + "step": 466980 + }, + { + "epoch": 0.9433493457015074, + "grad_norm": 321.4626159667969, + "learning_rate": 1.3218520698756177e-07, + "loss": 9.8681, + "step": 466990 + }, + { + "epoch": 0.9433695463341912, + "grad_norm": 161.235595703125, + "learning_rate": 1.3210548563419857e-07, + "loss": 14.6061, + "step": 467000 + }, + { + "epoch": 0.943389746966875, + "grad_norm": 210.1715850830078, + "learning_rate": 1.32025788006363e-07, + "loss": 24.0145, + "step": 467010 + }, + { + "epoch": 0.9434099475995589, + "grad_norm": 545.941650390625, + "learning_rate": 1.3194611410444258e-07, + "loss": 20.3786, + "step": 467020 + }, + { + "epoch": 0.9434301482322426, + "grad_norm": 312.9477233886719, + "learning_rate": 1.3186646392882696e-07, + "loss": 9.535, + "step": 467030 + }, + { + "epoch": 0.9434503488649264, + "grad_norm": 135.27679443359375, + "learning_rate": 1.3178683747990362e-07, + "loss": 10.6673, + "step": 467040 + }, + { + "epoch": 0.9434705494976102, + "grad_norm": 435.4106750488281, + "learning_rate": 1.3170723475806003e-07, + "loss": 24.8198, + "step": 467050 + }, + { + "epoch": 0.943490750130294, + "grad_norm": 206.95925903320312, + "learning_rate": 1.3162765576368587e-07, + "loss": 23.6655, + "step": 467060 + }, + { + "epoch": 0.9435109507629779, + "grad_norm": 178.56077575683594, + "learning_rate": 1.315481004971675e-07, + "loss": 9.3552, + "step": 467070 + }, + { + "epoch": 0.9435311513956617, + "grad_norm": 210.2391357421875, + "learning_rate": 1.314685689588935e-07, + "loss": 18.2021, + "step": 467080 + }, + { + "epoch": 0.9435513520283455, + "grad_norm": 211.8839569091797, + "learning_rate": 1.3138906114925133e-07, + "loss": 11.0213, + "step": 467090 + }, + { + "epoch": 0.9435715526610293, + "grad_norm": 119.14147186279297, + "learning_rate": 1.313095770686279e-07, + "loss": 12.2139, + "step": 467100 + }, + { + "epoch": 0.9435917532937131, + "grad_norm": 248.24574279785156, + "learning_rate": 1.3123011671741183e-07, + "loss": 14.5964, + "step": 467110 + }, + { + "epoch": 0.943611953926397, + "grad_norm": 158.181640625, + "learning_rate": 1.3115068009598886e-07, + "loss": 15.6111, + "step": 467120 + }, + { + "epoch": 0.9436321545590808, + "grad_norm": 338.14825439453125, + "learning_rate": 1.3107126720474762e-07, + "loss": 12.4597, + "step": 467130 + }, + { + "epoch": 0.9436523551917646, + "grad_norm": 250.89205932617188, + "learning_rate": 1.3099187804407387e-07, + "loss": 23.5018, + "step": 467140 + }, + { + "epoch": 0.9436725558244484, + "grad_norm": 226.29510498046875, + "learning_rate": 1.3091251261435568e-07, + "loss": 7.2197, + "step": 467150 + }, + { + "epoch": 0.9436927564571322, + "grad_norm": 278.6545104980469, + "learning_rate": 1.3083317091597936e-07, + "loss": 15.7558, + "step": 467160 + }, + { + "epoch": 0.9437129570898161, + "grad_norm": 267.90771484375, + "learning_rate": 1.3075385294933129e-07, + "loss": 14.5581, + "step": 467170 + }, + { + "epoch": 0.9437331577224999, + "grad_norm": 0.0, + "learning_rate": 1.306745587147984e-07, + "loss": 14.3584, + "step": 467180 + }, + { + "epoch": 0.9437533583551837, + "grad_norm": 288.2501220703125, + "learning_rate": 1.3059528821276758e-07, + "loss": 18.8858, + "step": 467190 + }, + { + "epoch": 0.9437735589878675, + "grad_norm": 111.18515014648438, + "learning_rate": 1.3051604144362407e-07, + "loss": 15.6672, + "step": 467200 + }, + { + "epoch": 0.9437937596205513, + "grad_norm": 363.2454833984375, + "learning_rate": 1.304368184077548e-07, + "loss": 18.1603, + "step": 467210 + }, + { + "epoch": 0.9438139602532352, + "grad_norm": 265.66278076171875, + "learning_rate": 1.3035761910554666e-07, + "loss": 21.7102, + "step": 467220 + }, + { + "epoch": 0.943834160885919, + "grad_norm": 357.0940856933594, + "learning_rate": 1.302784435373844e-07, + "loss": 12.6734, + "step": 467230 + }, + { + "epoch": 0.9438543615186028, + "grad_norm": 204.77838134765625, + "learning_rate": 1.3019929170365376e-07, + "loss": 18.3832, + "step": 467240 + }, + { + "epoch": 0.9438745621512866, + "grad_norm": 301.5811462402344, + "learning_rate": 1.3012016360474223e-07, + "loss": 8.2714, + "step": 467250 + }, + { + "epoch": 0.9438947627839704, + "grad_norm": 113.41458892822266, + "learning_rate": 1.3004105924103394e-07, + "loss": 27.4468, + "step": 467260 + }, + { + "epoch": 0.9439149634166543, + "grad_norm": 255.78952026367188, + "learning_rate": 1.2996197861291472e-07, + "loss": 23.7522, + "step": 467270 + }, + { + "epoch": 0.943935164049338, + "grad_norm": 258.4866943359375, + "learning_rate": 1.2988292172076977e-07, + "loss": 18.5747, + "step": 467280 + }, + { + "epoch": 0.9439553646820218, + "grad_norm": 32.485294342041016, + "learning_rate": 1.2980388856498604e-07, + "loss": 8.9216, + "step": 467290 + }, + { + "epoch": 0.9439755653147056, + "grad_norm": 112.42797088623047, + "learning_rate": 1.29724879145946e-07, + "loss": 13.212, + "step": 467300 + }, + { + "epoch": 0.9439957659473894, + "grad_norm": 265.965087890625, + "learning_rate": 1.296458934640371e-07, + "loss": 9.4598, + "step": 467310 + }, + { + "epoch": 0.9440159665800733, + "grad_norm": 378.8536682128906, + "learning_rate": 1.2956693151964296e-07, + "loss": 18.24, + "step": 467320 + }, + { + "epoch": 0.9440361672127571, + "grad_norm": 212.27166748046875, + "learning_rate": 1.2948799331314933e-07, + "loss": 15.0391, + "step": 467330 + }, + { + "epoch": 0.9440563678454409, + "grad_norm": 833.9385375976562, + "learning_rate": 1.2940907884494036e-07, + "loss": 45.1016, + "step": 467340 + }, + { + "epoch": 0.9440765684781247, + "grad_norm": 298.88360595703125, + "learning_rate": 1.2933018811540078e-07, + "loss": 17.9972, + "step": 467350 + }, + { + "epoch": 0.9440967691108085, + "grad_norm": 202.49998474121094, + "learning_rate": 1.2925132112491523e-07, + "loss": 15.4919, + "step": 467360 + }, + { + "epoch": 0.9441169697434924, + "grad_norm": 188.1671600341797, + "learning_rate": 1.2917247787386787e-07, + "loss": 16.3879, + "step": 467370 + }, + { + "epoch": 0.9441371703761762, + "grad_norm": 370.61407470703125, + "learning_rate": 1.2909365836264287e-07, + "loss": 18.5846, + "step": 467380 + }, + { + "epoch": 0.94415737100886, + "grad_norm": 153.25758361816406, + "learning_rate": 1.2901486259162488e-07, + "loss": 18.4889, + "step": 467390 + }, + { + "epoch": 0.9441775716415438, + "grad_norm": 251.57826232910156, + "learning_rate": 1.289360905611975e-07, + "loss": 8.5688, + "step": 467400 + }, + { + "epoch": 0.9441977722742276, + "grad_norm": 91.77510070800781, + "learning_rate": 1.288573422717454e-07, + "loss": 31.492, + "step": 467410 + }, + { + "epoch": 0.9442179729069115, + "grad_norm": 6.797186851501465, + "learning_rate": 1.287786177236511e-07, + "loss": 15.1667, + "step": 467420 + }, + { + "epoch": 0.9442381735395953, + "grad_norm": 563.285888671875, + "learning_rate": 1.2869991691729922e-07, + "loss": 29.3362, + "step": 467430 + }, + { + "epoch": 0.9442583741722791, + "grad_norm": 18.798179626464844, + "learning_rate": 1.2862123985307284e-07, + "loss": 10.5163, + "step": 467440 + }, + { + "epoch": 0.9442785748049629, + "grad_norm": 217.36122131347656, + "learning_rate": 1.285425865313561e-07, + "loss": 13.9328, + "step": 467450 + }, + { + "epoch": 0.9442987754376467, + "grad_norm": 314.8659973144531, + "learning_rate": 1.28463956952532e-07, + "loss": 20.3368, + "step": 467460 + }, + { + "epoch": 0.9443189760703306, + "grad_norm": 440.9849548339844, + "learning_rate": 1.2838535111698359e-07, + "loss": 19.1159, + "step": 467470 + }, + { + "epoch": 0.9443391767030144, + "grad_norm": 248.7581329345703, + "learning_rate": 1.2830676902509443e-07, + "loss": 13.1833, + "step": 467480 + }, + { + "epoch": 0.9443593773356982, + "grad_norm": 266.7648620605469, + "learning_rate": 1.2822821067724643e-07, + "loss": 37.2818, + "step": 467490 + }, + { + "epoch": 0.944379577968382, + "grad_norm": 181.34719848632812, + "learning_rate": 1.2814967607382433e-07, + "loss": 9.4124, + "step": 467500 + }, + { + "epoch": 0.9443997786010658, + "grad_norm": 273.1263122558594, + "learning_rate": 1.2807116521520947e-07, + "loss": 12.9209, + "step": 467510 + }, + { + "epoch": 0.9444199792337497, + "grad_norm": 432.1019592285156, + "learning_rate": 1.279926781017843e-07, + "loss": 26.1046, + "step": 467520 + }, + { + "epoch": 0.9444401798664335, + "grad_norm": 42.72356033325195, + "learning_rate": 1.2791421473393184e-07, + "loss": 10.2638, + "step": 467530 + }, + { + "epoch": 0.9444603804991172, + "grad_norm": 270.31695556640625, + "learning_rate": 1.2783577511203515e-07, + "loss": 15.005, + "step": 467540 + }, + { + "epoch": 0.944480581131801, + "grad_norm": 641.5654296875, + "learning_rate": 1.2775735923647614e-07, + "loss": 20.9434, + "step": 467550 + }, + { + "epoch": 0.9445007817644848, + "grad_norm": 177.09170532226562, + "learning_rate": 1.2767896710763616e-07, + "loss": 14.5292, + "step": 467560 + }, + { + "epoch": 0.9445209823971686, + "grad_norm": 128.17633056640625, + "learning_rate": 1.2760059872589824e-07, + "loss": 21.4097, + "step": 467570 + }, + { + "epoch": 0.9445411830298525, + "grad_norm": 191.09945678710938, + "learning_rate": 1.2752225409164432e-07, + "loss": 24.4088, + "step": 467580 + }, + { + "epoch": 0.9445613836625363, + "grad_norm": 142.05490112304688, + "learning_rate": 1.2744393320525573e-07, + "loss": 11.0233, + "step": 467590 + }, + { + "epoch": 0.9445815842952201, + "grad_norm": 330.49957275390625, + "learning_rate": 1.2736563606711384e-07, + "loss": 11.7947, + "step": 467600 + }, + { + "epoch": 0.9446017849279039, + "grad_norm": 195.06472778320312, + "learning_rate": 1.2728736267760167e-07, + "loss": 42.5652, + "step": 467610 + }, + { + "epoch": 0.9446219855605877, + "grad_norm": 123.06463623046875, + "learning_rate": 1.2720911303710004e-07, + "loss": 11.8161, + "step": 467620 + }, + { + "epoch": 0.9446421861932716, + "grad_norm": 149.53256225585938, + "learning_rate": 1.2713088714598974e-07, + "loss": 10.8602, + "step": 467630 + }, + { + "epoch": 0.9446623868259554, + "grad_norm": 118.68656158447266, + "learning_rate": 1.2705268500465274e-07, + "loss": 13.8827, + "step": 467640 + }, + { + "epoch": 0.9446825874586392, + "grad_norm": 11.887679100036621, + "learning_rate": 1.2697450661347033e-07, + "loss": 12.5499, + "step": 467650 + }, + { + "epoch": 0.944702788091323, + "grad_norm": 571.0030517578125, + "learning_rate": 1.2689635197282224e-07, + "loss": 25.1405, + "step": 467660 + }, + { + "epoch": 0.9447229887240068, + "grad_norm": 598.7418212890625, + "learning_rate": 1.2681822108309094e-07, + "loss": 23.4043, + "step": 467670 + }, + { + "epoch": 0.9447431893566907, + "grad_norm": 661.38427734375, + "learning_rate": 1.2674011394465614e-07, + "loss": 26.8192, + "step": 467680 + }, + { + "epoch": 0.9447633899893745, + "grad_norm": 171.64144897460938, + "learning_rate": 1.2666203055789915e-07, + "loss": 10.2326, + "step": 467690 + }, + { + "epoch": 0.9447835906220583, + "grad_norm": 269.7370300292969, + "learning_rate": 1.2658397092320028e-07, + "loss": 18.9251, + "step": 467700 + }, + { + "epoch": 0.9448037912547421, + "grad_norm": 360.25775146484375, + "learning_rate": 1.2650593504094034e-07, + "loss": 16.4297, + "step": 467710 + }, + { + "epoch": 0.9448239918874259, + "grad_norm": 316.7818908691406, + "learning_rate": 1.2642792291149896e-07, + "loss": 13.7358, + "step": 467720 + }, + { + "epoch": 0.9448441925201098, + "grad_norm": 580.6852416992188, + "learning_rate": 1.2634993453525702e-07, + "loss": 25.5588, + "step": 467730 + }, + { + "epoch": 0.9448643931527936, + "grad_norm": 177.68309020996094, + "learning_rate": 1.2627196991259473e-07, + "loss": 24.8256, + "step": 467740 + }, + { + "epoch": 0.9448845937854774, + "grad_norm": 170.87631225585938, + "learning_rate": 1.261940290438912e-07, + "loss": 22.1925, + "step": 467750 + }, + { + "epoch": 0.9449047944181612, + "grad_norm": 546.9649658203125, + "learning_rate": 1.2611611192952733e-07, + "loss": 14.5484, + "step": 467760 + }, + { + "epoch": 0.944924995050845, + "grad_norm": 11.079774856567383, + "learning_rate": 1.2603821856988218e-07, + "loss": 13.4663, + "step": 467770 + }, + { + "epoch": 0.9449451956835289, + "grad_norm": 354.6279602050781, + "learning_rate": 1.259603489653355e-07, + "loss": 14.3967, + "step": 467780 + }, + { + "epoch": 0.9449653963162126, + "grad_norm": 219.11502075195312, + "learning_rate": 1.2588250311626693e-07, + "loss": 19.6991, + "step": 467790 + }, + { + "epoch": 0.9449855969488964, + "grad_norm": 287.68670654296875, + "learning_rate": 1.258046810230562e-07, + "loss": 23.0365, + "step": 467800 + }, + { + "epoch": 0.9450057975815802, + "grad_norm": 224.74842834472656, + "learning_rate": 1.257268826860819e-07, + "loss": 29.1695, + "step": 467810 + }, + { + "epoch": 0.945025998214264, + "grad_norm": 150.61618041992188, + "learning_rate": 1.2564910810572317e-07, + "loss": 10.7292, + "step": 467820 + }, + { + "epoch": 0.9450461988469478, + "grad_norm": 367.19525146484375, + "learning_rate": 1.255713572823608e-07, + "loss": 14.223, + "step": 467830 + }, + { + "epoch": 0.9450663994796317, + "grad_norm": 493.5210266113281, + "learning_rate": 1.2549363021637174e-07, + "loss": 14.1907, + "step": 467840 + }, + { + "epoch": 0.9450866001123155, + "grad_norm": 161.30459594726562, + "learning_rate": 1.2541592690813508e-07, + "loss": 14.1587, + "step": 467850 + }, + { + "epoch": 0.9451068007449993, + "grad_norm": 210.84934997558594, + "learning_rate": 1.2533824735803059e-07, + "loss": 17.4816, + "step": 467860 + }, + { + "epoch": 0.9451270013776831, + "grad_norm": 38.96179962158203, + "learning_rate": 1.252605915664362e-07, + "loss": 18.9776, + "step": 467870 + }, + { + "epoch": 0.945147202010367, + "grad_norm": 398.9723205566406, + "learning_rate": 1.2518295953373005e-07, + "loss": 12.3231, + "step": 467880 + }, + { + "epoch": 0.9451674026430508, + "grad_norm": 128.52308654785156, + "learning_rate": 1.2510535126029067e-07, + "loss": 14.2906, + "step": 467890 + }, + { + "epoch": 0.9451876032757346, + "grad_norm": 35.2382926940918, + "learning_rate": 1.2502776674649776e-07, + "loss": 17.7047, + "step": 467900 + }, + { + "epoch": 0.9452078039084184, + "grad_norm": 362.67681884765625, + "learning_rate": 1.2495020599272766e-07, + "loss": 15.2447, + "step": 467910 + }, + { + "epoch": 0.9452280045411022, + "grad_norm": 220.71279907226562, + "learning_rate": 1.2487266899935845e-07, + "loss": 13.3988, + "step": 467920 + }, + { + "epoch": 0.945248205173786, + "grad_norm": 350.5021057128906, + "learning_rate": 1.2479515576676925e-07, + "loss": 22.0513, + "step": 467930 + }, + { + "epoch": 0.9452684058064699, + "grad_norm": 168.4711151123047, + "learning_rate": 1.24717666295337e-07, + "loss": 6.218, + "step": 467940 + }, + { + "epoch": 0.9452886064391537, + "grad_norm": 141.270263671875, + "learning_rate": 1.2464020058543912e-07, + "loss": 10.5573, + "step": 467950 + }, + { + "epoch": 0.9453088070718375, + "grad_norm": 290.7150573730469, + "learning_rate": 1.2456275863745426e-07, + "loss": 14.7041, + "step": 467960 + }, + { + "epoch": 0.9453290077045213, + "grad_norm": 738.6734008789062, + "learning_rate": 1.2448534045175876e-07, + "loss": 23.7464, + "step": 467970 + }, + { + "epoch": 0.9453492083372051, + "grad_norm": 304.2628479003906, + "learning_rate": 1.2440794602873064e-07, + "loss": 12.3787, + "step": 467980 + }, + { + "epoch": 0.945369408969889, + "grad_norm": 162.34193420410156, + "learning_rate": 1.2433057536874682e-07, + "loss": 9.3161, + "step": 467990 + }, + { + "epoch": 0.9453896096025728, + "grad_norm": 506.57305908203125, + "learning_rate": 1.2425322847218368e-07, + "loss": 21.8317, + "step": 468000 + }, + { + "epoch": 0.9454098102352566, + "grad_norm": 60.979793548583984, + "learning_rate": 1.241759053394198e-07, + "loss": 14.2102, + "step": 468010 + }, + { + "epoch": 0.9454300108679404, + "grad_norm": 71.64190673828125, + "learning_rate": 1.2409860597083102e-07, + "loss": 9.0547, + "step": 468020 + }, + { + "epoch": 0.9454502115006242, + "grad_norm": 321.5577087402344, + "learning_rate": 1.240213303667942e-07, + "loss": 7.2337, + "step": 468030 + }, + { + "epoch": 0.9454704121333081, + "grad_norm": 410.08990478515625, + "learning_rate": 1.239440785276863e-07, + "loss": 32.9802, + "step": 468040 + }, + { + "epoch": 0.9454906127659918, + "grad_norm": 522.6094970703125, + "learning_rate": 1.2386685045388313e-07, + "loss": 19.6221, + "step": 468050 + }, + { + "epoch": 0.9455108133986756, + "grad_norm": 339.32867431640625, + "learning_rate": 1.2378964614576162e-07, + "loss": 15.568, + "step": 468060 + }, + { + "epoch": 0.9455310140313594, + "grad_norm": 302.2319641113281, + "learning_rate": 1.237124656036981e-07, + "loss": 20.2023, + "step": 468070 + }, + { + "epoch": 0.9455512146640432, + "grad_norm": 133.09210205078125, + "learning_rate": 1.236353088280684e-07, + "loss": 18.3638, + "step": 468080 + }, + { + "epoch": 0.9455714152967271, + "grad_norm": 499.7694091796875, + "learning_rate": 1.2355817581924945e-07, + "loss": 12.3634, + "step": 468090 + }, + { + "epoch": 0.9455916159294109, + "grad_norm": 416.52093505859375, + "learning_rate": 1.2348106657761537e-07, + "loss": 10.5899, + "step": 468100 + }, + { + "epoch": 0.9456118165620947, + "grad_norm": 173.1257781982422, + "learning_rate": 1.2340398110354424e-07, + "loss": 14.2336, + "step": 468110 + }, + { + "epoch": 0.9456320171947785, + "grad_norm": 250.32508850097656, + "learning_rate": 1.2332691939741015e-07, + "loss": 8.7739, + "step": 468120 + }, + { + "epoch": 0.9456522178274623, + "grad_norm": 205.54248046875, + "learning_rate": 1.2324988145958895e-07, + "loss": 18.2811, + "step": 468130 + }, + { + "epoch": 0.9456724184601462, + "grad_norm": 23.474239349365234, + "learning_rate": 1.2317286729045586e-07, + "loss": 20.2123, + "step": 468140 + }, + { + "epoch": 0.94569261909283, + "grad_norm": 186.38671875, + "learning_rate": 1.2309587689038783e-07, + "loss": 27.4474, + "step": 468150 + }, + { + "epoch": 0.9457128197255138, + "grad_norm": 164.83273315429688, + "learning_rate": 1.2301891025975897e-07, + "loss": 16.3613, + "step": 468160 + }, + { + "epoch": 0.9457330203581976, + "grad_norm": 66.46712493896484, + "learning_rate": 1.229419673989435e-07, + "loss": 15.0921, + "step": 468170 + }, + { + "epoch": 0.9457532209908814, + "grad_norm": 267.094482421875, + "learning_rate": 1.2286504830831824e-07, + "loss": 14.6982, + "step": 468180 + }, + { + "epoch": 0.9457734216235653, + "grad_norm": 284.9889831542969, + "learning_rate": 1.2278815298825742e-07, + "loss": 25.1199, + "step": 468190 + }, + { + "epoch": 0.9457936222562491, + "grad_norm": 277.446533203125, + "learning_rate": 1.2271128143913458e-07, + "loss": 31.3855, + "step": 468200 + }, + { + "epoch": 0.9458138228889329, + "grad_norm": 116.27572631835938, + "learning_rate": 1.2263443366132555e-07, + "loss": 7.6856, + "step": 468210 + }, + { + "epoch": 0.9458340235216167, + "grad_norm": 237.4043731689453, + "learning_rate": 1.2255760965520557e-07, + "loss": 21.5642, + "step": 468220 + }, + { + "epoch": 0.9458542241543005, + "grad_norm": 299.2898864746094, + "learning_rate": 1.224808094211477e-07, + "loss": 15.2724, + "step": 468230 + }, + { + "epoch": 0.9458744247869844, + "grad_norm": 245.67074584960938, + "learning_rate": 1.2240403295952662e-07, + "loss": 9.2735, + "step": 468240 + }, + { + "epoch": 0.9458946254196682, + "grad_norm": 399.322265625, + "learning_rate": 1.2232728027071704e-07, + "loss": 13.4048, + "step": 468250 + }, + { + "epoch": 0.945914826052352, + "grad_norm": 69.8430404663086, + "learning_rate": 1.222505513550931e-07, + "loss": 9.9953, + "step": 468260 + }, + { + "epoch": 0.9459350266850358, + "grad_norm": 737.2543334960938, + "learning_rate": 1.221738462130273e-07, + "loss": 25.3555, + "step": 468270 + }, + { + "epoch": 0.9459552273177196, + "grad_norm": 54.74570846557617, + "learning_rate": 1.2209716484489543e-07, + "loss": 24.7528, + "step": 468280 + }, + { + "epoch": 0.9459754279504035, + "grad_norm": 324.6932373046875, + "learning_rate": 1.2202050725106995e-07, + "loss": 23.5245, + "step": 468290 + }, + { + "epoch": 0.9459956285830872, + "grad_norm": 189.67054748535156, + "learning_rate": 1.2194387343192504e-07, + "loss": 15.4185, + "step": 468300 + }, + { + "epoch": 0.946015829215771, + "grad_norm": 157.17291259765625, + "learning_rate": 1.2186726338783427e-07, + "loss": 10.3489, + "step": 468310 + }, + { + "epoch": 0.9460360298484548, + "grad_norm": 288.1325378417969, + "learning_rate": 1.2179067711917015e-07, + "loss": 12.6965, + "step": 468320 + }, + { + "epoch": 0.9460562304811386, + "grad_norm": 113.22279357910156, + "learning_rate": 1.2171411462630732e-07, + "loss": 14.872, + "step": 468330 + }, + { + "epoch": 0.9460764311138224, + "grad_norm": 384.1490478515625, + "learning_rate": 1.216375759096178e-07, + "loss": 34.573, + "step": 468340 + }, + { + "epoch": 0.9460966317465063, + "grad_norm": 537.5817260742188, + "learning_rate": 1.2156106096947563e-07, + "loss": 30.7704, + "step": 468350 + }, + { + "epoch": 0.9461168323791901, + "grad_norm": 83.53834533691406, + "learning_rate": 1.2148456980625223e-07, + "loss": 15.361, + "step": 468360 + }, + { + "epoch": 0.9461370330118739, + "grad_norm": 222.9774169921875, + "learning_rate": 1.214081024203223e-07, + "loss": 16.106, + "step": 468370 + }, + { + "epoch": 0.9461572336445577, + "grad_norm": 52.098289489746094, + "learning_rate": 1.2133165881205723e-07, + "loss": 17.7757, + "step": 468380 + }, + { + "epoch": 0.9461774342772415, + "grad_norm": 193.8730926513672, + "learning_rate": 1.2125523898182945e-07, + "loss": 18.033, + "step": 468390 + }, + { + "epoch": 0.9461976349099254, + "grad_norm": 116.78641510009766, + "learning_rate": 1.211788429300126e-07, + "loss": 18.3156, + "step": 468400 + }, + { + "epoch": 0.9462178355426092, + "grad_norm": 339.0370178222656, + "learning_rate": 1.21102470656978e-07, + "loss": 21.2598, + "step": 468410 + }, + { + "epoch": 0.946238036175293, + "grad_norm": 222.95582580566406, + "learning_rate": 1.2102612216309816e-07, + "loss": 16.2987, + "step": 468420 + }, + { + "epoch": 0.9462582368079768, + "grad_norm": 61.94565200805664, + "learning_rate": 1.2094979744874502e-07, + "loss": 15.6604, + "step": 468430 + }, + { + "epoch": 0.9462784374406606, + "grad_norm": 408.4616394042969, + "learning_rate": 1.2087349651429215e-07, + "loss": 12.794, + "step": 468440 + }, + { + "epoch": 0.9462986380733445, + "grad_norm": 287.6683044433594, + "learning_rate": 1.207972193601087e-07, + "loss": 12.9118, + "step": 468450 + }, + { + "epoch": 0.9463188387060283, + "grad_norm": 260.74639892578125, + "learning_rate": 1.207209659865677e-07, + "loss": 14.1318, + "step": 468460 + }, + { + "epoch": 0.9463390393387121, + "grad_norm": 444.3227844238281, + "learning_rate": 1.206447363940416e-07, + "loss": 18.9796, + "step": 468470 + }, + { + "epoch": 0.9463592399713959, + "grad_norm": 469.51971435546875, + "learning_rate": 1.205685305829013e-07, + "loss": 24.7297, + "step": 468480 + }, + { + "epoch": 0.9463794406040797, + "grad_norm": 134.87899780273438, + "learning_rate": 1.204923485535181e-07, + "loss": 22.758, + "step": 468490 + }, + { + "epoch": 0.9463996412367636, + "grad_norm": 418.8274841308594, + "learning_rate": 1.2041619030626283e-07, + "loss": 21.7794, + "step": 468500 + }, + { + "epoch": 0.9464198418694474, + "grad_norm": 124.39008331298828, + "learning_rate": 1.2034005584150854e-07, + "loss": 12.7598, + "step": 468510 + }, + { + "epoch": 0.9464400425021312, + "grad_norm": 294.5018005371094, + "learning_rate": 1.2026394515962382e-07, + "loss": 27.2352, + "step": 468520 + }, + { + "epoch": 0.946460243134815, + "grad_norm": 265.9117126464844, + "learning_rate": 1.2018785826098057e-07, + "loss": 8.2796, + "step": 468530 + }, + { + "epoch": 0.9464804437674988, + "grad_norm": 134.3726348876953, + "learning_rate": 1.2011179514595072e-07, + "loss": 29.3933, + "step": 468540 + }, + { + "epoch": 0.9465006444001827, + "grad_norm": 245.517333984375, + "learning_rate": 1.20035755814904e-07, + "loss": 16.1021, + "step": 468550 + }, + { + "epoch": 0.9465208450328664, + "grad_norm": 12.15012264251709, + "learning_rate": 1.1995974026821066e-07, + "loss": 24.2295, + "step": 468560 + }, + { + "epoch": 0.9465410456655502, + "grad_norm": 163.0849151611328, + "learning_rate": 1.1988374850624208e-07, + "loss": 16.3836, + "step": 468570 + }, + { + "epoch": 0.946561246298234, + "grad_norm": 0.0, + "learning_rate": 1.198077805293679e-07, + "loss": 3.8871, + "step": 468580 + }, + { + "epoch": 0.9465814469309178, + "grad_norm": 255.66712951660156, + "learning_rate": 1.1973183633795849e-07, + "loss": 52.9546, + "step": 468590 + }, + { + "epoch": 0.9466016475636017, + "grad_norm": 25.33980941772461, + "learning_rate": 1.1965591593238513e-07, + "loss": 10.039, + "step": 468600 + }, + { + "epoch": 0.9466218481962855, + "grad_norm": 210.8309783935547, + "learning_rate": 1.1958001931301587e-07, + "loss": 11.9243, + "step": 468610 + }, + { + "epoch": 0.9466420488289693, + "grad_norm": 138.99659729003906, + "learning_rate": 1.195041464802227e-07, + "loss": 11.3339, + "step": 468620 + }, + { + "epoch": 0.9466622494616531, + "grad_norm": 28.0512752532959, + "learning_rate": 1.19428297434373e-07, + "loss": 22.4278, + "step": 468630 + }, + { + "epoch": 0.9466824500943369, + "grad_norm": 294.96826171875, + "learning_rate": 1.1935247217583934e-07, + "loss": 11.0834, + "step": 468640 + }, + { + "epoch": 0.9467026507270208, + "grad_norm": 0.0, + "learning_rate": 1.1927667070498916e-07, + "loss": 10.3246, + "step": 468650 + }, + { + "epoch": 0.9467228513597046, + "grad_norm": 217.53558349609375, + "learning_rate": 1.1920089302219218e-07, + "loss": 15.5778, + "step": 468660 + }, + { + "epoch": 0.9467430519923884, + "grad_norm": 22.173551559448242, + "learning_rate": 1.1912513912781864e-07, + "loss": 18.1012, + "step": 468670 + }, + { + "epoch": 0.9467632526250722, + "grad_norm": 246.7733612060547, + "learning_rate": 1.1904940902223661e-07, + "loss": 17.512, + "step": 468680 + }, + { + "epoch": 0.946783453257756, + "grad_norm": 312.6581726074219, + "learning_rate": 1.1897370270581632e-07, + "loss": 15.442, + "step": 468690 + }, + { + "epoch": 0.9468036538904399, + "grad_norm": 9.137345314025879, + "learning_rate": 1.1889802017892638e-07, + "loss": 13.729, + "step": 468700 + }, + { + "epoch": 0.9468238545231237, + "grad_norm": 22.472421646118164, + "learning_rate": 1.1882236144193482e-07, + "loss": 11.9116, + "step": 468710 + }, + { + "epoch": 0.9468440551558075, + "grad_norm": 164.4954833984375, + "learning_rate": 1.1874672649521135e-07, + "loss": 25.9625, + "step": 468720 + }, + { + "epoch": 0.9468642557884913, + "grad_norm": 115.21696472167969, + "learning_rate": 1.1867111533912457e-07, + "loss": 11.0453, + "step": 468730 + }, + { + "epoch": 0.9468844564211751, + "grad_norm": 243.4597930908203, + "learning_rate": 1.1859552797404194e-07, + "loss": 12.88, + "step": 468740 + }, + { + "epoch": 0.946904657053859, + "grad_norm": 0.0, + "learning_rate": 1.185199644003332e-07, + "loss": 14.7446, + "step": 468750 + }, + { + "epoch": 0.9469248576865428, + "grad_norm": 552.9313354492188, + "learning_rate": 1.1844442461836636e-07, + "loss": 26.1298, + "step": 468760 + }, + { + "epoch": 0.9469450583192266, + "grad_norm": 114.78852081298828, + "learning_rate": 1.1836890862850892e-07, + "loss": 18.9783, + "step": 468770 + }, + { + "epoch": 0.9469652589519104, + "grad_norm": 174.59942626953125, + "learning_rate": 1.1829341643112946e-07, + "loss": 25.6725, + "step": 468780 + }, + { + "epoch": 0.9469854595845942, + "grad_norm": 544.2118530273438, + "learning_rate": 1.1821794802659603e-07, + "loss": 14.5247, + "step": 468790 + }, + { + "epoch": 0.9470056602172781, + "grad_norm": 182.3418731689453, + "learning_rate": 1.1814250341527611e-07, + "loss": 16.2897, + "step": 468800 + }, + { + "epoch": 0.9470258608499619, + "grad_norm": 0.0, + "learning_rate": 1.1806708259753718e-07, + "loss": 15.6742, + "step": 468810 + }, + { + "epoch": 0.9470460614826456, + "grad_norm": 212.8171844482422, + "learning_rate": 1.179916855737473e-07, + "loss": 16.854, + "step": 468820 + }, + { + "epoch": 0.9470662621153294, + "grad_norm": 208.92489624023438, + "learning_rate": 1.1791631234427448e-07, + "loss": 14.473, + "step": 468830 + }, + { + "epoch": 0.9470864627480132, + "grad_norm": 797.2440185546875, + "learning_rate": 1.1784096290948455e-07, + "loss": 22.6394, + "step": 468840 + }, + { + "epoch": 0.947106663380697, + "grad_norm": 368.9800109863281, + "learning_rate": 1.177656372697461e-07, + "loss": 14.2557, + "step": 468850 + }, + { + "epoch": 0.9471268640133809, + "grad_norm": 150.45416259765625, + "learning_rate": 1.1769033542542552e-07, + "loss": 7.5015, + "step": 468860 + }, + { + "epoch": 0.9471470646460647, + "grad_norm": 169.18548583984375, + "learning_rate": 1.1761505737689082e-07, + "loss": 18.4031, + "step": 468870 + }, + { + "epoch": 0.9471672652787485, + "grad_norm": 204.14382934570312, + "learning_rate": 1.175398031245073e-07, + "loss": 12.0764, + "step": 468880 + }, + { + "epoch": 0.9471874659114323, + "grad_norm": 197.69760131835938, + "learning_rate": 1.1746457266864297e-07, + "loss": 14.9799, + "step": 468890 + }, + { + "epoch": 0.9472076665441161, + "grad_norm": 312.3067321777344, + "learning_rate": 1.1738936600966366e-07, + "loss": 14.4391, + "step": 468900 + }, + { + "epoch": 0.9472278671768, + "grad_norm": 270.6473693847656, + "learning_rate": 1.173141831479374e-07, + "loss": 17.4277, + "step": 468910 + }, + { + "epoch": 0.9472480678094838, + "grad_norm": 161.43527221679688, + "learning_rate": 1.1723902408382892e-07, + "loss": 21.1507, + "step": 468920 + }, + { + "epoch": 0.9472682684421676, + "grad_norm": 248.78045654296875, + "learning_rate": 1.1716388881770513e-07, + "loss": 28.6238, + "step": 468930 + }, + { + "epoch": 0.9472884690748514, + "grad_norm": 490.5490417480469, + "learning_rate": 1.1708877734993296e-07, + "loss": 17.6159, + "step": 468940 + }, + { + "epoch": 0.9473086697075352, + "grad_norm": 171.56349182128906, + "learning_rate": 1.1701368968087711e-07, + "loss": 9.1319, + "step": 468950 + }, + { + "epoch": 0.9473288703402191, + "grad_norm": 262.90118408203125, + "learning_rate": 1.1693862581090453e-07, + "loss": 11.6391, + "step": 468960 + }, + { + "epoch": 0.9473490709729029, + "grad_norm": 520.4573974609375, + "learning_rate": 1.1686358574038104e-07, + "loss": 20.4618, + "step": 468970 + }, + { + "epoch": 0.9473692716055867, + "grad_norm": 89.89054107666016, + "learning_rate": 1.1678856946967244e-07, + "loss": 16.2975, + "step": 468980 + }, + { + "epoch": 0.9473894722382705, + "grad_norm": 0.0, + "learning_rate": 1.1671357699914343e-07, + "loss": 11.2836, + "step": 468990 + }, + { + "epoch": 0.9474096728709543, + "grad_norm": 124.83848571777344, + "learning_rate": 1.166386083291604e-07, + "loss": 13.0262, + "step": 469000 + }, + { + "epoch": 0.9474298735036382, + "grad_norm": 459.4477233886719, + "learning_rate": 1.1656366346008862e-07, + "loss": 21.6147, + "step": 469010 + }, + { + "epoch": 0.947450074136322, + "grad_norm": 277.0845947265625, + "learning_rate": 1.1648874239229391e-07, + "loss": 16.309, + "step": 469020 + }, + { + "epoch": 0.9474702747690058, + "grad_norm": 221.6920166015625, + "learning_rate": 1.1641384512613985e-07, + "loss": 11.8478, + "step": 469030 + }, + { + "epoch": 0.9474904754016896, + "grad_norm": 311.6980285644531, + "learning_rate": 1.1633897166199227e-07, + "loss": 20.3404, + "step": 469040 + }, + { + "epoch": 0.9475106760343734, + "grad_norm": 295.0459899902344, + "learning_rate": 1.1626412200021697e-07, + "loss": 12.8551, + "step": 469050 + }, + { + "epoch": 0.9475308766670573, + "grad_norm": 337.02716064453125, + "learning_rate": 1.1618929614117757e-07, + "loss": 21.6753, + "step": 469060 + }, + { + "epoch": 0.947551077299741, + "grad_norm": 43.183799743652344, + "learning_rate": 1.1611449408523879e-07, + "loss": 17.3536, + "step": 469070 + }, + { + "epoch": 0.9475712779324248, + "grad_norm": 218.86314392089844, + "learning_rate": 1.1603971583276641e-07, + "loss": 11.2716, + "step": 469080 + }, + { + "epoch": 0.9475914785651086, + "grad_norm": 32.14845275878906, + "learning_rate": 1.1596496138412405e-07, + "loss": 29.2551, + "step": 469090 + }, + { + "epoch": 0.9476116791977924, + "grad_norm": 514.91943359375, + "learning_rate": 1.1589023073967586e-07, + "loss": 19.7921, + "step": 469100 + }, + { + "epoch": 0.9476318798304763, + "grad_norm": 0.8910002708435059, + "learning_rate": 1.1581552389978601e-07, + "loss": 9.6958, + "step": 469110 + }, + { + "epoch": 0.9476520804631601, + "grad_norm": 233.43312072753906, + "learning_rate": 1.1574084086481973e-07, + "loss": 24.8293, + "step": 469120 + }, + { + "epoch": 0.9476722810958439, + "grad_norm": 242.30213928222656, + "learning_rate": 1.1566618163513954e-07, + "loss": 12.9195, + "step": 469130 + }, + { + "epoch": 0.9476924817285277, + "grad_norm": 17.421157836914062, + "learning_rate": 1.1559154621110957e-07, + "loss": 7.9596, + "step": 469140 + }, + { + "epoch": 0.9477126823612115, + "grad_norm": 700.6426391601562, + "learning_rate": 1.155169345930951e-07, + "loss": 20.2572, + "step": 469150 + }, + { + "epoch": 0.9477328829938954, + "grad_norm": 450.5594177246094, + "learning_rate": 1.1544234678145805e-07, + "loss": 23.2593, + "step": 469160 + }, + { + "epoch": 0.9477530836265792, + "grad_norm": 111.23733520507812, + "learning_rate": 1.1536778277656258e-07, + "loss": 10.1155, + "step": 469170 + }, + { + "epoch": 0.947773284259263, + "grad_norm": 180.03369140625, + "learning_rate": 1.1529324257877228e-07, + "loss": 21.2579, + "step": 469180 + }, + { + "epoch": 0.9477934848919468, + "grad_norm": 174.05715942382812, + "learning_rate": 1.152187261884502e-07, + "loss": 14.3938, + "step": 469190 + }, + { + "epoch": 0.9478136855246306, + "grad_norm": 443.8026428222656, + "learning_rate": 1.1514423360595939e-07, + "loss": 14.3149, + "step": 469200 + }, + { + "epoch": 0.9478338861573145, + "grad_norm": 859.9103393554688, + "learning_rate": 1.1506976483166343e-07, + "loss": 21.5424, + "step": 469210 + }, + { + "epoch": 0.9478540867899983, + "grad_norm": 729.26171875, + "learning_rate": 1.1499531986592482e-07, + "loss": 22.6312, + "step": 469220 + }, + { + "epoch": 0.9478742874226821, + "grad_norm": 321.8718566894531, + "learning_rate": 1.1492089870910662e-07, + "loss": 17.0135, + "step": 469230 + }, + { + "epoch": 0.9478944880553659, + "grad_norm": 254.30140686035156, + "learning_rate": 1.1484650136157127e-07, + "loss": 22.6747, + "step": 469240 + }, + { + "epoch": 0.9479146886880497, + "grad_norm": 459.9327392578125, + "learning_rate": 1.1477212782368185e-07, + "loss": 16.1738, + "step": 469250 + }, + { + "epoch": 0.9479348893207336, + "grad_norm": 345.2594909667969, + "learning_rate": 1.1469777809580084e-07, + "loss": 20.7374, + "step": 469260 + }, + { + "epoch": 0.9479550899534174, + "grad_norm": 194.55494689941406, + "learning_rate": 1.1462345217828963e-07, + "loss": 10.2714, + "step": 469270 + }, + { + "epoch": 0.9479752905861012, + "grad_norm": 374.4662170410156, + "learning_rate": 1.1454915007151179e-07, + "loss": 16.5521, + "step": 469280 + }, + { + "epoch": 0.947995491218785, + "grad_norm": 177.5438232421875, + "learning_rate": 1.1447487177582816e-07, + "loss": 18.4548, + "step": 469290 + }, + { + "epoch": 0.9480156918514688, + "grad_norm": 267.1248779296875, + "learning_rate": 1.1440061729160235e-07, + "loss": 19.3315, + "step": 469300 + }, + { + "epoch": 0.9480358924841527, + "grad_norm": 45.62554168701172, + "learning_rate": 1.1432638661919515e-07, + "loss": 9.7843, + "step": 469310 + }, + { + "epoch": 0.9480560931168365, + "grad_norm": 358.161376953125, + "learning_rate": 1.1425217975896796e-07, + "loss": 12.668, + "step": 469320 + }, + { + "epoch": 0.9480762937495202, + "grad_norm": 299.3165283203125, + "learning_rate": 1.1417799671128327e-07, + "loss": 16.3915, + "step": 469330 + }, + { + "epoch": 0.948096494382204, + "grad_norm": 272.8214111328125, + "learning_rate": 1.14103837476503e-07, + "loss": 16.3995, + "step": 469340 + }, + { + "epoch": 0.9481166950148878, + "grad_norm": 491.92205810546875, + "learning_rate": 1.1402970205498742e-07, + "loss": 26.6676, + "step": 469350 + }, + { + "epoch": 0.9481368956475716, + "grad_norm": 332.1183776855469, + "learning_rate": 1.1395559044709848e-07, + "loss": 10.5885, + "step": 469360 + }, + { + "epoch": 0.9481570962802555, + "grad_norm": 368.10345458984375, + "learning_rate": 1.1388150265319808e-07, + "loss": 14.0164, + "step": 469370 + }, + { + "epoch": 0.9481772969129393, + "grad_norm": 203.36778259277344, + "learning_rate": 1.1380743867364596e-07, + "loss": 16.265, + "step": 469380 + }, + { + "epoch": 0.9481974975456231, + "grad_norm": 91.9708251953125, + "learning_rate": 1.1373339850880405e-07, + "loss": 15.9717, + "step": 469390 + }, + { + "epoch": 0.9482176981783069, + "grad_norm": 312.5740051269531, + "learning_rate": 1.136593821590326e-07, + "loss": 12.2342, + "step": 469400 + }, + { + "epoch": 0.9482378988109907, + "grad_norm": 123.86184692382812, + "learning_rate": 1.1358538962469356e-07, + "loss": 22.2205, + "step": 469410 + }, + { + "epoch": 0.9482580994436746, + "grad_norm": 345.5480651855469, + "learning_rate": 1.1351142090614553e-07, + "loss": 20.7123, + "step": 469420 + }, + { + "epoch": 0.9482783000763584, + "grad_norm": 109.01927185058594, + "learning_rate": 1.1343747600375044e-07, + "loss": 8.7963, + "step": 469430 + }, + { + "epoch": 0.9482985007090422, + "grad_norm": 101.62294006347656, + "learning_rate": 1.1336355491786966e-07, + "loss": 14.3572, + "step": 469440 + }, + { + "epoch": 0.948318701341726, + "grad_norm": 123.70928955078125, + "learning_rate": 1.1328965764886069e-07, + "loss": 21.6722, + "step": 469450 + }, + { + "epoch": 0.9483389019744098, + "grad_norm": 104.20419311523438, + "learning_rate": 1.1321578419708545e-07, + "loss": 17.5573, + "step": 469460 + }, + { + "epoch": 0.9483591026070937, + "grad_norm": 309.7586975097656, + "learning_rate": 1.1314193456290424e-07, + "loss": 22.9749, + "step": 469470 + }, + { + "epoch": 0.9483793032397775, + "grad_norm": 43.185577392578125, + "learning_rate": 1.1306810874667673e-07, + "loss": 9.9447, + "step": 469480 + }, + { + "epoch": 0.9483995038724613, + "grad_norm": 265.19671630859375, + "learning_rate": 1.129943067487621e-07, + "loss": 13.2196, + "step": 469490 + }, + { + "epoch": 0.9484197045051451, + "grad_norm": 0.0, + "learning_rate": 1.1292052856952063e-07, + "loss": 12.6383, + "step": 469500 + }, + { + "epoch": 0.9484399051378289, + "grad_norm": 44.23899841308594, + "learning_rate": 1.1284677420931201e-07, + "loss": 9.5979, + "step": 469510 + }, + { + "epoch": 0.9484601057705128, + "grad_norm": 255.32183837890625, + "learning_rate": 1.1277304366849539e-07, + "loss": 16.5217, + "step": 469520 + }, + { + "epoch": 0.9484803064031966, + "grad_norm": 399.6812438964844, + "learning_rate": 1.1269933694742996e-07, + "loss": 27.1066, + "step": 469530 + }, + { + "epoch": 0.9485005070358804, + "grad_norm": 610.9277954101562, + "learning_rate": 1.1262565404647485e-07, + "loss": 18.2134, + "step": 469540 + }, + { + "epoch": 0.9485207076685642, + "grad_norm": 219.33639526367188, + "learning_rate": 1.1255199496599034e-07, + "loss": 13.4326, + "step": 469550 + }, + { + "epoch": 0.948540908301248, + "grad_norm": 502.5478210449219, + "learning_rate": 1.1247835970633392e-07, + "loss": 9.4909, + "step": 469560 + }, + { + "epoch": 0.9485611089339319, + "grad_norm": 259.220947265625, + "learning_rate": 1.1240474826786585e-07, + "loss": 11.7262, + "step": 469570 + }, + { + "epoch": 0.9485813095666156, + "grad_norm": 107.51399993896484, + "learning_rate": 1.1233116065094363e-07, + "loss": 19.9315, + "step": 469580 + }, + { + "epoch": 0.9486015101992994, + "grad_norm": 521.3172607421875, + "learning_rate": 1.1225759685592697e-07, + "loss": 17.4128, + "step": 469590 + }, + { + "epoch": 0.9486217108319832, + "grad_norm": 174.23753356933594, + "learning_rate": 1.1218405688317447e-07, + "loss": 8.2005, + "step": 469600 + }, + { + "epoch": 0.948641911464667, + "grad_norm": 227.84393310546875, + "learning_rate": 1.1211054073304305e-07, + "loss": 14.7567, + "step": 469610 + }, + { + "epoch": 0.9486621120973509, + "grad_norm": 191.42213439941406, + "learning_rate": 1.1203704840589247e-07, + "loss": 13.1507, + "step": 469620 + }, + { + "epoch": 0.9486823127300347, + "grad_norm": 16.103796005249023, + "learning_rate": 1.1196357990208074e-07, + "loss": 9.6244, + "step": 469630 + }, + { + "epoch": 0.9487025133627185, + "grad_norm": 304.25390625, + "learning_rate": 1.1189013522196479e-07, + "loss": 22.1177, + "step": 469640 + }, + { + "epoch": 0.9487227139954023, + "grad_norm": 93.02654266357422, + "learning_rate": 1.118167143659038e-07, + "loss": 9.61, + "step": 469650 + }, + { + "epoch": 0.9487429146280861, + "grad_norm": 306.44757080078125, + "learning_rate": 1.1174331733425636e-07, + "loss": 19.6469, + "step": 469660 + }, + { + "epoch": 0.94876311526077, + "grad_norm": 304.4107360839844, + "learning_rate": 1.1166994412737774e-07, + "loss": 21.4619, + "step": 469670 + }, + { + "epoch": 0.9487833158934538, + "grad_norm": 272.19146728515625, + "learning_rate": 1.1159659474562712e-07, + "loss": 13.1943, + "step": 469680 + }, + { + "epoch": 0.9488035165261376, + "grad_norm": 408.47821044921875, + "learning_rate": 1.1152326918936251e-07, + "loss": 23.881, + "step": 469690 + }, + { + "epoch": 0.9488237171588214, + "grad_norm": 301.56256103515625, + "learning_rate": 1.1144996745894033e-07, + "loss": 28.8089, + "step": 469700 + }, + { + "epoch": 0.9488439177915052, + "grad_norm": 455.19940185546875, + "learning_rate": 1.1137668955471803e-07, + "loss": 11.0243, + "step": 469710 + }, + { + "epoch": 0.948864118424189, + "grad_norm": 634.3616333007812, + "learning_rate": 1.1130343547705257e-07, + "loss": 23.3324, + "step": 469720 + }, + { + "epoch": 0.9488843190568729, + "grad_norm": 353.26751708984375, + "learning_rate": 1.1123020522630202e-07, + "loss": 22.2149, + "step": 469730 + }, + { + "epoch": 0.9489045196895567, + "grad_norm": 294.94317626953125, + "learning_rate": 1.111569988028216e-07, + "loss": 26.7194, + "step": 469740 + }, + { + "epoch": 0.9489247203222405, + "grad_norm": 397.300537109375, + "learning_rate": 1.1108381620696885e-07, + "loss": 13.6373, + "step": 469750 + }, + { + "epoch": 0.9489449209549243, + "grad_norm": 251.4523468017578, + "learning_rate": 1.1101065743910122e-07, + "loss": 14.7469, + "step": 469760 + }, + { + "epoch": 0.9489651215876082, + "grad_norm": 285.14208984375, + "learning_rate": 1.1093752249957512e-07, + "loss": 22.9022, + "step": 469770 + }, + { + "epoch": 0.948985322220292, + "grad_norm": 348.3089904785156, + "learning_rate": 1.1086441138874581e-07, + "loss": 34.9027, + "step": 469780 + }, + { + "epoch": 0.9490055228529758, + "grad_norm": 422.3214416503906, + "learning_rate": 1.107913241069708e-07, + "loss": 15.4875, + "step": 469790 + }, + { + "epoch": 0.9490257234856596, + "grad_norm": 139.68527221679688, + "learning_rate": 1.107182606546059e-07, + "loss": 6.1628, + "step": 469800 + }, + { + "epoch": 0.9490459241183434, + "grad_norm": 45.21986770629883, + "learning_rate": 1.1064522103200636e-07, + "loss": 9.997, + "step": 469810 + }, + { + "epoch": 0.9490661247510273, + "grad_norm": 294.0475158691406, + "learning_rate": 1.1057220523953027e-07, + "loss": 9.9022, + "step": 469820 + }, + { + "epoch": 0.9490863253837111, + "grad_norm": 120.39701080322266, + "learning_rate": 1.1049921327753121e-07, + "loss": 9.7648, + "step": 469830 + }, + { + "epoch": 0.9491065260163948, + "grad_norm": 466.20001220703125, + "learning_rate": 1.1042624514636669e-07, + "loss": 16.0697, + "step": 469840 + }, + { + "epoch": 0.9491267266490786, + "grad_norm": 236.29421997070312, + "learning_rate": 1.1035330084639084e-07, + "loss": 11.462, + "step": 469850 + }, + { + "epoch": 0.9491469272817624, + "grad_norm": 260.67724609375, + "learning_rate": 1.1028038037796063e-07, + "loss": 13.6326, + "step": 469860 + }, + { + "epoch": 0.9491671279144462, + "grad_norm": 218.26943969726562, + "learning_rate": 1.1020748374143075e-07, + "loss": 16.4647, + "step": 469870 + }, + { + "epoch": 0.9491873285471301, + "grad_norm": 144.53085327148438, + "learning_rate": 1.1013461093715594e-07, + "loss": 8.0247, + "step": 469880 + }, + { + "epoch": 0.9492075291798139, + "grad_norm": 182.36032104492188, + "learning_rate": 1.1006176196549256e-07, + "loss": 7.5152, + "step": 469890 + }, + { + "epoch": 0.9492277298124977, + "grad_norm": 305.69189453125, + "learning_rate": 1.0998893682679479e-07, + "loss": 12.5555, + "step": 469900 + }, + { + "epoch": 0.9492479304451815, + "grad_norm": 221.77523803710938, + "learning_rate": 1.099161355214179e-07, + "loss": 10.1588, + "step": 469910 + }, + { + "epoch": 0.9492681310778653, + "grad_norm": 431.93939208984375, + "learning_rate": 1.0984335804971713e-07, + "loss": 16.6653, + "step": 469920 + }, + { + "epoch": 0.9492883317105492, + "grad_norm": 29.182485580444336, + "learning_rate": 1.0977060441204612e-07, + "loss": 11.957, + "step": 469930 + }, + { + "epoch": 0.949308532343233, + "grad_norm": 219.14222717285156, + "learning_rate": 1.0969787460876013e-07, + "loss": 12.5424, + "step": 469940 + }, + { + "epoch": 0.9493287329759168, + "grad_norm": 290.021484375, + "learning_rate": 1.0962516864021388e-07, + "loss": 15.1483, + "step": 469950 + }, + { + "epoch": 0.9493489336086006, + "grad_norm": 163.8733673095703, + "learning_rate": 1.0955248650676154e-07, + "loss": 13.3683, + "step": 469960 + }, + { + "epoch": 0.9493691342412844, + "grad_norm": 63.725318908691406, + "learning_rate": 1.0947982820875669e-07, + "loss": 18.1248, + "step": 469970 + }, + { + "epoch": 0.9493893348739683, + "grad_norm": 1048.1917724609375, + "learning_rate": 1.0940719374655462e-07, + "loss": 35.0464, + "step": 469980 + }, + { + "epoch": 0.9494095355066521, + "grad_norm": 194.04000854492188, + "learning_rate": 1.0933458312050837e-07, + "loss": 15.3915, + "step": 469990 + }, + { + "epoch": 0.9494297361393359, + "grad_norm": 4.694753170013428, + "learning_rate": 1.0926199633097156e-07, + "loss": 10.135, + "step": 470000 + }, + { + "epoch": 0.9494499367720197, + "grad_norm": 781.8992309570312, + "learning_rate": 1.0918943337829945e-07, + "loss": 30.4598, + "step": 470010 + }, + { + "epoch": 0.9494701374047035, + "grad_norm": 41.23650360107422, + "learning_rate": 1.091168942628451e-07, + "loss": 9.8344, + "step": 470020 + }, + { + "epoch": 0.9494903380373874, + "grad_norm": 199.8082733154297, + "learning_rate": 1.09044378984961e-07, + "loss": 11.447, + "step": 470030 + }, + { + "epoch": 0.9495105386700712, + "grad_norm": 418.4902648925781, + "learning_rate": 1.0897188754500187e-07, + "loss": 17.8343, + "step": 470040 + }, + { + "epoch": 0.949530739302755, + "grad_norm": 142.16915893554688, + "learning_rate": 1.0889941994332077e-07, + "loss": 13.8683, + "step": 470050 + }, + { + "epoch": 0.9495509399354388, + "grad_norm": 757.1500244140625, + "learning_rate": 1.0882697618027016e-07, + "loss": 17.1053, + "step": 470060 + }, + { + "epoch": 0.9495711405681226, + "grad_norm": 145.1659393310547, + "learning_rate": 1.0875455625620368e-07, + "loss": 23.5184, + "step": 470070 + }, + { + "epoch": 0.9495913412008065, + "grad_norm": 580.390869140625, + "learning_rate": 1.0868216017147437e-07, + "loss": 28.3383, + "step": 470080 + }, + { + "epoch": 0.9496115418334903, + "grad_norm": 363.58538818359375, + "learning_rate": 1.0860978792643528e-07, + "loss": 10.8664, + "step": 470090 + }, + { + "epoch": 0.949631742466174, + "grad_norm": 300.77783203125, + "learning_rate": 1.0853743952143836e-07, + "loss": 13.1416, + "step": 470100 + }, + { + "epoch": 0.9496519430988578, + "grad_norm": 192.88450622558594, + "learning_rate": 1.084651149568372e-07, + "loss": 15.3004, + "step": 470110 + }, + { + "epoch": 0.9496721437315416, + "grad_norm": 342.4608459472656, + "learning_rate": 1.0839281423298375e-07, + "loss": 12.5223, + "step": 470120 + }, + { + "epoch": 0.9496923443642254, + "grad_norm": 282.32489013671875, + "learning_rate": 1.0832053735022996e-07, + "loss": 13.0053, + "step": 470130 + }, + { + "epoch": 0.9497125449969093, + "grad_norm": 545.429443359375, + "learning_rate": 1.0824828430892831e-07, + "loss": 26.8959, + "step": 470140 + }, + { + "epoch": 0.9497327456295931, + "grad_norm": 491.9284973144531, + "learning_rate": 1.0817605510943241e-07, + "loss": 15.1654, + "step": 470150 + }, + { + "epoch": 0.9497529462622769, + "grad_norm": 368.24237060546875, + "learning_rate": 1.0810384975209254e-07, + "loss": 17.1497, + "step": 470160 + }, + { + "epoch": 0.9497731468949607, + "grad_norm": 299.6114501953125, + "learning_rate": 1.0803166823726064e-07, + "loss": 20.3, + "step": 470170 + }, + { + "epoch": 0.9497933475276445, + "grad_norm": 348.03900146484375, + "learning_rate": 1.0795951056528974e-07, + "loss": 22.592, + "step": 470180 + }, + { + "epoch": 0.9498135481603284, + "grad_norm": 344.5665283203125, + "learning_rate": 1.0788737673653072e-07, + "loss": 30.4026, + "step": 470190 + }, + { + "epoch": 0.9498337487930122, + "grad_norm": 424.1960144042969, + "learning_rate": 1.0781526675133492e-07, + "loss": 26.6032, + "step": 470200 + }, + { + "epoch": 0.949853949425696, + "grad_norm": 365.5455322265625, + "learning_rate": 1.0774318061005484e-07, + "loss": 16.6422, + "step": 470210 + }, + { + "epoch": 0.9498741500583798, + "grad_norm": 282.263427734375, + "learning_rate": 1.0767111831304022e-07, + "loss": 16.3358, + "step": 470220 + }, + { + "epoch": 0.9498943506910636, + "grad_norm": 394.3710632324219, + "learning_rate": 1.0759907986064411e-07, + "loss": 18.1085, + "step": 470230 + }, + { + "epoch": 0.9499145513237475, + "grad_norm": 353.6780090332031, + "learning_rate": 1.0752706525321622e-07, + "loss": 11.1399, + "step": 470240 + }, + { + "epoch": 0.9499347519564313, + "grad_norm": 156.2507781982422, + "learning_rate": 1.0745507449110792e-07, + "loss": 17.7139, + "step": 470250 + }, + { + "epoch": 0.9499549525891151, + "grad_norm": 188.82017517089844, + "learning_rate": 1.0738310757467064e-07, + "loss": 19.2339, + "step": 470260 + }, + { + "epoch": 0.9499751532217989, + "grad_norm": 126.23918914794922, + "learning_rate": 1.0731116450425461e-07, + "loss": 12.828, + "step": 470270 + }, + { + "epoch": 0.9499953538544827, + "grad_norm": 310.2344970703125, + "learning_rate": 1.0723924528021012e-07, + "loss": 19.8323, + "step": 470280 + }, + { + "epoch": 0.9500155544871666, + "grad_norm": 183.66943359375, + "learning_rate": 1.0716734990288801e-07, + "loss": 23.0236, + "step": 470290 + }, + { + "epoch": 0.9500357551198504, + "grad_norm": 388.44287109375, + "learning_rate": 1.0709547837263967e-07, + "loss": 21.3623, + "step": 470300 + }, + { + "epoch": 0.9500559557525342, + "grad_norm": 697.5735473632812, + "learning_rate": 1.0702363068981425e-07, + "loss": 34.2935, + "step": 470310 + }, + { + "epoch": 0.950076156385218, + "grad_norm": 300.3755798339844, + "learning_rate": 1.0695180685476148e-07, + "loss": 15.7304, + "step": 470320 + }, + { + "epoch": 0.9500963570179018, + "grad_norm": 380.5934143066406, + "learning_rate": 1.0688000686783272e-07, + "loss": 14.4428, + "step": 470330 + }, + { + "epoch": 0.9501165576505857, + "grad_norm": 265.9210205078125, + "learning_rate": 1.0680823072937774e-07, + "loss": 18.7776, + "step": 470340 + }, + { + "epoch": 0.9501367582832694, + "grad_norm": 251.99920654296875, + "learning_rate": 1.067364784397451e-07, + "loss": 26.2346, + "step": 470350 + }, + { + "epoch": 0.9501569589159532, + "grad_norm": 716.3020629882812, + "learning_rate": 1.0666474999928566e-07, + "loss": 26.2636, + "step": 470360 + }, + { + "epoch": 0.950177159548637, + "grad_norm": 427.94134521484375, + "learning_rate": 1.0659304540834914e-07, + "loss": 16.5608, + "step": 470370 + }, + { + "epoch": 0.9501973601813208, + "grad_norm": 475.6017150878906, + "learning_rate": 1.0652136466728468e-07, + "loss": 17.7835, + "step": 470380 + }, + { + "epoch": 0.9502175608140047, + "grad_norm": 361.806396484375, + "learning_rate": 1.0644970777644093e-07, + "loss": 9.2336, + "step": 470390 + }, + { + "epoch": 0.9502377614466885, + "grad_norm": 181.4495391845703, + "learning_rate": 1.0637807473616812e-07, + "loss": 33.6267, + "step": 470400 + }, + { + "epoch": 0.9502579620793723, + "grad_norm": 449.17926025390625, + "learning_rate": 1.0630646554681545e-07, + "loss": 19.7068, + "step": 470410 + }, + { + "epoch": 0.9502781627120561, + "grad_norm": 294.85003662109375, + "learning_rate": 1.0623488020873097e-07, + "loss": 26.1473, + "step": 470420 + }, + { + "epoch": 0.9502983633447399, + "grad_norm": 325.1664123535156, + "learning_rate": 1.0616331872226437e-07, + "loss": 17.327, + "step": 470430 + }, + { + "epoch": 0.9503185639774238, + "grad_norm": 345.0419006347656, + "learning_rate": 1.0609178108776375e-07, + "loss": 14.0324, + "step": 470440 + }, + { + "epoch": 0.9503387646101076, + "grad_norm": 3.2714545726776123, + "learning_rate": 1.0602026730557879e-07, + "loss": 15.103, + "step": 470450 + }, + { + "epoch": 0.9503589652427914, + "grad_norm": 419.8089294433594, + "learning_rate": 1.0594877737605702e-07, + "loss": 13.4397, + "step": 470460 + }, + { + "epoch": 0.9503791658754752, + "grad_norm": 297.6645812988281, + "learning_rate": 1.0587731129954815e-07, + "loss": 16.4085, + "step": 470470 + }, + { + "epoch": 0.950399366508159, + "grad_norm": 258.7247619628906, + "learning_rate": 1.0580586907639912e-07, + "loss": 15.0832, + "step": 470480 + }, + { + "epoch": 0.9504195671408429, + "grad_norm": 662.227294921875, + "learning_rate": 1.0573445070695853e-07, + "loss": 15.39, + "step": 470490 + }, + { + "epoch": 0.9504397677735267, + "grad_norm": 151.84469604492188, + "learning_rate": 1.0566305619157502e-07, + "loss": 19.7452, + "step": 470500 + }, + { + "epoch": 0.9504599684062105, + "grad_norm": 241.95960998535156, + "learning_rate": 1.0559168553059551e-07, + "loss": 22.2678, + "step": 470510 + }, + { + "epoch": 0.9504801690388943, + "grad_norm": 199.27572631835938, + "learning_rate": 1.0552033872436917e-07, + "loss": 13.4917, + "step": 470520 + }, + { + "epoch": 0.9505003696715781, + "grad_norm": 385.2051696777344, + "learning_rate": 1.0544901577324351e-07, + "loss": 16.1362, + "step": 470530 + }, + { + "epoch": 0.950520570304262, + "grad_norm": 159.27716064453125, + "learning_rate": 1.0537771667756436e-07, + "loss": 14.5987, + "step": 470540 + }, + { + "epoch": 0.9505407709369458, + "grad_norm": 257.7624816894531, + "learning_rate": 1.0530644143768143e-07, + "loss": 18.522, + "step": 470550 + }, + { + "epoch": 0.9505609715696296, + "grad_norm": 122.473388671875, + "learning_rate": 1.0523519005394167e-07, + "loss": 18.2859, + "step": 470560 + }, + { + "epoch": 0.9505811722023134, + "grad_norm": 326.22698974609375, + "learning_rate": 1.0516396252669092e-07, + "loss": 10.2913, + "step": 470570 + }, + { + "epoch": 0.9506013728349972, + "grad_norm": 234.60018920898438, + "learning_rate": 1.0509275885627779e-07, + "loss": 9.2405, + "step": 470580 + }, + { + "epoch": 0.9506215734676811, + "grad_norm": 449.334716796875, + "learning_rate": 1.0502157904304866e-07, + "loss": 13.7069, + "step": 470590 + }, + { + "epoch": 0.9506417741003649, + "grad_norm": 455.3882141113281, + "learning_rate": 1.0495042308735104e-07, + "loss": 19.8605, + "step": 470600 + }, + { + "epoch": 0.9506619747330486, + "grad_norm": 31.53827667236328, + "learning_rate": 1.0487929098953131e-07, + "loss": 11.4078, + "step": 470610 + }, + { + "epoch": 0.9506821753657324, + "grad_norm": 430.6929016113281, + "learning_rate": 1.0480818274993587e-07, + "loss": 10.1137, + "step": 470620 + }, + { + "epoch": 0.9507023759984162, + "grad_norm": 223.26327514648438, + "learning_rate": 1.0473709836891222e-07, + "loss": 12.6717, + "step": 470630 + }, + { + "epoch": 0.9507225766311, + "grad_norm": 348.0071716308594, + "learning_rate": 1.0466603784680562e-07, + "loss": 17.5693, + "step": 470640 + }, + { + "epoch": 0.9507427772637839, + "grad_norm": 654.5459594726562, + "learning_rate": 1.0459500118396304e-07, + "loss": 19.3442, + "step": 470650 + }, + { + "epoch": 0.9507629778964677, + "grad_norm": 272.2804870605469, + "learning_rate": 1.0452398838073141e-07, + "loss": 11.1679, + "step": 470660 + }, + { + "epoch": 0.9507831785291515, + "grad_norm": 324.45819091796875, + "learning_rate": 1.0445299943745546e-07, + "loss": 19.1832, + "step": 470670 + }, + { + "epoch": 0.9508033791618353, + "grad_norm": 358.24114990234375, + "learning_rate": 1.0438203435448157e-07, + "loss": 38.1832, + "step": 470680 + }, + { + "epoch": 0.9508235797945191, + "grad_norm": 1002.8253784179688, + "learning_rate": 1.0431109313215671e-07, + "loss": 32.5478, + "step": 470690 + }, + { + "epoch": 0.950843780427203, + "grad_norm": 336.9682312011719, + "learning_rate": 1.0424017577082556e-07, + "loss": 13.8397, + "step": 470700 + }, + { + "epoch": 0.9508639810598868, + "grad_norm": 0.0, + "learning_rate": 1.0416928227083345e-07, + "loss": 16.7932, + "step": 470710 + }, + { + "epoch": 0.9508841816925706, + "grad_norm": 398.4461364746094, + "learning_rate": 1.0409841263252673e-07, + "loss": 18.7359, + "step": 470720 + }, + { + "epoch": 0.9509043823252544, + "grad_norm": 299.9626159667969, + "learning_rate": 1.040275668562507e-07, + "loss": 20.0693, + "step": 470730 + }, + { + "epoch": 0.9509245829579382, + "grad_norm": 121.19273376464844, + "learning_rate": 1.0395674494235064e-07, + "loss": 25.1212, + "step": 470740 + }, + { + "epoch": 0.9509447835906221, + "grad_norm": 273.7412414550781, + "learning_rate": 1.038859468911707e-07, + "loss": 8.6719, + "step": 470750 + }, + { + "epoch": 0.9509649842233059, + "grad_norm": 4.967778205871582, + "learning_rate": 1.0381517270305786e-07, + "loss": 25.2678, + "step": 470760 + }, + { + "epoch": 0.9509851848559897, + "grad_norm": 0.9025456309318542, + "learning_rate": 1.0374442237835625e-07, + "loss": 7.366, + "step": 470770 + }, + { + "epoch": 0.9510053854886735, + "grad_norm": 134.75787353515625, + "learning_rate": 1.036736959174095e-07, + "loss": 13.4244, + "step": 470780 + }, + { + "epoch": 0.9510255861213573, + "grad_norm": 138.00926208496094, + "learning_rate": 1.03602993320564e-07, + "loss": 15.6263, + "step": 470790 + }, + { + "epoch": 0.9510457867540412, + "grad_norm": 460.781005859375, + "learning_rate": 1.0353231458816338e-07, + "loss": 13.2165, + "step": 470800 + }, + { + "epoch": 0.951065987386725, + "grad_norm": 607.1260375976562, + "learning_rate": 1.0346165972055233e-07, + "loss": 36.6935, + "step": 470810 + }, + { + "epoch": 0.9510861880194088, + "grad_norm": 236.06959533691406, + "learning_rate": 1.0339102871807505e-07, + "loss": 15.51, + "step": 470820 + }, + { + "epoch": 0.9511063886520926, + "grad_norm": 1040.3648681640625, + "learning_rate": 1.0332042158107624e-07, + "loss": 20.5573, + "step": 470830 + }, + { + "epoch": 0.9511265892847764, + "grad_norm": 136.85296630859375, + "learning_rate": 1.032498383099001e-07, + "loss": 8.7666, + "step": 470840 + }, + { + "epoch": 0.9511467899174603, + "grad_norm": 25.13045883178711, + "learning_rate": 1.0317927890489021e-07, + "loss": 12.975, + "step": 470850 + }, + { + "epoch": 0.951166990550144, + "grad_norm": 169.87399291992188, + "learning_rate": 1.0310874336639021e-07, + "loss": 30.7924, + "step": 470860 + }, + { + "epoch": 0.9511871911828278, + "grad_norm": 432.50494384765625, + "learning_rate": 1.030382316947448e-07, + "loss": 13.3448, + "step": 470870 + }, + { + "epoch": 0.9512073918155116, + "grad_norm": 150.0078887939453, + "learning_rate": 1.0296774389029707e-07, + "loss": 21.6321, + "step": 470880 + }, + { + "epoch": 0.9512275924481954, + "grad_norm": 23.30535316467285, + "learning_rate": 1.0289727995339005e-07, + "loss": 10.8087, + "step": 470890 + }, + { + "epoch": 0.9512477930808793, + "grad_norm": 120.26686096191406, + "learning_rate": 1.0282683988436792e-07, + "loss": 12.9034, + "step": 470900 + }, + { + "epoch": 0.9512679937135631, + "grad_norm": 24.445892333984375, + "learning_rate": 1.027564236835743e-07, + "loss": 16.5104, + "step": 470910 + }, + { + "epoch": 0.9512881943462469, + "grad_norm": 125.10945129394531, + "learning_rate": 1.0268603135135169e-07, + "loss": 13.0459, + "step": 470920 + }, + { + "epoch": 0.9513083949789307, + "grad_norm": 604.3030395507812, + "learning_rate": 1.0261566288804315e-07, + "loss": 20.8064, + "step": 470930 + }, + { + "epoch": 0.9513285956116145, + "grad_norm": 217.4271240234375, + "learning_rate": 1.0254531829399228e-07, + "loss": 13.045, + "step": 470940 + }, + { + "epoch": 0.9513487962442984, + "grad_norm": 168.72885131835938, + "learning_rate": 1.024749975695416e-07, + "loss": 14.6424, + "step": 470950 + }, + { + "epoch": 0.9513689968769822, + "grad_norm": 293.8639221191406, + "learning_rate": 1.0240470071503306e-07, + "loss": 11.9183, + "step": 470960 + }, + { + "epoch": 0.951389197509666, + "grad_norm": 154.8897705078125, + "learning_rate": 1.0233442773081026e-07, + "loss": 20.1196, + "step": 470970 + }, + { + "epoch": 0.9514093981423498, + "grad_norm": 260.27703857421875, + "learning_rate": 1.0226417861721571e-07, + "loss": 7.7497, + "step": 470980 + }, + { + "epoch": 0.9514295987750336, + "grad_norm": 214.36672973632812, + "learning_rate": 1.0219395337459137e-07, + "loss": 6.1547, + "step": 470990 + }, + { + "epoch": 0.9514497994077175, + "grad_norm": 400.2989807128906, + "learning_rate": 1.0212375200327973e-07, + "loss": 18.0779, + "step": 471000 + }, + { + "epoch": 0.9514700000404013, + "grad_norm": 151.48472595214844, + "learning_rate": 1.0205357450362275e-07, + "loss": 11.009, + "step": 471010 + }, + { + "epoch": 0.9514902006730851, + "grad_norm": 3.5933449268341064, + "learning_rate": 1.0198342087596292e-07, + "loss": 16.8308, + "step": 471020 + }, + { + "epoch": 0.9515104013057689, + "grad_norm": 340.96038818359375, + "learning_rate": 1.0191329112064164e-07, + "loss": 17.0175, + "step": 471030 + }, + { + "epoch": 0.9515306019384527, + "grad_norm": 431.24090576171875, + "learning_rate": 1.0184318523800086e-07, + "loss": 13.8514, + "step": 471040 + }, + { + "epoch": 0.9515508025711366, + "grad_norm": 370.28778076171875, + "learning_rate": 1.0177310322838251e-07, + "loss": 15.9059, + "step": 471050 + }, + { + "epoch": 0.9515710032038204, + "grad_norm": 238.84820556640625, + "learning_rate": 1.0170304509212803e-07, + "loss": 20.2611, + "step": 471060 + }, + { + "epoch": 0.9515912038365042, + "grad_norm": 171.47479248046875, + "learning_rate": 1.0163301082957821e-07, + "loss": 21.947, + "step": 471070 + }, + { + "epoch": 0.951611404469188, + "grad_norm": 345.8973693847656, + "learning_rate": 1.0156300044107559e-07, + "loss": 11.2068, + "step": 471080 + }, + { + "epoch": 0.9516316051018718, + "grad_norm": 182.06565856933594, + "learning_rate": 1.0149301392696098e-07, + "loss": 15.6309, + "step": 471090 + }, + { + "epoch": 0.9516518057345557, + "grad_norm": 104.55928802490234, + "learning_rate": 1.0142305128757468e-07, + "loss": 22.0133, + "step": 471100 + }, + { + "epoch": 0.9516720063672395, + "grad_norm": 233.50936889648438, + "learning_rate": 1.0135311252325863e-07, + "loss": 12.8193, + "step": 471110 + }, + { + "epoch": 0.9516922069999232, + "grad_norm": 240.9148406982422, + "learning_rate": 1.0128319763435312e-07, + "loss": 14.8339, + "step": 471120 + }, + { + "epoch": 0.951712407632607, + "grad_norm": 221.30917358398438, + "learning_rate": 1.0121330662119954e-07, + "loss": 12.9855, + "step": 471130 + }, + { + "epoch": 0.9517326082652908, + "grad_norm": 175.91488647460938, + "learning_rate": 1.0114343948413818e-07, + "loss": 13.9831, + "step": 471140 + }, + { + "epoch": 0.9517528088979746, + "grad_norm": 618.2254638671875, + "learning_rate": 1.0107359622350877e-07, + "loss": 27.8967, + "step": 471150 + }, + { + "epoch": 0.9517730095306585, + "grad_norm": 109.8958740234375, + "learning_rate": 1.0100377683965323e-07, + "loss": 13.6493, + "step": 471160 + }, + { + "epoch": 0.9517932101633423, + "grad_norm": 235.784912109375, + "learning_rate": 1.0093398133291132e-07, + "loss": 16.3887, + "step": 471170 + }, + { + "epoch": 0.9518134107960261, + "grad_norm": 414.00244140625, + "learning_rate": 1.0086420970362221e-07, + "loss": 17.9258, + "step": 471180 + }, + { + "epoch": 0.9518336114287099, + "grad_norm": 336.341552734375, + "learning_rate": 1.0079446195212728e-07, + "loss": 28.1355, + "step": 471190 + }, + { + "epoch": 0.9518538120613937, + "grad_norm": 264.4357604980469, + "learning_rate": 1.007247380787657e-07, + "loss": 17.1038, + "step": 471200 + }, + { + "epoch": 0.9518740126940776, + "grad_norm": 768.3027954101562, + "learning_rate": 1.0065503808387777e-07, + "loss": 24.6599, + "step": 471210 + }, + { + "epoch": 0.9518942133267614, + "grad_norm": 95.99604034423828, + "learning_rate": 1.0058536196780266e-07, + "loss": 11.4118, + "step": 471220 + }, + { + "epoch": 0.9519144139594452, + "grad_norm": 242.30259704589844, + "learning_rate": 1.0051570973088064e-07, + "loss": 17.5281, + "step": 471230 + }, + { + "epoch": 0.951934614592129, + "grad_norm": 58.12041091918945, + "learning_rate": 1.0044608137345091e-07, + "loss": 11.3245, + "step": 471240 + }, + { + "epoch": 0.9519548152248128, + "grad_norm": 324.6882629394531, + "learning_rate": 1.0037647689585207e-07, + "loss": 14.2132, + "step": 471250 + }, + { + "epoch": 0.9519750158574967, + "grad_norm": 408.84527587890625, + "learning_rate": 1.0030689629842382e-07, + "loss": 19.237, + "step": 471260 + }, + { + "epoch": 0.9519952164901805, + "grad_norm": 254.10365295410156, + "learning_rate": 1.0023733958150706e-07, + "loss": 19.3805, + "step": 471270 + }, + { + "epoch": 0.9520154171228643, + "grad_norm": 380.91107177734375, + "learning_rate": 1.0016780674543813e-07, + "loss": 15.11, + "step": 471280 + }, + { + "epoch": 0.9520356177555481, + "grad_norm": 117.3650131225586, + "learning_rate": 1.0009829779055679e-07, + "loss": 6.3612, + "step": 471290 + }, + { + "epoch": 0.952055818388232, + "grad_norm": 166.88565063476562, + "learning_rate": 1.0002881271720222e-07, + "loss": 14.1588, + "step": 471300 + }, + { + "epoch": 0.9520760190209158, + "grad_norm": 892.7716674804688, + "learning_rate": 9.995935152571357e-08, + "loss": 19.1736, + "step": 471310 + }, + { + "epoch": 0.9520962196535996, + "grad_norm": 268.48779296875, + "learning_rate": 9.988991421642779e-08, + "loss": 15.5897, + "step": 471320 + }, + { + "epoch": 0.9521164202862834, + "grad_norm": 214.12991333007812, + "learning_rate": 9.98205007896852e-08, + "loss": 11.805, + "step": 471330 + }, + { + "epoch": 0.9521366209189672, + "grad_norm": 79.6239242553711, + "learning_rate": 9.975111124582271e-08, + "loss": 18.7248, + "step": 471340 + }, + { + "epoch": 0.952156821551651, + "grad_norm": 0.0, + "learning_rate": 9.968174558517895e-08, + "loss": 10.0281, + "step": 471350 + }, + { + "epoch": 0.9521770221843349, + "grad_norm": 279.0708312988281, + "learning_rate": 9.961240380809201e-08, + "loss": 17.1007, + "step": 471360 + }, + { + "epoch": 0.9521972228170186, + "grad_norm": 392.2039794921875, + "learning_rate": 9.954308591489991e-08, + "loss": 27.0333, + "step": 471370 + }, + { + "epoch": 0.9522174234497024, + "grad_norm": 0.0, + "learning_rate": 9.947379190594076e-08, + "loss": 22.3938, + "step": 471380 + }, + { + "epoch": 0.9522376240823862, + "grad_norm": 460.63726806640625, + "learning_rate": 9.940452178155147e-08, + "loss": 20.6769, + "step": 471390 + }, + { + "epoch": 0.95225782471507, + "grad_norm": 318.01348876953125, + "learning_rate": 9.933527554207012e-08, + "loss": 23.6312, + "step": 471400 + }, + { + "epoch": 0.9522780253477539, + "grad_norm": 459.3076477050781, + "learning_rate": 9.926605318783477e-08, + "loss": 22.5429, + "step": 471410 + }, + { + "epoch": 0.9522982259804377, + "grad_norm": 373.6624755859375, + "learning_rate": 9.919685471918183e-08, + "loss": 18.406, + "step": 471420 + }, + { + "epoch": 0.9523184266131215, + "grad_norm": 182.34884643554688, + "learning_rate": 9.912768013644936e-08, + "loss": 17.1703, + "step": 471430 + }, + { + "epoch": 0.9523386272458053, + "grad_norm": 388.4763488769531, + "learning_rate": 9.905852943997374e-08, + "loss": 12.4804, + "step": 471440 + }, + { + "epoch": 0.9523588278784891, + "grad_norm": 151.5099639892578, + "learning_rate": 9.898940263009304e-08, + "loss": 10.5884, + "step": 471450 + }, + { + "epoch": 0.952379028511173, + "grad_norm": 247.20875549316406, + "learning_rate": 9.892029970714367e-08, + "loss": 14.7343, + "step": 471460 + }, + { + "epoch": 0.9523992291438568, + "grad_norm": 171.4412384033203, + "learning_rate": 9.885122067146147e-08, + "loss": 15.0036, + "step": 471470 + }, + { + "epoch": 0.9524194297765406, + "grad_norm": 182.62937927246094, + "learning_rate": 9.878216552338504e-08, + "loss": 11.706, + "step": 471480 + }, + { + "epoch": 0.9524396304092244, + "grad_norm": 360.761962890625, + "learning_rate": 9.871313426324913e-08, + "loss": 34.3857, + "step": 471490 + }, + { + "epoch": 0.9524598310419082, + "grad_norm": 96.55928039550781, + "learning_rate": 9.864412689139124e-08, + "loss": 14.3127, + "step": 471500 + }, + { + "epoch": 0.952480031674592, + "grad_norm": 144.3299560546875, + "learning_rate": 9.857514340814667e-08, + "loss": 12.1043, + "step": 471510 + }, + { + "epoch": 0.9525002323072759, + "grad_norm": 324.63421630859375, + "learning_rate": 9.850618381385346e-08, + "loss": 12.5964, + "step": 471520 + }, + { + "epoch": 0.9525204329399597, + "grad_norm": 260.4071044921875, + "learning_rate": 9.843724810884636e-08, + "loss": 10.9636, + "step": 471530 + }, + { + "epoch": 0.9525406335726435, + "grad_norm": 204.39683532714844, + "learning_rate": 9.836833629346121e-08, + "loss": 13.2621, + "step": 471540 + }, + { + "epoch": 0.9525608342053273, + "grad_norm": 417.4044189453125, + "learning_rate": 9.82994483680344e-08, + "loss": 21.0501, + "step": 471550 + }, + { + "epoch": 0.9525810348380112, + "grad_norm": 186.35858154296875, + "learning_rate": 9.823058433290178e-08, + "loss": 14.8036, + "step": 471560 + }, + { + "epoch": 0.952601235470695, + "grad_norm": 221.61087036132812, + "learning_rate": 9.816174418839863e-08, + "loss": 14.2294, + "step": 471570 + }, + { + "epoch": 0.9526214361033788, + "grad_norm": 43.475059509277344, + "learning_rate": 9.809292793486025e-08, + "loss": 11.6246, + "step": 471580 + }, + { + "epoch": 0.9526416367360626, + "grad_norm": 436.1901550292969, + "learning_rate": 9.802413557262302e-08, + "loss": 18.3258, + "step": 471590 + }, + { + "epoch": 0.9526618373687464, + "grad_norm": 393.25921630859375, + "learning_rate": 9.795536710202169e-08, + "loss": 16.0255, + "step": 471600 + }, + { + "epoch": 0.9526820380014303, + "grad_norm": 248.4064483642578, + "learning_rate": 9.788662252339099e-08, + "loss": 16.6259, + "step": 471610 + }, + { + "epoch": 0.9527022386341141, + "grad_norm": 183.06944274902344, + "learning_rate": 9.781790183706674e-08, + "loss": 19.8227, + "step": 471620 + }, + { + "epoch": 0.9527224392667978, + "grad_norm": 277.7666015625, + "learning_rate": 9.774920504338315e-08, + "loss": 27.9314, + "step": 471630 + }, + { + "epoch": 0.9527426398994816, + "grad_norm": 0.0, + "learning_rate": 9.768053214267548e-08, + "loss": 17.4172, + "step": 471640 + }, + { + "epoch": 0.9527628405321654, + "grad_norm": 146.64297485351562, + "learning_rate": 9.761188313527792e-08, + "loss": 16.2158, + "step": 471650 + }, + { + "epoch": 0.9527830411648492, + "grad_norm": 15.5716552734375, + "learning_rate": 9.754325802152575e-08, + "loss": 10.0231, + "step": 471660 + }, + { + "epoch": 0.9528032417975331, + "grad_norm": 118.41764831542969, + "learning_rate": 9.747465680175316e-08, + "loss": 15.2342, + "step": 471670 + }, + { + "epoch": 0.9528234424302169, + "grad_norm": 186.61383056640625, + "learning_rate": 9.740607947629433e-08, + "loss": 17.2289, + "step": 471680 + }, + { + "epoch": 0.9528436430629007, + "grad_norm": 441.5155944824219, + "learning_rate": 9.733752604548397e-08, + "loss": 23.8443, + "step": 471690 + }, + { + "epoch": 0.9528638436955845, + "grad_norm": 549.6482543945312, + "learning_rate": 9.726899650965626e-08, + "loss": 21.348, + "step": 471700 + }, + { + "epoch": 0.9528840443282683, + "grad_norm": 234.8240509033203, + "learning_rate": 9.720049086914374e-08, + "loss": 21.2825, + "step": 471710 + }, + { + "epoch": 0.9529042449609522, + "grad_norm": 252.1730499267578, + "learning_rate": 9.713200912428222e-08, + "loss": 23.3847, + "step": 471720 + }, + { + "epoch": 0.952924445593636, + "grad_norm": 107.79186248779297, + "learning_rate": 9.706355127540423e-08, + "loss": 12.5654, + "step": 471730 + }, + { + "epoch": 0.9529446462263198, + "grad_norm": 59.06559371948242, + "learning_rate": 9.699511732284395e-08, + "loss": 13.7314, + "step": 471740 + }, + { + "epoch": 0.9529648468590036, + "grad_norm": 165.02932739257812, + "learning_rate": 9.692670726693498e-08, + "loss": 11.965, + "step": 471750 + }, + { + "epoch": 0.9529850474916874, + "grad_norm": 11.239250183105469, + "learning_rate": 9.68583211080104e-08, + "loss": 12.9003, + "step": 471760 + }, + { + "epoch": 0.9530052481243713, + "grad_norm": 470.4634094238281, + "learning_rate": 9.678995884640385e-08, + "loss": 19.7971, + "step": 471770 + }, + { + "epoch": 0.9530254487570551, + "grad_norm": 160.3477783203125, + "learning_rate": 9.672162048244838e-08, + "loss": 19.9627, + "step": 471780 + }, + { + "epoch": 0.9530456493897389, + "grad_norm": 55.016910552978516, + "learning_rate": 9.66533060164765e-08, + "loss": 11.3744, + "step": 471790 + }, + { + "epoch": 0.9530658500224227, + "grad_norm": 407.41015625, + "learning_rate": 9.658501544882182e-08, + "loss": 16.6285, + "step": 471800 + }, + { + "epoch": 0.9530860506551065, + "grad_norm": 283.1016540527344, + "learning_rate": 9.651674877981743e-08, + "loss": 16.5085, + "step": 471810 + }, + { + "epoch": 0.9531062512877904, + "grad_norm": 219.71046447753906, + "learning_rate": 9.644850600979583e-08, + "loss": 24.2003, + "step": 471820 + }, + { + "epoch": 0.9531264519204742, + "grad_norm": 236.0611114501953, + "learning_rate": 9.638028713908898e-08, + "loss": 10.6865, + "step": 471830 + }, + { + "epoch": 0.953146652553158, + "grad_norm": 228.70433044433594, + "learning_rate": 9.63120921680305e-08, + "loss": 13.9107, + "step": 471840 + }, + { + "epoch": 0.9531668531858418, + "grad_norm": 213.1024169921875, + "learning_rate": 9.62439210969518e-08, + "loss": 17.9185, + "step": 471850 + }, + { + "epoch": 0.9531870538185256, + "grad_norm": 306.6065673828125, + "learning_rate": 9.617577392618538e-08, + "loss": 16.2468, + "step": 471860 + }, + { + "epoch": 0.9532072544512095, + "grad_norm": 266.8642883300781, + "learning_rate": 9.61076506560632e-08, + "loss": 13.3353, + "step": 471870 + }, + { + "epoch": 0.9532274550838933, + "grad_norm": 342.0880126953125, + "learning_rate": 9.603955128691833e-08, + "loss": 11.9232, + "step": 471880 + }, + { + "epoch": 0.953247655716577, + "grad_norm": 213.37538146972656, + "learning_rate": 9.597147581908107e-08, + "loss": 12.8125, + "step": 471890 + }, + { + "epoch": 0.9532678563492608, + "grad_norm": 186.4586944580078, + "learning_rate": 9.590342425288446e-08, + "loss": 16.5308, + "step": 471900 + }, + { + "epoch": 0.9532880569819446, + "grad_norm": 446.7265625, + "learning_rate": 9.583539658865992e-08, + "loss": 12.6748, + "step": 471910 + }, + { + "epoch": 0.9533082576146285, + "grad_norm": 308.08984375, + "learning_rate": 9.576739282673886e-08, + "loss": 13.9051, + "step": 471920 + }, + { + "epoch": 0.9533284582473123, + "grad_norm": 381.631591796875, + "learning_rate": 9.569941296745212e-08, + "loss": 24.1847, + "step": 471930 + }, + { + "epoch": 0.9533486588799961, + "grad_norm": 297.9588317871094, + "learning_rate": 9.563145701113219e-08, + "loss": 33.634, + "step": 471940 + }, + { + "epoch": 0.9533688595126799, + "grad_norm": 29.095582962036133, + "learning_rate": 9.556352495810994e-08, + "loss": 13.6166, + "step": 471950 + }, + { + "epoch": 0.9533890601453637, + "grad_norm": 594.6957397460938, + "learning_rate": 9.549561680871566e-08, + "loss": 28.8783, + "step": 471960 + }, + { + "epoch": 0.9534092607780476, + "grad_norm": 399.8542175292969, + "learning_rate": 9.542773256328075e-08, + "loss": 18.6644, + "step": 471970 + }, + { + "epoch": 0.9534294614107314, + "grad_norm": 143.90599060058594, + "learning_rate": 9.53598722221366e-08, + "loss": 13.9902, + "step": 471980 + }, + { + "epoch": 0.9534496620434152, + "grad_norm": 51.62968444824219, + "learning_rate": 9.529203578561353e-08, + "loss": 16.3586, + "step": 471990 + }, + { + "epoch": 0.953469862676099, + "grad_norm": 589.9480590820312, + "learning_rate": 9.522422325404234e-08, + "loss": 24.4369, + "step": 472000 + }, + { + "epoch": 0.9534900633087828, + "grad_norm": 291.8258361816406, + "learning_rate": 9.515643462775337e-08, + "loss": 24.2265, + "step": 472010 + }, + { + "epoch": 0.9535102639414667, + "grad_norm": 483.8877258300781, + "learning_rate": 9.508866990707688e-08, + "loss": 16.6844, + "step": 472020 + }, + { + "epoch": 0.9535304645741505, + "grad_norm": 495.4203796386719, + "learning_rate": 9.502092909234317e-08, + "loss": 18.2375, + "step": 472030 + }, + { + "epoch": 0.9535506652068343, + "grad_norm": 362.7491760253906, + "learning_rate": 9.495321218388309e-08, + "loss": 26.3503, + "step": 472040 + }, + { + "epoch": 0.9535708658395181, + "grad_norm": 207.70855712890625, + "learning_rate": 9.488551918202527e-08, + "loss": 10.3865, + "step": 472050 + }, + { + "epoch": 0.9535910664722019, + "grad_norm": 382.47027587890625, + "learning_rate": 9.481785008710165e-08, + "loss": 21.5717, + "step": 472060 + }, + { + "epoch": 0.9536112671048858, + "grad_norm": 213.85243225097656, + "learning_rate": 9.475020489944032e-08, + "loss": 7.1432, + "step": 472070 + }, + { + "epoch": 0.9536314677375696, + "grad_norm": 305.5526123046875, + "learning_rate": 9.468258361937155e-08, + "loss": 14.1012, + "step": 472080 + }, + { + "epoch": 0.9536516683702534, + "grad_norm": 192.07510375976562, + "learning_rate": 9.461498624722509e-08, + "loss": 13.8535, + "step": 472090 + }, + { + "epoch": 0.9536718690029372, + "grad_norm": 718.9573364257812, + "learning_rate": 9.454741278333013e-08, + "loss": 20.1048, + "step": 472100 + }, + { + "epoch": 0.953692069635621, + "grad_norm": 213.24778747558594, + "learning_rate": 9.447986322801583e-08, + "loss": 18.3788, + "step": 472110 + }, + { + "epoch": 0.9537122702683049, + "grad_norm": 92.05439758300781, + "learning_rate": 9.441233758161139e-08, + "loss": 11.6426, + "step": 472120 + }, + { + "epoch": 0.9537324709009887, + "grad_norm": 32.383121490478516, + "learning_rate": 9.434483584444709e-08, + "loss": 12.3286, + "step": 472130 + }, + { + "epoch": 0.9537526715336724, + "grad_norm": 90.15750122070312, + "learning_rate": 9.427735801685101e-08, + "loss": 11.0646, + "step": 472140 + }, + { + "epoch": 0.9537728721663562, + "grad_norm": 607.479248046875, + "learning_rate": 9.420990409915176e-08, + "loss": 27.6571, + "step": 472150 + }, + { + "epoch": 0.95379307279904, + "grad_norm": 428.8316345214844, + "learning_rate": 9.414247409167854e-08, + "loss": 20.6816, + "step": 472160 + }, + { + "epoch": 0.9538132734317238, + "grad_norm": 114.02137756347656, + "learning_rate": 9.407506799475996e-08, + "loss": 20.4459, + "step": 472170 + }, + { + "epoch": 0.9538334740644077, + "grad_norm": 25.569461822509766, + "learning_rate": 9.400768580872411e-08, + "loss": 12.369, + "step": 472180 + }, + { + "epoch": 0.9538536746970915, + "grad_norm": 149.68170166015625, + "learning_rate": 9.394032753390014e-08, + "loss": 12.3208, + "step": 472190 + }, + { + "epoch": 0.9538738753297753, + "grad_norm": 2.610166311264038, + "learning_rate": 9.387299317061615e-08, + "loss": 13.1623, + "step": 472200 + }, + { + "epoch": 0.9538940759624591, + "grad_norm": 71.14301300048828, + "learning_rate": 9.380568271919966e-08, + "loss": 6.1099, + "step": 472210 + }, + { + "epoch": 0.9539142765951429, + "grad_norm": 137.8601531982422, + "learning_rate": 9.373839617997926e-08, + "loss": 19.6041, + "step": 472220 + }, + { + "epoch": 0.9539344772278268, + "grad_norm": 227.89263916015625, + "learning_rate": 9.367113355328361e-08, + "loss": 17.0364, + "step": 472230 + }, + { + "epoch": 0.9539546778605106, + "grad_norm": 239.04676818847656, + "learning_rate": 9.36038948394391e-08, + "loss": 20.7242, + "step": 472240 + }, + { + "epoch": 0.9539748784931944, + "grad_norm": 242.86077880859375, + "learning_rate": 9.353668003877437e-08, + "loss": 26.4324, + "step": 472250 + }, + { + "epoch": 0.9539950791258782, + "grad_norm": 222.70523071289062, + "learning_rate": 9.346948915161636e-08, + "loss": 18.3903, + "step": 472260 + }, + { + "epoch": 0.954015279758562, + "grad_norm": 197.29324340820312, + "learning_rate": 9.340232217829371e-08, + "loss": 11.9223, + "step": 472270 + }, + { + "epoch": 0.9540354803912459, + "grad_norm": 477.68975830078125, + "learning_rate": 9.333517911913281e-08, + "loss": 14.8718, + "step": 472280 + }, + { + "epoch": 0.9540556810239297, + "grad_norm": 248.9418182373047, + "learning_rate": 9.326805997446065e-08, + "loss": 30.3828, + "step": 472290 + }, + { + "epoch": 0.9540758816566135, + "grad_norm": 326.0284729003906, + "learning_rate": 9.320096474460527e-08, + "loss": 14.7554, + "step": 472300 + }, + { + "epoch": 0.9540960822892973, + "grad_norm": 496.3621826171875, + "learning_rate": 9.31338934298931e-08, + "loss": 15.1102, + "step": 472310 + }, + { + "epoch": 0.9541162829219811, + "grad_norm": 335.30010986328125, + "learning_rate": 9.306684603065108e-08, + "loss": 16.1921, + "step": 472320 + }, + { + "epoch": 0.954136483554665, + "grad_norm": 271.0550537109375, + "learning_rate": 9.299982254720674e-08, + "loss": 11.4808, + "step": 472330 + }, + { + "epoch": 0.9541566841873488, + "grad_norm": 316.6662902832031, + "learning_rate": 9.293282297988537e-08, + "loss": 25.0978, + "step": 472340 + }, + { + "epoch": 0.9541768848200326, + "grad_norm": 146.21981811523438, + "learning_rate": 9.28658473290145e-08, + "loss": 18.0033, + "step": 472350 + }, + { + "epoch": 0.9541970854527164, + "grad_norm": 153.33340454101562, + "learning_rate": 9.27988955949205e-08, + "loss": 20.8644, + "step": 472360 + }, + { + "epoch": 0.9542172860854002, + "grad_norm": 101.7238540649414, + "learning_rate": 9.273196777792926e-08, + "loss": 19.9834, + "step": 472370 + }, + { + "epoch": 0.9542374867180841, + "grad_norm": 636.7554931640625, + "learning_rate": 9.266506387836771e-08, + "loss": 19.0273, + "step": 472380 + }, + { + "epoch": 0.9542576873507679, + "grad_norm": 293.48046875, + "learning_rate": 9.259818389656117e-08, + "loss": 10.0128, + "step": 472390 + }, + { + "epoch": 0.9542778879834516, + "grad_norm": 214.25135803222656, + "learning_rate": 9.253132783283548e-08, + "loss": 17.1083, + "step": 472400 + }, + { + "epoch": 0.9542980886161354, + "grad_norm": 296.0226135253906, + "learning_rate": 9.246449568751702e-08, + "loss": 15.2656, + "step": 472410 + }, + { + "epoch": 0.9543182892488192, + "grad_norm": 204.22378540039062, + "learning_rate": 9.239768746093226e-08, + "loss": 14.079, + "step": 472420 + }, + { + "epoch": 0.954338489881503, + "grad_norm": 450.76800537109375, + "learning_rate": 9.233090315340532e-08, + "loss": 17.212, + "step": 472430 + }, + { + "epoch": 0.9543586905141869, + "grad_norm": 127.94630432128906, + "learning_rate": 9.226414276526208e-08, + "loss": 19.5001, + "step": 472440 + }, + { + "epoch": 0.9543788911468707, + "grad_norm": 148.956787109375, + "learning_rate": 9.219740629682838e-08, + "loss": 17.2413, + "step": 472450 + }, + { + "epoch": 0.9543990917795545, + "grad_norm": 441.98602294921875, + "learning_rate": 9.213069374842953e-08, + "loss": 20.9059, + "step": 472460 + }, + { + "epoch": 0.9544192924122383, + "grad_norm": 0.0, + "learning_rate": 9.206400512039026e-08, + "loss": 15.4225, + "step": 472470 + }, + { + "epoch": 0.9544394930449221, + "grad_norm": 259.5444030761719, + "learning_rate": 9.199734041303532e-08, + "loss": 17.8902, + "step": 472480 + }, + { + "epoch": 0.954459693677606, + "grad_norm": 297.65185546875, + "learning_rate": 9.19306996266911e-08, + "loss": 14.0223, + "step": 472490 + }, + { + "epoch": 0.9544798943102898, + "grad_norm": 515.2940673828125, + "learning_rate": 9.186408276168012e-08, + "loss": 13.3307, + "step": 472500 + }, + { + "epoch": 0.9545000949429736, + "grad_norm": 328.2804260253906, + "learning_rate": 9.179748981832881e-08, + "loss": 18.1669, + "step": 472510 + }, + { + "epoch": 0.9545202955756574, + "grad_norm": 175.72622680664062, + "learning_rate": 9.173092079696188e-08, + "loss": 9.2535, + "step": 472520 + }, + { + "epoch": 0.9545404962083412, + "grad_norm": 200.9589080810547, + "learning_rate": 9.166437569790242e-08, + "loss": 29.9611, + "step": 472530 + }, + { + "epoch": 0.9545606968410251, + "grad_norm": 206.6644287109375, + "learning_rate": 9.159785452147574e-08, + "loss": 7.6536, + "step": 472540 + }, + { + "epoch": 0.9545808974737089, + "grad_norm": 35.999141693115234, + "learning_rate": 9.153135726800599e-08, + "loss": 13.8692, + "step": 472550 + }, + { + "epoch": 0.9546010981063927, + "grad_norm": 502.7469787597656, + "learning_rate": 9.146488393781683e-08, + "loss": 12.1722, + "step": 472560 + }, + { + "epoch": 0.9546212987390765, + "grad_norm": 374.749267578125, + "learning_rate": 9.139843453123243e-08, + "loss": 18.9526, + "step": 472570 + }, + { + "epoch": 0.9546414993717603, + "grad_norm": 475.12982177734375, + "learning_rate": 9.133200904857642e-08, + "loss": 16.7067, + "step": 472580 + }, + { + "epoch": 0.9546617000044442, + "grad_norm": 101.17208099365234, + "learning_rate": 9.126560749017354e-08, + "loss": 20.7566, + "step": 472590 + }, + { + "epoch": 0.954681900637128, + "grad_norm": 206.51406860351562, + "learning_rate": 9.119922985634633e-08, + "loss": 13.3556, + "step": 472600 + }, + { + "epoch": 0.9547021012698118, + "grad_norm": 189.1208953857422, + "learning_rate": 9.113287614741895e-08, + "loss": 18.4321, + "step": 472610 + }, + { + "epoch": 0.9547223019024956, + "grad_norm": 337.6694641113281, + "learning_rate": 9.106654636371448e-08, + "loss": 17.0854, + "step": 472620 + }, + { + "epoch": 0.9547425025351794, + "grad_norm": 70.41694641113281, + "learning_rate": 9.1000240505556e-08, + "loss": 16.453, + "step": 472630 + }, + { + "epoch": 0.9547627031678633, + "grad_norm": 110.73184967041016, + "learning_rate": 9.093395857326714e-08, + "loss": 19.7817, + "step": 472640 + }, + { + "epoch": 0.954782903800547, + "grad_norm": 193.75955200195312, + "learning_rate": 9.086770056717099e-08, + "loss": 17.0077, + "step": 472650 + }, + { + "epoch": 0.9548031044332308, + "grad_norm": 93.84293365478516, + "learning_rate": 9.080146648759003e-08, + "loss": 35.6111, + "step": 472660 + }, + { + "epoch": 0.9548233050659146, + "grad_norm": 207.8497314453125, + "learning_rate": 9.073525633484737e-08, + "loss": 11.2637, + "step": 472670 + }, + { + "epoch": 0.9548435056985984, + "grad_norm": 258.3289794921875, + "learning_rate": 9.066907010926551e-08, + "loss": 19.0346, + "step": 472680 + }, + { + "epoch": 0.9548637063312823, + "grad_norm": 325.7560729980469, + "learning_rate": 9.060290781116698e-08, + "loss": 26.075, + "step": 472690 + }, + { + "epoch": 0.9548839069639661, + "grad_norm": 388.6772766113281, + "learning_rate": 9.053676944087542e-08, + "loss": 27.1975, + "step": 472700 + }, + { + "epoch": 0.9549041075966499, + "grad_norm": 433.8287658691406, + "learning_rate": 9.04706549987111e-08, + "loss": 19.3901, + "step": 472710 + }, + { + "epoch": 0.9549243082293337, + "grad_norm": 294.281494140625, + "learning_rate": 9.040456448499769e-08, + "loss": 17.0707, + "step": 472720 + }, + { + "epoch": 0.9549445088620175, + "grad_norm": 642.8132934570312, + "learning_rate": 9.03384979000571e-08, + "loss": 23.4874, + "step": 472730 + }, + { + "epoch": 0.9549647094947014, + "grad_norm": 125.74943542480469, + "learning_rate": 9.027245524421135e-08, + "loss": 15.4275, + "step": 472740 + }, + { + "epoch": 0.9549849101273852, + "grad_norm": 280.635986328125, + "learning_rate": 9.020643651778183e-08, + "loss": 25.7181, + "step": 472750 + }, + { + "epoch": 0.955005110760069, + "grad_norm": 138.8530731201172, + "learning_rate": 9.014044172109049e-08, + "loss": 11.7531, + "step": 472760 + }, + { + "epoch": 0.9550253113927528, + "grad_norm": 281.1873474121094, + "learning_rate": 9.007447085445987e-08, + "loss": 19.3867, + "step": 472770 + }, + { + "epoch": 0.9550455120254366, + "grad_norm": 130.60691833496094, + "learning_rate": 9.00085239182108e-08, + "loss": 12.7674, + "step": 472780 + }, + { + "epoch": 0.9550657126581205, + "grad_norm": 80.7839126586914, + "learning_rate": 8.99426009126636e-08, + "loss": 17.2423, + "step": 472790 + }, + { + "epoch": 0.9550859132908043, + "grad_norm": 228.19493103027344, + "learning_rate": 8.987670183814134e-08, + "loss": 17.0725, + "step": 472800 + }, + { + "epoch": 0.9551061139234881, + "grad_norm": 2.6318907737731934, + "learning_rate": 8.981082669496433e-08, + "loss": 22.8002, + "step": 472810 + }, + { + "epoch": 0.9551263145561719, + "grad_norm": 355.2213439941406, + "learning_rate": 8.974497548345396e-08, + "loss": 16.1209, + "step": 472820 + }, + { + "epoch": 0.9551465151888557, + "grad_norm": 349.511962890625, + "learning_rate": 8.967914820393108e-08, + "loss": 16.256, + "step": 472830 + }, + { + "epoch": 0.9551667158215396, + "grad_norm": 165.65904235839844, + "learning_rate": 8.961334485671657e-08, + "loss": 9.6964, + "step": 472840 + }, + { + "epoch": 0.9551869164542234, + "grad_norm": 196.5765838623047, + "learning_rate": 8.954756544213128e-08, + "loss": 6.1107, + "step": 472850 + }, + { + "epoch": 0.9552071170869072, + "grad_norm": 369.4851989746094, + "learning_rate": 8.948180996049493e-08, + "loss": 26.2368, + "step": 472860 + }, + { + "epoch": 0.955227317719591, + "grad_norm": 179.2471466064453, + "learning_rate": 8.941607841212841e-08, + "loss": 11.5676, + "step": 472870 + }, + { + "epoch": 0.9552475183522748, + "grad_norm": 244.04078674316406, + "learning_rate": 8.93503707973531e-08, + "loss": 13.2306, + "step": 472880 + }, + { + "epoch": 0.9552677189849587, + "grad_norm": 32.12590408325195, + "learning_rate": 8.928468711648875e-08, + "loss": 26.8184, + "step": 472890 + }, + { + "epoch": 0.9552879196176425, + "grad_norm": 181.82196044921875, + "learning_rate": 8.921902736985399e-08, + "loss": 13.9331, + "step": 472900 + }, + { + "epoch": 0.9553081202503262, + "grad_norm": 211.99375915527344, + "learning_rate": 8.915339155777136e-08, + "loss": 25.6744, + "step": 472910 + }, + { + "epoch": 0.95532832088301, + "grad_norm": 597.634765625, + "learning_rate": 8.908777968055893e-08, + "loss": 23.3503, + "step": 472920 + }, + { + "epoch": 0.9553485215156938, + "grad_norm": 178.07797241210938, + "learning_rate": 8.902219173853699e-08, + "loss": 13.8494, + "step": 472930 + }, + { + "epoch": 0.9553687221483776, + "grad_norm": 567.0872192382812, + "learning_rate": 8.895662773202529e-08, + "loss": 18.0347, + "step": 472940 + }, + { + "epoch": 0.9553889227810615, + "grad_norm": 343.8282775878906, + "learning_rate": 8.889108766134358e-08, + "loss": 24.0426, + "step": 472950 + }, + { + "epoch": 0.9554091234137453, + "grad_norm": 303.4576721191406, + "learning_rate": 8.882557152681104e-08, + "loss": 10.7665, + "step": 472960 + }, + { + "epoch": 0.9554293240464291, + "grad_norm": 76.28064727783203, + "learning_rate": 8.876007932874686e-08, + "loss": 8.4141, + "step": 472970 + }, + { + "epoch": 0.9554495246791129, + "grad_norm": 100.38688659667969, + "learning_rate": 8.869461106747024e-08, + "loss": 11.032, + "step": 472980 + }, + { + "epoch": 0.9554697253117967, + "grad_norm": 358.0327453613281, + "learning_rate": 8.862916674330091e-08, + "loss": 22.9662, + "step": 472990 + }, + { + "epoch": 0.9554899259444806, + "grad_norm": 359.1787414550781, + "learning_rate": 8.856374635655696e-08, + "loss": 13.3273, + "step": 473000 + }, + { + "epoch": 0.9555101265771644, + "grad_norm": 296.2112731933594, + "learning_rate": 8.849834990755757e-08, + "loss": 16.9119, + "step": 473010 + }, + { + "epoch": 0.9555303272098482, + "grad_norm": 236.23472595214844, + "learning_rate": 8.843297739662138e-08, + "loss": 16.3383, + "step": 473020 + }, + { + "epoch": 0.955550527842532, + "grad_norm": 66.28463745117188, + "learning_rate": 8.836762882406757e-08, + "loss": 11.8923, + "step": 473030 + }, + { + "epoch": 0.9555707284752158, + "grad_norm": 162.89300537109375, + "learning_rate": 8.830230419021424e-08, + "loss": 27.8157, + "step": 473040 + }, + { + "epoch": 0.9555909291078997, + "grad_norm": 209.03518676757812, + "learning_rate": 8.823700349537945e-08, + "loss": 18.1876, + "step": 473050 + }, + { + "epoch": 0.9556111297405835, + "grad_norm": 419.4091491699219, + "learning_rate": 8.817172673988184e-08, + "loss": 46.1519, + "step": 473060 + }, + { + "epoch": 0.9556313303732673, + "grad_norm": 129.33718872070312, + "learning_rate": 8.810647392404004e-08, + "loss": 15.8276, + "step": 473070 + }, + { + "epoch": 0.9556515310059511, + "grad_norm": 261.8002624511719, + "learning_rate": 8.804124504817046e-08, + "loss": 16.3749, + "step": 473080 + }, + { + "epoch": 0.955671731638635, + "grad_norm": 462.8828125, + "learning_rate": 8.797604011259287e-08, + "loss": 20.0273, + "step": 473090 + }, + { + "epoch": 0.9556919322713188, + "grad_norm": 211.12620544433594, + "learning_rate": 8.791085911762476e-08, + "loss": 10.8481, + "step": 473100 + }, + { + "epoch": 0.9557121329040026, + "grad_norm": 536.9437255859375, + "learning_rate": 8.784570206358201e-08, + "loss": 24.6362, + "step": 473110 + }, + { + "epoch": 0.9557323335366864, + "grad_norm": 210.85113525390625, + "learning_rate": 8.778056895078435e-08, + "loss": 25.9357, + "step": 473120 + }, + { + "epoch": 0.9557525341693702, + "grad_norm": 598.8668212890625, + "learning_rate": 8.77154597795482e-08, + "loss": 30.8427, + "step": 473130 + }, + { + "epoch": 0.955772734802054, + "grad_norm": 79.7105484008789, + "learning_rate": 8.765037455019165e-08, + "loss": 16.2035, + "step": 473140 + }, + { + "epoch": 0.9557929354347379, + "grad_norm": 374.01458740234375, + "learning_rate": 8.758531326303054e-08, + "loss": 12.7645, + "step": 473150 + }, + { + "epoch": 0.9558131360674217, + "grad_norm": 166.92881774902344, + "learning_rate": 8.752027591838352e-08, + "loss": 13.3774, + "step": 473160 + }, + { + "epoch": 0.9558333367001054, + "grad_norm": 317.66131591796875, + "learning_rate": 8.7455262516567e-08, + "loss": 10.217, + "step": 473170 + }, + { + "epoch": 0.9558535373327892, + "grad_norm": 161.99180603027344, + "learning_rate": 8.739027305789682e-08, + "loss": 9.1677, + "step": 473180 + }, + { + "epoch": 0.955873737965473, + "grad_norm": 327.8397216796875, + "learning_rate": 8.732530754269108e-08, + "loss": 15.8845, + "step": 473190 + }, + { + "epoch": 0.9558939385981569, + "grad_norm": 79.6656494140625, + "learning_rate": 8.726036597126619e-08, + "loss": 22.2873, + "step": 473200 + }, + { + "epoch": 0.9559141392308407, + "grad_norm": 133.7283935546875, + "learning_rate": 8.719544834393855e-08, + "loss": 10.4272, + "step": 473210 + }, + { + "epoch": 0.9559343398635245, + "grad_norm": 6.810715675354004, + "learning_rate": 8.713055466102349e-08, + "loss": 11.7141, + "step": 473220 + }, + { + "epoch": 0.9559545404962083, + "grad_norm": 94.8315658569336, + "learning_rate": 8.706568492283907e-08, + "loss": 17.678, + "step": 473230 + }, + { + "epoch": 0.9559747411288921, + "grad_norm": 80.83332824707031, + "learning_rate": 8.700083912970058e-08, + "loss": 13.8116, + "step": 473240 + }, + { + "epoch": 0.955994941761576, + "grad_norm": 328.4601135253906, + "learning_rate": 8.693601728192392e-08, + "loss": 20.038, + "step": 473250 + }, + { + "epoch": 0.9560151423942598, + "grad_norm": 396.4517517089844, + "learning_rate": 8.687121937982545e-08, + "loss": 22.6981, + "step": 473260 + }, + { + "epoch": 0.9560353430269436, + "grad_norm": 177.8253631591797, + "learning_rate": 8.680644542372052e-08, + "loss": 15.6881, + "step": 473270 + }, + { + "epoch": 0.9560555436596274, + "grad_norm": 219.24429321289062, + "learning_rate": 8.674169541392552e-08, + "loss": 13.0421, + "step": 473280 + }, + { + "epoch": 0.9560757442923112, + "grad_norm": 308.054931640625, + "learning_rate": 8.66769693507552e-08, + "loss": 13.0121, + "step": 473290 + }, + { + "epoch": 0.9560959449249951, + "grad_norm": 461.49658203125, + "learning_rate": 8.661226723452542e-08, + "loss": 30.7874, + "step": 473300 + }, + { + "epoch": 0.9561161455576789, + "grad_norm": 271.1308288574219, + "learning_rate": 8.65475890655515e-08, + "loss": 26.9139, + "step": 473310 + }, + { + "epoch": 0.9561363461903627, + "grad_norm": 109.32914733886719, + "learning_rate": 8.648293484414871e-08, + "loss": 14.0129, + "step": 473320 + }, + { + "epoch": 0.9561565468230465, + "grad_norm": 280.9069519042969, + "learning_rate": 8.641830457063239e-08, + "loss": 15.7103, + "step": 473330 + }, + { + "epoch": 0.9561767474557303, + "grad_norm": 195.9000244140625, + "learning_rate": 8.63536982453167e-08, + "loss": 18.6843, + "step": 473340 + }, + { + "epoch": 0.9561969480884142, + "grad_norm": 193.5458221435547, + "learning_rate": 8.628911586851752e-08, + "loss": 19.0862, + "step": 473350 + }, + { + "epoch": 0.956217148721098, + "grad_norm": 267.5543518066406, + "learning_rate": 8.622455744054958e-08, + "loss": 15.2687, + "step": 473360 + }, + { + "epoch": 0.9562373493537818, + "grad_norm": 443.3758544921875, + "learning_rate": 8.616002296172654e-08, + "loss": 21.8186, + "step": 473370 + }, + { + "epoch": 0.9562575499864656, + "grad_norm": 283.0821838378906, + "learning_rate": 8.609551243236424e-08, + "loss": 14.6029, + "step": 473380 + }, + { + "epoch": 0.9562777506191494, + "grad_norm": 376.0336608886719, + "learning_rate": 8.603102585277634e-08, + "loss": 22.0365, + "step": 473390 + }, + { + "epoch": 0.9562979512518333, + "grad_norm": 201.6820526123047, + "learning_rate": 8.596656322327645e-08, + "loss": 19.2655, + "step": 473400 + }, + { + "epoch": 0.9563181518845171, + "grad_norm": 170.3756866455078, + "learning_rate": 8.59021245441799e-08, + "loss": 20.9075, + "step": 473410 + }, + { + "epoch": 0.9563383525172008, + "grad_norm": 113.07420349121094, + "learning_rate": 8.583770981580142e-08, + "loss": 14.0275, + "step": 473420 + }, + { + "epoch": 0.9563585531498846, + "grad_norm": 113.4518051147461, + "learning_rate": 8.577331903845243e-08, + "loss": 15.9164, + "step": 473430 + }, + { + "epoch": 0.9563787537825684, + "grad_norm": 144.70835876464844, + "learning_rate": 8.57089522124488e-08, + "loss": 19.1881, + "step": 473440 + }, + { + "epoch": 0.9563989544152522, + "grad_norm": 488.23638916015625, + "learning_rate": 8.564460933810414e-08, + "loss": 15.9569, + "step": 473450 + }, + { + "epoch": 0.9564191550479361, + "grad_norm": 203.3751678466797, + "learning_rate": 8.558029041573157e-08, + "loss": 17.6322, + "step": 473460 + }, + { + "epoch": 0.9564393556806199, + "grad_norm": 232.36102294921875, + "learning_rate": 8.55159954456436e-08, + "loss": 14.4901, + "step": 473470 + }, + { + "epoch": 0.9564595563133037, + "grad_norm": 64.39278411865234, + "learning_rate": 8.545172442815552e-08, + "loss": 10.8955, + "step": 473480 + }, + { + "epoch": 0.9564797569459875, + "grad_norm": 495.407470703125, + "learning_rate": 8.538747736357933e-08, + "loss": 25.2176, + "step": 473490 + }, + { + "epoch": 0.9564999575786713, + "grad_norm": 47.280120849609375, + "learning_rate": 8.53232542522292e-08, + "loss": 22.3482, + "step": 473500 + }, + { + "epoch": 0.9565201582113552, + "grad_norm": 107.16753387451172, + "learning_rate": 8.525905509441656e-08, + "loss": 18.7533, + "step": 473510 + }, + { + "epoch": 0.956540358844039, + "grad_norm": 58.44377517700195, + "learning_rate": 8.51948798904556e-08, + "loss": 30.5177, + "step": 473520 + }, + { + "epoch": 0.9565605594767228, + "grad_norm": 438.2406921386719, + "learning_rate": 8.513072864065885e-08, + "loss": 20.7406, + "step": 473530 + }, + { + "epoch": 0.9565807601094066, + "grad_norm": 313.06787109375, + "learning_rate": 8.506660134533828e-08, + "loss": 16.4932, + "step": 473540 + }, + { + "epoch": 0.9566009607420904, + "grad_norm": 299.9169921875, + "learning_rate": 8.500249800480754e-08, + "loss": 11.3267, + "step": 473550 + }, + { + "epoch": 0.9566211613747743, + "grad_norm": 603.1043090820312, + "learning_rate": 8.493841861937802e-08, + "loss": 37.6081, + "step": 473560 + }, + { + "epoch": 0.9566413620074581, + "grad_norm": 216.57540893554688, + "learning_rate": 8.487436318936282e-08, + "loss": 19.5097, + "step": 473570 + }, + { + "epoch": 0.9566615626401419, + "grad_norm": 372.18536376953125, + "learning_rate": 8.481033171507391e-08, + "loss": 21.0429, + "step": 473580 + }, + { + "epoch": 0.9566817632728257, + "grad_norm": 336.1340637207031, + "learning_rate": 8.474632419682327e-08, + "loss": 10.3598, + "step": 473590 + }, + { + "epoch": 0.9567019639055095, + "grad_norm": 165.48570251464844, + "learning_rate": 8.468234063492287e-08, + "loss": 13.1302, + "step": 473600 + }, + { + "epoch": 0.9567221645381934, + "grad_norm": 298.9831848144531, + "learning_rate": 8.461838102968467e-08, + "loss": 26.003, + "step": 473610 + }, + { + "epoch": 0.9567423651708772, + "grad_norm": 261.3507385253906, + "learning_rate": 8.45544453814201e-08, + "loss": 13.8456, + "step": 473620 + }, + { + "epoch": 0.956762565803561, + "grad_norm": 297.2005310058594, + "learning_rate": 8.449053369044058e-08, + "loss": 40.8161, + "step": 473630 + }, + { + "epoch": 0.9567827664362448, + "grad_norm": 387.9478759765625, + "learning_rate": 8.442664595705862e-08, + "loss": 18.4141, + "step": 473640 + }, + { + "epoch": 0.9568029670689286, + "grad_norm": 212.37924194335938, + "learning_rate": 8.436278218158511e-08, + "loss": 14.217, + "step": 473650 + }, + { + "epoch": 0.9568231677016125, + "grad_norm": 356.33892822265625, + "learning_rate": 8.429894236433089e-08, + "loss": 26.0145, + "step": 473660 + }, + { + "epoch": 0.9568433683342963, + "grad_norm": 199.64849853515625, + "learning_rate": 8.423512650560795e-08, + "loss": 12.3553, + "step": 473670 + }, + { + "epoch": 0.95686356896698, + "grad_norm": 55.902217864990234, + "learning_rate": 8.417133460572658e-08, + "loss": 13.6445, + "step": 473680 + }, + { + "epoch": 0.9568837695996638, + "grad_norm": 109.74313354492188, + "learning_rate": 8.410756666499709e-08, + "loss": 10.69, + "step": 473690 + }, + { + "epoch": 0.9569039702323476, + "grad_norm": 291.5455627441406, + "learning_rate": 8.404382268373145e-08, + "loss": 28.3833, + "step": 473700 + }, + { + "epoch": 0.9569241708650315, + "grad_norm": 0.0, + "learning_rate": 8.39801026622411e-08, + "loss": 9.3024, + "step": 473710 + }, + { + "epoch": 0.9569443714977153, + "grad_norm": 331.5452880859375, + "learning_rate": 8.391640660083411e-08, + "loss": 17.7631, + "step": 473720 + }, + { + "epoch": 0.9569645721303991, + "grad_norm": 288.2303161621094, + "learning_rate": 8.3852734499823e-08, + "loss": 25.573, + "step": 473730 + }, + { + "epoch": 0.9569847727630829, + "grad_norm": 136.45462036132812, + "learning_rate": 8.3789086359517e-08, + "loss": 12.9635, + "step": 473740 + }, + { + "epoch": 0.9570049733957667, + "grad_norm": 228.79193115234375, + "learning_rate": 8.372546218022747e-08, + "loss": 14.8749, + "step": 473750 + }, + { + "epoch": 0.9570251740284506, + "grad_norm": 702.3757934570312, + "learning_rate": 8.366186196226311e-08, + "loss": 10.4563, + "step": 473760 + }, + { + "epoch": 0.9570453746611344, + "grad_norm": 490.6363830566406, + "learning_rate": 8.35982857059342e-08, + "loss": 24.4584, + "step": 473770 + }, + { + "epoch": 0.9570655752938182, + "grad_norm": 299.2497253417969, + "learning_rate": 8.353473341155216e-08, + "loss": 17.0274, + "step": 473780 + }, + { + "epoch": 0.957085775926502, + "grad_norm": 409.9384460449219, + "learning_rate": 8.347120507942453e-08, + "loss": 23.91, + "step": 473790 + }, + { + "epoch": 0.9571059765591858, + "grad_norm": 227.42718505859375, + "learning_rate": 8.340770070986215e-08, + "loss": 12.4463, + "step": 473800 + }, + { + "epoch": 0.9571261771918697, + "grad_norm": 338.9191589355469, + "learning_rate": 8.334422030317424e-08, + "loss": 16.1405, + "step": 473810 + }, + { + "epoch": 0.9571463778245535, + "grad_norm": 196.53457641601562, + "learning_rate": 8.328076385967055e-08, + "loss": 14.9167, + "step": 473820 + }, + { + "epoch": 0.9571665784572373, + "grad_norm": 96.96067810058594, + "learning_rate": 8.321733137966026e-08, + "loss": 14.137, + "step": 473830 + }, + { + "epoch": 0.9571867790899211, + "grad_norm": 165.6729278564453, + "learning_rate": 8.315392286345203e-08, + "loss": 13.696, + "step": 473840 + }, + { + "epoch": 0.9572069797226049, + "grad_norm": 152.39344787597656, + "learning_rate": 8.30905383113556e-08, + "loss": 10.6705, + "step": 473850 + }, + { + "epoch": 0.9572271803552888, + "grad_norm": 103.52873992919922, + "learning_rate": 8.302717772367908e-08, + "loss": 14.0565, + "step": 473860 + }, + { + "epoch": 0.9572473809879726, + "grad_norm": 624.4776611328125, + "learning_rate": 8.296384110073164e-08, + "loss": 20.4844, + "step": 473870 + }, + { + "epoch": 0.9572675816206564, + "grad_norm": 55.818180084228516, + "learning_rate": 8.290052844282248e-08, + "loss": 23.9011, + "step": 473880 + }, + { + "epoch": 0.9572877822533402, + "grad_norm": 334.4815979003906, + "learning_rate": 8.283723975025971e-08, + "loss": 10.7081, + "step": 473890 + }, + { + "epoch": 0.957307982886024, + "grad_norm": 338.7004699707031, + "learning_rate": 8.277397502335194e-08, + "loss": 13.664, + "step": 473900 + }, + { + "epoch": 0.9573281835187079, + "grad_norm": 306.1288757324219, + "learning_rate": 8.271073426240672e-08, + "loss": 15.5262, + "step": 473910 + }, + { + "epoch": 0.9573483841513917, + "grad_norm": 404.05316162109375, + "learning_rate": 8.264751746773381e-08, + "loss": 19.4341, + "step": 473920 + }, + { + "epoch": 0.9573685847840754, + "grad_norm": 138.5911865234375, + "learning_rate": 8.258432463964016e-08, + "loss": 9.5202, + "step": 473930 + }, + { + "epoch": 0.9573887854167592, + "grad_norm": 86.55339813232422, + "learning_rate": 8.252115577843444e-08, + "loss": 17.1271, + "step": 473940 + }, + { + "epoch": 0.957408986049443, + "grad_norm": 184.81588745117188, + "learning_rate": 8.245801088442362e-08, + "loss": 19.6974, + "step": 473950 + }, + { + "epoch": 0.9574291866821268, + "grad_norm": 114.68429565429688, + "learning_rate": 8.239488995791633e-08, + "loss": 12.5398, + "step": 473960 + }, + { + "epoch": 0.9574493873148107, + "grad_norm": 92.18013000488281, + "learning_rate": 8.233179299922012e-08, + "loss": 9.8894, + "step": 473970 + }, + { + "epoch": 0.9574695879474945, + "grad_norm": 447.7590637207031, + "learning_rate": 8.226872000864194e-08, + "loss": 18.6785, + "step": 473980 + }, + { + "epoch": 0.9574897885801783, + "grad_norm": 243.25074768066406, + "learning_rate": 8.22056709864899e-08, + "loss": 28.9401, + "step": 473990 + }, + { + "epoch": 0.9575099892128621, + "grad_norm": 63.45094680786133, + "learning_rate": 8.214264593307097e-08, + "loss": 14.1391, + "step": 474000 + }, + { + "epoch": 0.9575301898455459, + "grad_norm": 286.0859069824219, + "learning_rate": 8.207964484869158e-08, + "loss": 22.7937, + "step": 474010 + }, + { + "epoch": 0.9575503904782298, + "grad_norm": 251.81008911132812, + "learning_rate": 8.201666773365979e-08, + "loss": 12.8405, + "step": 474020 + }, + { + "epoch": 0.9575705911109136, + "grad_norm": 154.5972442626953, + "learning_rate": 8.195371458828316e-08, + "loss": 9.8992, + "step": 474030 + }, + { + "epoch": 0.9575907917435974, + "grad_norm": 1024.4110107421875, + "learning_rate": 8.1890785412867e-08, + "loss": 36.8507, + "step": 474040 + }, + { + "epoch": 0.9576109923762812, + "grad_norm": 520.576416015625, + "learning_rate": 8.182788020771826e-08, + "loss": 14.4984, + "step": 474050 + }, + { + "epoch": 0.957631193008965, + "grad_norm": 365.233642578125, + "learning_rate": 8.176499897314505e-08, + "loss": 16.9359, + "step": 474060 + }, + { + "epoch": 0.9576513936416489, + "grad_norm": 237.56802368164062, + "learning_rate": 8.170214170945212e-08, + "loss": 18.8759, + "step": 474070 + }, + { + "epoch": 0.9576715942743327, + "grad_norm": 0.0, + "learning_rate": 8.163930841694589e-08, + "loss": 7.2028, + "step": 474080 + }, + { + "epoch": 0.9576917949070165, + "grad_norm": 567.1417236328125, + "learning_rate": 8.157649909593335e-08, + "loss": 21.3314, + "step": 474090 + }, + { + "epoch": 0.9577119955397003, + "grad_norm": 430.8260192871094, + "learning_rate": 8.151371374672146e-08, + "loss": 25.491, + "step": 474100 + }, + { + "epoch": 0.9577321961723841, + "grad_norm": 342.25030517578125, + "learning_rate": 8.145095236961387e-08, + "loss": 17.3369, + "step": 474110 + }, + { + "epoch": 0.957752396805068, + "grad_norm": 325.0548095703125, + "learning_rate": 8.13882149649181e-08, + "loss": 17.8569, + "step": 474120 + }, + { + "epoch": 0.9577725974377518, + "grad_norm": 251.9890594482422, + "learning_rate": 8.132550153294005e-08, + "loss": 15.6318, + "step": 474130 + }, + { + "epoch": 0.9577927980704356, + "grad_norm": 468.25421142578125, + "learning_rate": 8.1262812073985e-08, + "loss": 21.4562, + "step": 474140 + }, + { + "epoch": 0.9578129987031194, + "grad_norm": 70.49613952636719, + "learning_rate": 8.120014658835828e-08, + "loss": 19.4513, + "step": 474150 + }, + { + "epoch": 0.9578331993358032, + "grad_norm": 129.25839233398438, + "learning_rate": 8.11375050763652e-08, + "loss": 9.8043, + "step": 474160 + }, + { + "epoch": 0.9578533999684871, + "grad_norm": 212.5464630126953, + "learning_rate": 8.107488753831161e-08, + "loss": 8.6988, + "step": 474170 + }, + { + "epoch": 0.9578736006011709, + "grad_norm": 196.2848358154297, + "learning_rate": 8.101229397450228e-08, + "loss": 23.1565, + "step": 474180 + }, + { + "epoch": 0.9578938012338546, + "grad_norm": 357.7814636230469, + "learning_rate": 8.094972438524251e-08, + "loss": 23.4829, + "step": 474190 + }, + { + "epoch": 0.9579140018665384, + "grad_norm": 38.56194305419922, + "learning_rate": 8.088717877083706e-08, + "loss": 9.1624, + "step": 474200 + }, + { + "epoch": 0.9579342024992222, + "grad_norm": 337.6510314941406, + "learning_rate": 8.082465713159126e-08, + "loss": 14.772, + "step": 474210 + }, + { + "epoch": 0.957954403131906, + "grad_norm": 395.1022644042969, + "learning_rate": 8.076215946780874e-08, + "loss": 16.72, + "step": 474220 + }, + { + "epoch": 0.9579746037645899, + "grad_norm": 240.04920959472656, + "learning_rate": 8.069968577979536e-08, + "loss": 19.9663, + "step": 474230 + }, + { + "epoch": 0.9579948043972737, + "grad_norm": 191.10911560058594, + "learning_rate": 8.063723606785478e-08, + "loss": 12.3374, + "step": 474240 + }, + { + "epoch": 0.9580150050299575, + "grad_norm": 229.42955017089844, + "learning_rate": 8.057481033229176e-08, + "loss": 12.8458, + "step": 474250 + }, + { + "epoch": 0.9580352056626413, + "grad_norm": 476.42681884765625, + "learning_rate": 8.051240857341102e-08, + "loss": 21.7681, + "step": 474260 + }, + { + "epoch": 0.9580554062953252, + "grad_norm": 274.9783020019531, + "learning_rate": 8.045003079151514e-08, + "loss": 14.8177, + "step": 474270 + }, + { + "epoch": 0.958075606928009, + "grad_norm": 245.0540008544922, + "learning_rate": 8.038767698690996e-08, + "loss": 21.4797, + "step": 474280 + }, + { + "epoch": 0.9580958075606928, + "grad_norm": 32.84925079345703, + "learning_rate": 8.032534715989859e-08, + "loss": 15.4773, + "step": 474290 + }, + { + "epoch": 0.9581160081933766, + "grad_norm": 3.756037473678589, + "learning_rate": 8.02630413107841e-08, + "loss": 28.1191, + "step": 474300 + }, + { + "epoch": 0.9581362088260604, + "grad_norm": 201.16249084472656, + "learning_rate": 8.020075943987071e-08, + "loss": 27.9424, + "step": 474310 + }, + { + "epoch": 0.9581564094587443, + "grad_norm": 316.2496337890625, + "learning_rate": 8.013850154746317e-08, + "loss": 23.3295, + "step": 474320 + }, + { + "epoch": 0.9581766100914281, + "grad_norm": 191.16314697265625, + "learning_rate": 8.007626763386345e-08, + "loss": 10.7816, + "step": 474330 + }, + { + "epoch": 0.9581968107241119, + "grad_norm": 86.80632019042969, + "learning_rate": 8.001405769937464e-08, + "loss": 43.165, + "step": 474340 + }, + { + "epoch": 0.9582170113567957, + "grad_norm": 150.78231811523438, + "learning_rate": 7.995187174430152e-08, + "loss": 8.0702, + "step": 474350 + }, + { + "epoch": 0.9582372119894795, + "grad_norm": 297.2938537597656, + "learning_rate": 7.988970976894605e-08, + "loss": 15.6809, + "step": 474360 + }, + { + "epoch": 0.9582574126221634, + "grad_norm": 327.9884948730469, + "learning_rate": 7.982757177361078e-08, + "loss": 26.3823, + "step": 474370 + }, + { + "epoch": 0.9582776132548472, + "grad_norm": 111.11241149902344, + "learning_rate": 7.976545775859934e-08, + "loss": 16.9001, + "step": 474380 + }, + { + "epoch": 0.958297813887531, + "grad_norm": 230.60694885253906, + "learning_rate": 7.970336772421483e-08, + "loss": 10.2513, + "step": 474390 + }, + { + "epoch": 0.9583180145202148, + "grad_norm": 46.74585723876953, + "learning_rate": 7.964130167075923e-08, + "loss": 26.2545, + "step": 474400 + }, + { + "epoch": 0.9583382151528986, + "grad_norm": 31.040271759033203, + "learning_rate": 7.957925959853452e-08, + "loss": 17.7189, + "step": 474410 + }, + { + "epoch": 0.9583584157855825, + "grad_norm": 274.662353515625, + "learning_rate": 7.951724150784434e-08, + "loss": 14.8723, + "step": 474420 + }, + { + "epoch": 0.9583786164182663, + "grad_norm": 784.5784301757812, + "learning_rate": 7.945524739899069e-08, + "loss": 16.8366, + "step": 474430 + }, + { + "epoch": 0.95839881705095, + "grad_norm": 176.93814086914062, + "learning_rate": 7.939327727227441e-08, + "loss": 13.5646, + "step": 474440 + }, + { + "epoch": 0.9584190176836338, + "grad_norm": 500.37481689453125, + "learning_rate": 7.933133112799918e-08, + "loss": 6.064, + "step": 474450 + }, + { + "epoch": 0.9584392183163176, + "grad_norm": 275.79901123046875, + "learning_rate": 7.926940896646584e-08, + "loss": 24.7872, + "step": 474460 + }, + { + "epoch": 0.9584594189490014, + "grad_norm": 1403.5462646484375, + "learning_rate": 7.920751078797695e-08, + "loss": 26.6344, + "step": 474470 + }, + { + "epoch": 0.9584796195816853, + "grad_norm": 24.37729263305664, + "learning_rate": 7.914563659283392e-08, + "loss": 6.7677, + "step": 474480 + }, + { + "epoch": 0.9584998202143691, + "grad_norm": 424.0550231933594, + "learning_rate": 7.908378638133762e-08, + "loss": 13.5092, + "step": 474490 + }, + { + "epoch": 0.9585200208470529, + "grad_norm": 538.5086059570312, + "learning_rate": 7.90219601537906e-08, + "loss": 28.8774, + "step": 474500 + }, + { + "epoch": 0.9585402214797367, + "grad_norm": 89.05743408203125, + "learning_rate": 7.896015791049372e-08, + "loss": 21.9172, + "step": 474510 + }, + { + "epoch": 0.9585604221124205, + "grad_norm": 416.69683837890625, + "learning_rate": 7.889837965174784e-08, + "loss": 11.4691, + "step": 474520 + }, + { + "epoch": 0.9585806227451044, + "grad_norm": 116.03609466552734, + "learning_rate": 7.883662537785442e-08, + "loss": 21.3867, + "step": 474530 + }, + { + "epoch": 0.9586008233777882, + "grad_norm": 52.5672721862793, + "learning_rate": 7.877489508911429e-08, + "loss": 28.9836, + "step": 474540 + }, + { + "epoch": 0.958621024010472, + "grad_norm": 275.4264831542969, + "learning_rate": 7.871318878582889e-08, + "loss": 16.1185, + "step": 474550 + }, + { + "epoch": 0.9586412246431558, + "grad_norm": 49.7515754699707, + "learning_rate": 7.865150646829855e-08, + "loss": 7.3196, + "step": 474560 + }, + { + "epoch": 0.9586614252758396, + "grad_norm": 361.1689147949219, + "learning_rate": 7.858984813682357e-08, + "loss": 14.6561, + "step": 474570 + }, + { + "epoch": 0.9586816259085235, + "grad_norm": 375.77764892578125, + "learning_rate": 7.852821379170538e-08, + "loss": 28.1445, + "step": 474580 + }, + { + "epoch": 0.9587018265412073, + "grad_norm": 277.7471923828125, + "learning_rate": 7.846660343324263e-08, + "loss": 17.1649, + "step": 474590 + }, + { + "epoch": 0.9587220271738911, + "grad_norm": 536.0797729492188, + "learning_rate": 7.840501706173786e-08, + "loss": 17.6065, + "step": 474600 + }, + { + "epoch": 0.9587422278065749, + "grad_norm": 269.5274353027344, + "learning_rate": 7.834345467748972e-08, + "loss": 18.4193, + "step": 474610 + }, + { + "epoch": 0.9587624284392587, + "grad_norm": 243.64309692382812, + "learning_rate": 7.828191628079851e-08, + "loss": 18.9776, + "step": 474620 + }, + { + "epoch": 0.9587826290719426, + "grad_norm": 81.72669982910156, + "learning_rate": 7.8220401871964e-08, + "loss": 27.3236, + "step": 474630 + }, + { + "epoch": 0.9588028297046264, + "grad_norm": 7.575145244598389, + "learning_rate": 7.815891145128763e-08, + "loss": 13.8749, + "step": 474640 + }, + { + "epoch": 0.9588230303373102, + "grad_norm": 256.9676208496094, + "learning_rate": 7.809744501906635e-08, + "loss": 13.6655, + "step": 474650 + }, + { + "epoch": 0.958843230969994, + "grad_norm": 251.22300720214844, + "learning_rate": 7.803600257560162e-08, + "loss": 28.9949, + "step": 474660 + }, + { + "epoch": 0.9588634316026778, + "grad_norm": 429.87322998046875, + "learning_rate": 7.797458412119264e-08, + "loss": 14.0549, + "step": 474670 + }, + { + "epoch": 0.9588836322353617, + "grad_norm": 198.52183532714844, + "learning_rate": 7.791318965613859e-08, + "loss": 11.3697, + "step": 474680 + }, + { + "epoch": 0.9589038328680455, + "grad_norm": 487.17156982421875, + "learning_rate": 7.785181918073814e-08, + "loss": 12.1822, + "step": 474690 + }, + { + "epoch": 0.9589240335007292, + "grad_norm": 0.0, + "learning_rate": 7.779047269529105e-08, + "loss": 24.6676, + "step": 474700 + }, + { + "epoch": 0.958944234133413, + "grad_norm": 272.5414733886719, + "learning_rate": 7.772915020009708e-08, + "loss": 17.4134, + "step": 474710 + }, + { + "epoch": 0.9589644347660968, + "grad_norm": 132.16969299316406, + "learning_rate": 7.766785169545376e-08, + "loss": 23.6353, + "step": 474720 + }, + { + "epoch": 0.9589846353987806, + "grad_norm": 159.81565856933594, + "learning_rate": 7.760657718165976e-08, + "loss": 20.8954, + "step": 474730 + }, + { + "epoch": 0.9590048360314645, + "grad_norm": 252.47760009765625, + "learning_rate": 7.754532665901482e-08, + "loss": 28.9729, + "step": 474740 + }, + { + "epoch": 0.9590250366641483, + "grad_norm": 334.49664306640625, + "learning_rate": 7.748410012781705e-08, + "loss": 10.6586, + "step": 474750 + }, + { + "epoch": 0.9590452372968321, + "grad_norm": 94.72216033935547, + "learning_rate": 7.742289758836452e-08, + "loss": 13.7154, + "step": 474760 + }, + { + "epoch": 0.9590654379295159, + "grad_norm": 237.7094268798828, + "learning_rate": 7.736171904095591e-08, + "loss": 28.3998, + "step": 474770 + }, + { + "epoch": 0.9590856385621997, + "grad_norm": 125.54972076416016, + "learning_rate": 7.73005644858893e-08, + "loss": 21.3815, + "step": 474780 + }, + { + "epoch": 0.9591058391948836, + "grad_norm": 32.88019561767578, + "learning_rate": 7.723943392346223e-08, + "loss": 19.0009, + "step": 474790 + }, + { + "epoch": 0.9591260398275674, + "grad_norm": 14.027314186096191, + "learning_rate": 7.717832735397335e-08, + "loss": 6.6176, + "step": 474800 + }, + { + "epoch": 0.9591462404602512, + "grad_norm": 96.62458801269531, + "learning_rate": 7.71172447777202e-08, + "loss": 12.2616, + "step": 474810 + }, + { + "epoch": 0.959166441092935, + "grad_norm": 196.66140747070312, + "learning_rate": 7.705618619500032e-08, + "loss": 17.6131, + "step": 474820 + }, + { + "epoch": 0.9591866417256188, + "grad_norm": 20.23488998413086, + "learning_rate": 7.699515160611182e-08, + "loss": 15.0959, + "step": 474830 + }, + { + "epoch": 0.9592068423583027, + "grad_norm": 274.4444274902344, + "learning_rate": 7.693414101135166e-08, + "loss": 10.6709, + "step": 474840 + }, + { + "epoch": 0.9592270429909865, + "grad_norm": 168.56292724609375, + "learning_rate": 7.687315441101795e-08, + "loss": 10.3256, + "step": 474850 + }, + { + "epoch": 0.9592472436236703, + "grad_norm": 698.96826171875, + "learning_rate": 7.681219180540655e-08, + "loss": 19.0904, + "step": 474860 + }, + { + "epoch": 0.9592674442563541, + "grad_norm": 267.51019287109375, + "learning_rate": 7.675125319481614e-08, + "loss": 35.6655, + "step": 474870 + }, + { + "epoch": 0.959287644889038, + "grad_norm": 214.7691192626953, + "learning_rate": 7.669033857954255e-08, + "loss": 6.8796, + "step": 474880 + }, + { + "epoch": 0.9593078455217218, + "grad_norm": 271.8771057128906, + "learning_rate": 7.662944795988337e-08, + "loss": 14.7434, + "step": 474890 + }, + { + "epoch": 0.9593280461544056, + "grad_norm": 143.24952697753906, + "learning_rate": 7.656858133613498e-08, + "loss": 14.6898, + "step": 474900 + }, + { + "epoch": 0.9593482467870894, + "grad_norm": 404.09478759765625, + "learning_rate": 7.65077387085944e-08, + "loss": 18.0967, + "step": 474910 + }, + { + "epoch": 0.9593684474197732, + "grad_norm": 0.0, + "learning_rate": 7.64469200775575e-08, + "loss": 8.2077, + "step": 474920 + }, + { + "epoch": 0.959388648052457, + "grad_norm": 140.16885375976562, + "learning_rate": 7.638612544332181e-08, + "loss": 20.7628, + "step": 474930 + }, + { + "epoch": 0.9594088486851409, + "grad_norm": 200.65782165527344, + "learning_rate": 7.632535480618264e-08, + "loss": 10.9137, + "step": 474940 + }, + { + "epoch": 0.9594290493178247, + "grad_norm": 151.9487762451172, + "learning_rate": 7.626460816643588e-08, + "loss": 13.8894, + "step": 474950 + }, + { + "epoch": 0.9594492499505084, + "grad_norm": 333.2170715332031, + "learning_rate": 7.620388552437907e-08, + "loss": 19.9184, + "step": 474960 + }, + { + "epoch": 0.9594694505831922, + "grad_norm": 267.5528869628906, + "learning_rate": 7.614318688030753e-08, + "loss": 14.4488, + "step": 474970 + }, + { + "epoch": 0.959489651215876, + "grad_norm": 434.326171875, + "learning_rate": 7.608251223451601e-08, + "loss": 17.0198, + "step": 474980 + }, + { + "epoch": 0.9595098518485599, + "grad_norm": 65.68861389160156, + "learning_rate": 7.602186158730152e-08, + "loss": 22.1237, + "step": 474990 + }, + { + "epoch": 0.9595300524812437, + "grad_norm": 366.1726989746094, + "learning_rate": 7.59612349389599e-08, + "loss": 14.4474, + "step": 475000 + }, + { + "epoch": 0.9595502531139275, + "grad_norm": 195.4129180908203, + "learning_rate": 7.590063228978539e-08, + "loss": 19.7011, + "step": 475010 + }, + { + "epoch": 0.9595704537466113, + "grad_norm": 338.5332336425781, + "learning_rate": 7.584005364007386e-08, + "loss": 19.4208, + "step": 475020 + }, + { + "epoch": 0.9595906543792951, + "grad_norm": 820.9507446289062, + "learning_rate": 7.577949899012116e-08, + "loss": 19.7084, + "step": 475030 + }, + { + "epoch": 0.959610855011979, + "grad_norm": 199.35757446289062, + "learning_rate": 7.571896834022152e-08, + "loss": 12.0899, + "step": 475040 + }, + { + "epoch": 0.9596310556446628, + "grad_norm": 252.28346252441406, + "learning_rate": 7.565846169067026e-08, + "loss": 19.6838, + "step": 475050 + }, + { + "epoch": 0.9596512562773466, + "grad_norm": 113.28939056396484, + "learning_rate": 7.559797904176325e-08, + "loss": 11.5452, + "step": 475060 + }, + { + "epoch": 0.9596714569100304, + "grad_norm": 136.12945556640625, + "learning_rate": 7.553752039379359e-08, + "loss": 19.6234, + "step": 475070 + }, + { + "epoch": 0.9596916575427142, + "grad_norm": 152.21763610839844, + "learning_rate": 7.547708574705714e-08, + "loss": 7.7885, + "step": 475080 + }, + { + "epoch": 0.9597118581753981, + "grad_norm": 595.0191040039062, + "learning_rate": 7.541667510184813e-08, + "loss": 11.2779, + "step": 475090 + }, + { + "epoch": 0.9597320588080819, + "grad_norm": 248.594482421875, + "learning_rate": 7.535628845846077e-08, + "loss": 21.5807, + "step": 475100 + }, + { + "epoch": 0.9597522594407657, + "grad_norm": 220.2259063720703, + "learning_rate": 7.529592581718981e-08, + "loss": 20.4683, + "step": 475110 + }, + { + "epoch": 0.9597724600734495, + "grad_norm": 333.22723388671875, + "learning_rate": 7.52355871783289e-08, + "loss": 25.0653, + "step": 475120 + }, + { + "epoch": 0.9597926607061333, + "grad_norm": 406.2622985839844, + "learning_rate": 7.517527254217282e-08, + "loss": 14.6823, + "step": 475130 + }, + { + "epoch": 0.9598128613388172, + "grad_norm": 321.54608154296875, + "learning_rate": 7.511498190901467e-08, + "loss": 15.3557, + "step": 475140 + }, + { + "epoch": 0.959833061971501, + "grad_norm": 410.90118408203125, + "learning_rate": 7.50547152791492e-08, + "loss": 10.3963, + "step": 475150 + }, + { + "epoch": 0.9598532626041848, + "grad_norm": 106.2830810546875, + "learning_rate": 7.499447265286952e-08, + "loss": 13.0822, + "step": 475160 + }, + { + "epoch": 0.9598734632368686, + "grad_norm": 260.9781494140625, + "learning_rate": 7.493425403046928e-08, + "loss": 11.039, + "step": 475170 + }, + { + "epoch": 0.9598936638695524, + "grad_norm": 260.24176025390625, + "learning_rate": 7.487405941224268e-08, + "loss": 15.9699, + "step": 475180 + }, + { + "epoch": 0.9599138645022363, + "grad_norm": 290.000732421875, + "learning_rate": 7.481388879848228e-08, + "loss": 14.3762, + "step": 475190 + }, + { + "epoch": 0.9599340651349201, + "grad_norm": 86.14088439941406, + "learning_rate": 7.475374218948118e-08, + "loss": 17.105, + "step": 475200 + }, + { + "epoch": 0.9599542657676038, + "grad_norm": 144.34207153320312, + "learning_rate": 7.469361958553356e-08, + "loss": 6.9962, + "step": 475210 + }, + { + "epoch": 0.9599744664002876, + "grad_norm": 364.90679931640625, + "learning_rate": 7.463352098693199e-08, + "loss": 9.7677, + "step": 475220 + }, + { + "epoch": 0.9599946670329714, + "grad_norm": 401.1788024902344, + "learning_rate": 7.457344639396902e-08, + "loss": 16.2795, + "step": 475230 + }, + { + "epoch": 0.9600148676656552, + "grad_norm": 152.72698974609375, + "learning_rate": 7.451339580693718e-08, + "loss": 11.5242, + "step": 475240 + }, + { + "epoch": 0.9600350682983391, + "grad_norm": 209.91641235351562, + "learning_rate": 7.445336922613067e-08, + "loss": 20.3743, + "step": 475250 + }, + { + "epoch": 0.9600552689310229, + "grad_norm": 125.84286499023438, + "learning_rate": 7.439336665184039e-08, + "loss": 18.534, + "step": 475260 + }, + { + "epoch": 0.9600754695637067, + "grad_norm": 209.10047912597656, + "learning_rate": 7.433338808435941e-08, + "loss": 16.4652, + "step": 475270 + }, + { + "epoch": 0.9600956701963905, + "grad_norm": 274.1868896484375, + "learning_rate": 7.427343352398031e-08, + "loss": 22.5486, + "step": 475280 + }, + { + "epoch": 0.9601158708290743, + "grad_norm": 446.7681884765625, + "learning_rate": 7.421350297099505e-08, + "loss": 22.5897, + "step": 475290 + }, + { + "epoch": 0.9601360714617582, + "grad_norm": 383.9038391113281, + "learning_rate": 7.415359642569564e-08, + "loss": 40.8068, + "step": 475300 + }, + { + "epoch": 0.960156272094442, + "grad_norm": 149.6638946533203, + "learning_rate": 7.409371388837405e-08, + "loss": 14.5479, + "step": 475310 + }, + { + "epoch": 0.9601764727271258, + "grad_norm": 411.8772277832031, + "learning_rate": 7.403385535932284e-08, + "loss": 14.5164, + "step": 475320 + }, + { + "epoch": 0.9601966733598096, + "grad_norm": 178.7208251953125, + "learning_rate": 7.397402083883287e-08, + "loss": 5.3804, + "step": 475330 + }, + { + "epoch": 0.9602168739924934, + "grad_norm": 212.35629272460938, + "learning_rate": 7.39142103271956e-08, + "loss": 32.1982, + "step": 475340 + }, + { + "epoch": 0.9602370746251773, + "grad_norm": 46.84968948364258, + "learning_rate": 7.385442382470354e-08, + "loss": 13.0413, + "step": 475350 + }, + { + "epoch": 0.9602572752578611, + "grad_norm": 400.7052307128906, + "learning_rate": 7.379466133164759e-08, + "loss": 12.3183, + "step": 475360 + }, + { + "epoch": 0.9602774758905449, + "grad_norm": 53.30480194091797, + "learning_rate": 7.373492284831862e-08, + "loss": 12.3386, + "step": 475370 + }, + { + "epoch": 0.9602976765232287, + "grad_norm": 337.81402587890625, + "learning_rate": 7.367520837500808e-08, + "loss": 20.6608, + "step": 475380 + }, + { + "epoch": 0.9603178771559125, + "grad_norm": 200.33563232421875, + "learning_rate": 7.361551791200794e-08, + "loss": 13.0962, + "step": 475390 + }, + { + "epoch": 0.9603380777885964, + "grad_norm": 277.03057861328125, + "learning_rate": 7.355585145960743e-08, + "loss": 20.8616, + "step": 475400 + }, + { + "epoch": 0.9603582784212802, + "grad_norm": 368.3315124511719, + "learning_rate": 7.34962090180985e-08, + "loss": 13.296, + "step": 475410 + }, + { + "epoch": 0.960378479053964, + "grad_norm": 233.2793731689453, + "learning_rate": 7.343659058777098e-08, + "loss": 19.747, + "step": 475420 + }, + { + "epoch": 0.9603986796866478, + "grad_norm": 0.0, + "learning_rate": 7.33769961689168e-08, + "loss": 11.615, + "step": 475430 + }, + { + "epoch": 0.9604188803193316, + "grad_norm": 312.4758605957031, + "learning_rate": 7.331742576182466e-08, + "loss": 22.6936, + "step": 475440 + }, + { + "epoch": 0.9604390809520155, + "grad_norm": 43.45466995239258, + "learning_rate": 7.325787936678708e-08, + "loss": 19.882, + "step": 475450 + }, + { + "epoch": 0.9604592815846993, + "grad_norm": 128.6321563720703, + "learning_rate": 7.319835698409217e-08, + "loss": 16.9376, + "step": 475460 + }, + { + "epoch": 0.960479482217383, + "grad_norm": 405.3039855957031, + "learning_rate": 7.313885861403135e-08, + "loss": 21.7552, + "step": 475470 + }, + { + "epoch": 0.9604996828500668, + "grad_norm": 125.45901489257812, + "learning_rate": 7.307938425689388e-08, + "loss": 12.1929, + "step": 475480 + }, + { + "epoch": 0.9605198834827506, + "grad_norm": 514.5453491210938, + "learning_rate": 7.301993391297003e-08, + "loss": 20.2388, + "step": 475490 + }, + { + "epoch": 0.9605400841154345, + "grad_norm": 390.7613830566406, + "learning_rate": 7.296050758254958e-08, + "loss": 17.4189, + "step": 475500 + }, + { + "epoch": 0.9605602847481183, + "grad_norm": 220.3173065185547, + "learning_rate": 7.290110526592231e-08, + "loss": 14.4539, + "step": 475510 + }, + { + "epoch": 0.9605804853808021, + "grad_norm": 182.73268127441406, + "learning_rate": 7.284172696337688e-08, + "loss": 17.0045, + "step": 475520 + }, + { + "epoch": 0.9606006860134859, + "grad_norm": 77.84659576416016, + "learning_rate": 7.27823726752036e-08, + "loss": 13.1571, + "step": 475530 + }, + { + "epoch": 0.9606208866461697, + "grad_norm": 392.80963134765625, + "learning_rate": 7.272304240169115e-08, + "loss": 14.0392, + "step": 475540 + }, + { + "epoch": 0.9606410872788536, + "grad_norm": 223.89820861816406, + "learning_rate": 7.266373614312927e-08, + "loss": 11.1937, + "step": 475550 + }, + { + "epoch": 0.9606612879115374, + "grad_norm": 472.5323486328125, + "learning_rate": 7.260445389980609e-08, + "loss": 15.6538, + "step": 475560 + }, + { + "epoch": 0.9606814885442212, + "grad_norm": 399.0464782714844, + "learning_rate": 7.25451956720119e-08, + "loss": 22.7643, + "step": 475570 + }, + { + "epoch": 0.960701689176905, + "grad_norm": 484.1720275878906, + "learning_rate": 7.248596146003484e-08, + "loss": 16.677, + "step": 475580 + }, + { + "epoch": 0.9607218898095888, + "grad_norm": 407.47369384765625, + "learning_rate": 7.242675126416299e-08, + "loss": 27.4333, + "step": 475590 + }, + { + "epoch": 0.9607420904422727, + "grad_norm": 186.0641632080078, + "learning_rate": 7.236756508468612e-08, + "loss": 15.0088, + "step": 475600 + }, + { + "epoch": 0.9607622910749565, + "grad_norm": 353.9106750488281, + "learning_rate": 7.230840292189179e-08, + "loss": 10.1284, + "step": 475610 + }, + { + "epoch": 0.9607824917076403, + "grad_norm": 245.0760040283203, + "learning_rate": 7.224926477606864e-08, + "loss": 18.781, + "step": 475620 + }, + { + "epoch": 0.9608026923403241, + "grad_norm": 286.08154296875, + "learning_rate": 7.219015064750478e-08, + "loss": 16.0606, + "step": 475630 + }, + { + "epoch": 0.9608228929730079, + "grad_norm": 372.6519470214844, + "learning_rate": 7.213106053648889e-08, + "loss": 13.3908, + "step": 475640 + }, + { + "epoch": 0.9608430936056918, + "grad_norm": 14.131331443786621, + "learning_rate": 7.207199444330847e-08, + "loss": 14.3899, + "step": 475650 + }, + { + "epoch": 0.9608632942383756, + "grad_norm": 264.9721374511719, + "learning_rate": 7.201295236825112e-08, + "loss": 13.8373, + "step": 475660 + }, + { + "epoch": 0.9608834948710594, + "grad_norm": 441.21099853515625, + "learning_rate": 7.195393431160491e-08, + "loss": 11.3617, + "step": 475670 + }, + { + "epoch": 0.9609036955037432, + "grad_norm": 28.24907684326172, + "learning_rate": 7.189494027365795e-08, + "loss": 25.5611, + "step": 475680 + }, + { + "epoch": 0.960923896136427, + "grad_norm": 647.5918579101562, + "learning_rate": 7.183597025469669e-08, + "loss": 17.409, + "step": 475690 + }, + { + "epoch": 0.9609440967691109, + "grad_norm": 231.3723602294922, + "learning_rate": 7.177702425500977e-08, + "loss": 17.2756, + "step": 475700 + }, + { + "epoch": 0.9609642974017947, + "grad_norm": 125.4808578491211, + "learning_rate": 7.171810227488363e-08, + "loss": 11.1491, + "step": 475710 + }, + { + "epoch": 0.9609844980344784, + "grad_norm": 154.3079376220703, + "learning_rate": 7.165920431460637e-08, + "loss": 20.8417, + "step": 475720 + }, + { + "epoch": 0.9610046986671622, + "grad_norm": 258.5376281738281, + "learning_rate": 7.16003303744639e-08, + "loss": 14.5675, + "step": 475730 + }, + { + "epoch": 0.961024899299846, + "grad_norm": 205.18617248535156, + "learning_rate": 7.154148045474319e-08, + "loss": 21.4444, + "step": 475740 + }, + { + "epoch": 0.9610450999325298, + "grad_norm": 260.52996826171875, + "learning_rate": 7.148265455573233e-08, + "loss": 15.3334, + "step": 475750 + }, + { + "epoch": 0.9610653005652137, + "grad_norm": 186.31607055664062, + "learning_rate": 7.142385267771667e-08, + "loss": 23.7496, + "step": 475760 + }, + { + "epoch": 0.9610855011978975, + "grad_norm": 232.94505310058594, + "learning_rate": 7.136507482098375e-08, + "loss": 18.573, + "step": 475770 + }, + { + "epoch": 0.9611057018305813, + "grad_norm": 29.94649887084961, + "learning_rate": 7.130632098581947e-08, + "loss": 19.7735, + "step": 475780 + }, + { + "epoch": 0.9611259024632651, + "grad_norm": 818.3164672851562, + "learning_rate": 7.124759117251078e-08, + "loss": 29.18, + "step": 475790 + }, + { + "epoch": 0.961146103095949, + "grad_norm": 446.5696105957031, + "learning_rate": 7.118888538134361e-08, + "loss": 11.0974, + "step": 475800 + }, + { + "epoch": 0.9611663037286328, + "grad_norm": 433.7945556640625, + "learning_rate": 7.113020361260325e-08, + "loss": 17.209, + "step": 475810 + }, + { + "epoch": 0.9611865043613166, + "grad_norm": 42.12403869628906, + "learning_rate": 7.107154586657727e-08, + "loss": 13.7848, + "step": 475820 + }, + { + "epoch": 0.9612067049940004, + "grad_norm": 154.81005859375, + "learning_rate": 7.101291214355043e-08, + "loss": 14.8902, + "step": 475830 + }, + { + "epoch": 0.9612269056266842, + "grad_norm": 119.46757507324219, + "learning_rate": 7.095430244380863e-08, + "loss": 11.1297, + "step": 475840 + }, + { + "epoch": 0.961247106259368, + "grad_norm": 419.2917785644531, + "learning_rate": 7.089571676763773e-08, + "loss": 25.2929, + "step": 475850 + }, + { + "epoch": 0.9612673068920519, + "grad_norm": 170.31443786621094, + "learning_rate": 7.083715511532419e-08, + "loss": 14.8543, + "step": 475860 + }, + { + "epoch": 0.9612875075247357, + "grad_norm": 313.43658447265625, + "learning_rate": 7.077861748715165e-08, + "loss": 12.2518, + "step": 475870 + }, + { + "epoch": 0.9613077081574195, + "grad_norm": 8.207794189453125, + "learning_rate": 7.072010388340656e-08, + "loss": 8.0544, + "step": 475880 + }, + { + "epoch": 0.9613279087901033, + "grad_norm": 329.4176330566406, + "learning_rate": 7.066161430437368e-08, + "loss": 16.7999, + "step": 475890 + }, + { + "epoch": 0.9613481094227871, + "grad_norm": 272.11688232421875, + "learning_rate": 7.060314875033836e-08, + "loss": 11.1837, + "step": 475900 + }, + { + "epoch": 0.961368310055471, + "grad_norm": 52.65071105957031, + "learning_rate": 7.054470722158535e-08, + "loss": 16.2709, + "step": 475910 + }, + { + "epoch": 0.9613885106881548, + "grad_norm": 216.29507446289062, + "learning_rate": 7.048628971839944e-08, + "loss": 25.8152, + "step": 475920 + }, + { + "epoch": 0.9614087113208386, + "grad_norm": 190.11618041992188, + "learning_rate": 7.042789624106594e-08, + "loss": 14.2205, + "step": 475930 + }, + { + "epoch": 0.9614289119535224, + "grad_norm": 501.01043701171875, + "learning_rate": 7.036952678986852e-08, + "loss": 27.5411, + "step": 475940 + }, + { + "epoch": 0.9614491125862062, + "grad_norm": 245.2197265625, + "learning_rate": 7.031118136509196e-08, + "loss": 20.0645, + "step": 475950 + }, + { + "epoch": 0.9614693132188901, + "grad_norm": 322.0236511230469, + "learning_rate": 7.025285996702158e-08, + "loss": 27.4476, + "step": 475960 + }, + { + "epoch": 0.9614895138515739, + "grad_norm": 322.7179260253906, + "learning_rate": 7.019456259594049e-08, + "loss": 21.5527, + "step": 475970 + }, + { + "epoch": 0.9615097144842576, + "grad_norm": 268.53509521484375, + "learning_rate": 7.01362892521329e-08, + "loss": 12.8646, + "step": 475980 + }, + { + "epoch": 0.9615299151169414, + "grad_norm": 343.81280517578125, + "learning_rate": 7.007803993588358e-08, + "loss": 16.417, + "step": 475990 + }, + { + "epoch": 0.9615501157496252, + "grad_norm": 294.3307800292969, + "learning_rate": 7.001981464747565e-08, + "loss": 21.8227, + "step": 476000 + }, + { + "epoch": 0.961570316382309, + "grad_norm": 237.7524871826172, + "learning_rate": 6.996161338719332e-08, + "loss": 20.9503, + "step": 476010 + }, + { + "epoch": 0.9615905170149929, + "grad_norm": 68.56524658203125, + "learning_rate": 6.990343615532025e-08, + "loss": 19.5319, + "step": 476020 + }, + { + "epoch": 0.9616107176476767, + "grad_norm": 307.2353210449219, + "learning_rate": 6.9845282952139e-08, + "loss": 17.4761, + "step": 476030 + }, + { + "epoch": 0.9616309182803605, + "grad_norm": 369.35650634765625, + "learning_rate": 6.978715377793489e-08, + "loss": 16.5511, + "step": 476040 + }, + { + "epoch": 0.9616511189130443, + "grad_norm": 201.0762939453125, + "learning_rate": 6.972904863298991e-08, + "loss": 17.8656, + "step": 476050 + }, + { + "epoch": 0.9616713195457282, + "grad_norm": 351.0091857910156, + "learning_rate": 6.967096751758773e-08, + "loss": 15.0056, + "step": 476060 + }, + { + "epoch": 0.961691520178412, + "grad_norm": 385.28924560546875, + "learning_rate": 6.961291043201145e-08, + "loss": 17.2955, + "step": 476070 + }, + { + "epoch": 0.9617117208110958, + "grad_norm": 245.84681701660156, + "learning_rate": 6.955487737654309e-08, + "loss": 13.5286, + "step": 476080 + }, + { + "epoch": 0.9617319214437796, + "grad_norm": 306.4953918457031, + "learning_rate": 6.949686835146685e-08, + "loss": 17.9128, + "step": 476090 + }, + { + "epoch": 0.9617521220764634, + "grad_norm": 122.697509765625, + "learning_rate": 6.943888335706472e-08, + "loss": 21.3639, + "step": 476100 + }, + { + "epoch": 0.9617723227091473, + "grad_norm": 0.0, + "learning_rate": 6.938092239361982e-08, + "loss": 6.4645, + "step": 476110 + }, + { + "epoch": 0.9617925233418311, + "grad_norm": 368.9552307128906, + "learning_rate": 6.932298546141413e-08, + "loss": 14.1267, + "step": 476120 + }, + { + "epoch": 0.9618127239745149, + "grad_norm": 334.83331298828125, + "learning_rate": 6.926507256072967e-08, + "loss": 25.3837, + "step": 476130 + }, + { + "epoch": 0.9618329246071987, + "grad_norm": 244.06455993652344, + "learning_rate": 6.920718369185009e-08, + "loss": 13.8009, + "step": 476140 + }, + { + "epoch": 0.9618531252398825, + "grad_norm": 283.6094055175781, + "learning_rate": 6.914931885505626e-08, + "loss": 13.1624, + "step": 476150 + }, + { + "epoch": 0.9618733258725664, + "grad_norm": 208.09109497070312, + "learning_rate": 6.909147805063021e-08, + "loss": 47.2771, + "step": 476160 + }, + { + "epoch": 0.9618935265052502, + "grad_norm": 317.6132507324219, + "learning_rate": 6.903366127885447e-08, + "loss": 12.3003, + "step": 476170 + }, + { + "epoch": 0.961913727137934, + "grad_norm": 201.04429626464844, + "learning_rate": 6.897586854001048e-08, + "loss": 26.5013, + "step": 476180 + }, + { + "epoch": 0.9619339277706178, + "grad_norm": 231.37472534179688, + "learning_rate": 6.89180998343808e-08, + "loss": 21.9749, + "step": 476190 + }, + { + "epoch": 0.9619541284033016, + "grad_norm": 196.7626495361328, + "learning_rate": 6.88603551622452e-08, + "loss": 15.4808, + "step": 476200 + }, + { + "epoch": 0.9619743290359855, + "grad_norm": 521.8115844726562, + "learning_rate": 6.88026345238868e-08, + "loss": 29.6634, + "step": 476210 + }, + { + "epoch": 0.9619945296686693, + "grad_norm": 386.37030029296875, + "learning_rate": 6.874493791958648e-08, + "loss": 14.1437, + "step": 476220 + }, + { + "epoch": 0.9620147303013531, + "grad_norm": 260.12591552734375, + "learning_rate": 6.868726534962456e-08, + "loss": 20.8724, + "step": 476230 + }, + { + "epoch": 0.9620349309340368, + "grad_norm": 239.26620483398438, + "learning_rate": 6.862961681428304e-08, + "loss": 32.5396, + "step": 476240 + }, + { + "epoch": 0.9620551315667206, + "grad_norm": 331.8974304199219, + "learning_rate": 6.857199231384282e-08, + "loss": 24.4839, + "step": 476250 + }, + { + "epoch": 0.9620753321994044, + "grad_norm": 369.7519836425781, + "learning_rate": 6.851439184858477e-08, + "loss": 22.7557, + "step": 476260 + }, + { + "epoch": 0.9620955328320883, + "grad_norm": 375.781005859375, + "learning_rate": 6.845681541878924e-08, + "loss": 16.3223, + "step": 476270 + }, + { + "epoch": 0.9621157334647721, + "grad_norm": 234.12542724609375, + "learning_rate": 6.83992630247371e-08, + "loss": 16.0636, + "step": 476280 + }, + { + "epoch": 0.9621359340974559, + "grad_norm": 13.830657005310059, + "learning_rate": 6.834173466670923e-08, + "loss": 12.3598, + "step": 476290 + }, + { + "epoch": 0.9621561347301397, + "grad_norm": 0.0, + "learning_rate": 6.828423034498488e-08, + "loss": 7.1124, + "step": 476300 + }, + { + "epoch": 0.9621763353628235, + "grad_norm": 351.7645568847656, + "learning_rate": 6.822675005984547e-08, + "loss": 13.3322, + "step": 476310 + }, + { + "epoch": 0.9621965359955074, + "grad_norm": 311.44940185546875, + "learning_rate": 6.816929381157023e-08, + "loss": 14.2324, + "step": 476320 + }, + { + "epoch": 0.9622167366281912, + "grad_norm": 296.13848876953125, + "learning_rate": 6.811186160044004e-08, + "loss": 22.1924, + "step": 476330 + }, + { + "epoch": 0.962236937260875, + "grad_norm": 232.44021606445312, + "learning_rate": 6.805445342673467e-08, + "loss": 15.257, + "step": 476340 + }, + { + "epoch": 0.9622571378935588, + "grad_norm": 378.85284423828125, + "learning_rate": 6.799706929073335e-08, + "loss": 17.1737, + "step": 476350 + }, + { + "epoch": 0.9622773385262426, + "grad_norm": 75.30892181396484, + "learning_rate": 6.793970919271642e-08, + "loss": 20.28, + "step": 476360 + }, + { + "epoch": 0.9622975391589265, + "grad_norm": 468.77410888671875, + "learning_rate": 6.788237313296309e-08, + "loss": 21.4126, + "step": 476370 + }, + { + "epoch": 0.9623177397916103, + "grad_norm": 248.53173828125, + "learning_rate": 6.782506111175313e-08, + "loss": 19.6177, + "step": 476380 + }, + { + "epoch": 0.9623379404242941, + "grad_norm": 253.32679748535156, + "learning_rate": 6.776777312936522e-08, + "loss": 7.8689, + "step": 476390 + }, + { + "epoch": 0.9623581410569779, + "grad_norm": 4.7597880363464355, + "learning_rate": 6.771050918607913e-08, + "loss": 22.3711, + "step": 476400 + }, + { + "epoch": 0.9623783416896617, + "grad_norm": 215.17648315429688, + "learning_rate": 6.765326928217408e-08, + "loss": 22.2809, + "step": 476410 + }, + { + "epoch": 0.9623985423223456, + "grad_norm": 174.30722045898438, + "learning_rate": 6.759605341792819e-08, + "loss": 16.431, + "step": 476420 + }, + { + "epoch": 0.9624187429550294, + "grad_norm": 174.83242797851562, + "learning_rate": 6.753886159362122e-08, + "loss": 16.3768, + "step": 476430 + }, + { + "epoch": 0.9624389435877132, + "grad_norm": 347.6761169433594, + "learning_rate": 6.748169380953184e-08, + "loss": 21.2614, + "step": 476440 + }, + { + "epoch": 0.962459144220397, + "grad_norm": 322.230224609375, + "learning_rate": 6.742455006593762e-08, + "loss": 24.265, + "step": 476450 + }, + { + "epoch": 0.9624793448530808, + "grad_norm": 295.3970947265625, + "learning_rate": 6.736743036311832e-08, + "loss": 28.6171, + "step": 476460 + }, + { + "epoch": 0.9624995454857647, + "grad_norm": 582.6659545898438, + "learning_rate": 6.731033470135262e-08, + "loss": 21.0782, + "step": 476470 + }, + { + "epoch": 0.9625197461184485, + "grad_norm": 438.8211975097656, + "learning_rate": 6.725326308091751e-08, + "loss": 24.4582, + "step": 476480 + }, + { + "epoch": 0.9625399467511322, + "grad_norm": 236.2390594482422, + "learning_rate": 6.71962155020911e-08, + "loss": 21.9683, + "step": 476490 + }, + { + "epoch": 0.962560147383816, + "grad_norm": 535.292724609375, + "learning_rate": 6.713919196515317e-08, + "loss": 24.2894, + "step": 476500 + }, + { + "epoch": 0.9625803480164998, + "grad_norm": 183.3269805908203, + "learning_rate": 6.708219247038017e-08, + "loss": 17.0726, + "step": 476510 + }, + { + "epoch": 0.9626005486491837, + "grad_norm": 291.149169921875, + "learning_rate": 6.702521701804965e-08, + "loss": 18.624, + "step": 476520 + }, + { + "epoch": 0.9626207492818675, + "grad_norm": 192.3579559326172, + "learning_rate": 6.696826560844027e-08, + "loss": 19.7489, + "step": 476530 + }, + { + "epoch": 0.9626409499145513, + "grad_norm": 285.07794189453125, + "learning_rate": 6.691133824183016e-08, + "loss": 26.5873, + "step": 476540 + }, + { + "epoch": 0.9626611505472351, + "grad_norm": 122.84087371826172, + "learning_rate": 6.685443491849464e-08, + "loss": 20.8861, + "step": 476550 + }, + { + "epoch": 0.9626813511799189, + "grad_norm": 106.66376495361328, + "learning_rate": 6.679755563871292e-08, + "loss": 15.7506, + "step": 476560 + }, + { + "epoch": 0.9627015518126028, + "grad_norm": 464.84716796875, + "learning_rate": 6.674070040276148e-08, + "loss": 23.3472, + "step": 476570 + }, + { + "epoch": 0.9627217524452866, + "grad_norm": 312.5509338378906, + "learning_rate": 6.66838692109173e-08, + "loss": 22.8308, + "step": 476580 + }, + { + "epoch": 0.9627419530779704, + "grad_norm": 59.928245544433594, + "learning_rate": 6.662706206345793e-08, + "loss": 10.7981, + "step": 476590 + }, + { + "epoch": 0.9627621537106542, + "grad_norm": 200.5502166748047, + "learning_rate": 6.657027896065982e-08, + "loss": 15.4725, + "step": 476600 + }, + { + "epoch": 0.962782354343338, + "grad_norm": 77.65949249267578, + "learning_rate": 6.651351990279997e-08, + "loss": 3.993, + "step": 476610 + }, + { + "epoch": 0.9628025549760219, + "grad_norm": 181.7306365966797, + "learning_rate": 6.645678489015428e-08, + "loss": 15.1982, + "step": 476620 + }, + { + "epoch": 0.9628227556087057, + "grad_norm": 362.1164245605469, + "learning_rate": 6.64000739230003e-08, + "loss": 28.1297, + "step": 476630 + }, + { + "epoch": 0.9628429562413895, + "grad_norm": 525.9012451171875, + "learning_rate": 6.634338700161392e-08, + "loss": 21.3613, + "step": 476640 + }, + { + "epoch": 0.9628631568740733, + "grad_norm": 80.144287109375, + "learning_rate": 6.628672412627158e-08, + "loss": 11.4526, + "step": 476650 + }, + { + "epoch": 0.9628833575067571, + "grad_norm": 275.218505859375, + "learning_rate": 6.623008529724917e-08, + "loss": 16.0609, + "step": 476660 + }, + { + "epoch": 0.962903558139441, + "grad_norm": 254.39096069335938, + "learning_rate": 6.617347051482315e-08, + "loss": 18.5169, + "step": 476670 + }, + { + "epoch": 0.9629237587721248, + "grad_norm": 220.32614135742188, + "learning_rate": 6.611687977926939e-08, + "loss": 14.3258, + "step": 476680 + }, + { + "epoch": 0.9629439594048086, + "grad_norm": 162.96490478515625, + "learning_rate": 6.606031309086269e-08, + "loss": 17.6257, + "step": 476690 + }, + { + "epoch": 0.9629641600374924, + "grad_norm": 395.3836364746094, + "learning_rate": 6.60037704498806e-08, + "loss": 23.2249, + "step": 476700 + }, + { + "epoch": 0.9629843606701762, + "grad_norm": 113.44120025634766, + "learning_rate": 6.594725185659734e-08, + "loss": 15.892, + "step": 476710 + }, + { + "epoch": 0.96300456130286, + "grad_norm": 245.11105346679688, + "learning_rate": 6.58907573112888e-08, + "loss": 15.0643, + "step": 476720 + }, + { + "epoch": 0.9630247619355439, + "grad_norm": 350.1731872558594, + "learning_rate": 6.583428681423032e-08, + "loss": 24.1536, + "step": 476730 + }, + { + "epoch": 0.9630449625682277, + "grad_norm": 329.8271789550781, + "learning_rate": 6.577784036569668e-08, + "loss": 16.0069, + "step": 476740 + }, + { + "epoch": 0.9630651632009114, + "grad_norm": 440.2545471191406, + "learning_rate": 6.572141796596376e-08, + "loss": 17.5044, + "step": 476750 + }, + { + "epoch": 0.9630853638335952, + "grad_norm": 88.97496795654297, + "learning_rate": 6.566501961530636e-08, + "loss": 13.9166, + "step": 476760 + }, + { + "epoch": 0.963105564466279, + "grad_norm": 68.35948944091797, + "learning_rate": 6.560864531399869e-08, + "loss": 12.471, + "step": 476770 + }, + { + "epoch": 0.9631257650989629, + "grad_norm": 322.03924560546875, + "learning_rate": 6.555229506231608e-08, + "loss": 23.7958, + "step": 476780 + }, + { + "epoch": 0.9631459657316467, + "grad_norm": 251.2469024658203, + "learning_rate": 6.549596886053334e-08, + "loss": 15.9685, + "step": 476790 + }, + { + "epoch": 0.9631661663643305, + "grad_norm": 123.99053192138672, + "learning_rate": 6.543966670892465e-08, + "loss": 18.9374, + "step": 476800 + }, + { + "epoch": 0.9631863669970143, + "grad_norm": 20.874664306640625, + "learning_rate": 6.538338860776483e-08, + "loss": 10.2562, + "step": 476810 + }, + { + "epoch": 0.9632065676296981, + "grad_norm": 234.0839385986328, + "learning_rate": 6.532713455732753e-08, + "loss": 19.7447, + "step": 476820 + }, + { + "epoch": 0.963226768262382, + "grad_norm": 254.33309936523438, + "learning_rate": 6.527090455788754e-08, + "loss": 13.8631, + "step": 476830 + }, + { + "epoch": 0.9632469688950658, + "grad_norm": 211.86892700195312, + "learning_rate": 6.521469860971852e-08, + "loss": 16.7011, + "step": 476840 + }, + { + "epoch": 0.9632671695277496, + "grad_norm": 371.6062316894531, + "learning_rate": 6.515851671309414e-08, + "loss": 23.3379, + "step": 476850 + }, + { + "epoch": 0.9632873701604334, + "grad_norm": 565.3331298828125, + "learning_rate": 6.51023588682892e-08, + "loss": 16.0675, + "step": 476860 + }, + { + "epoch": 0.9633075707931172, + "grad_norm": 634.7963256835938, + "learning_rate": 6.504622507557679e-08, + "loss": 28.1137, + "step": 476870 + }, + { + "epoch": 0.9633277714258011, + "grad_norm": 259.01446533203125, + "learning_rate": 6.499011533523003e-08, + "loss": 17.5227, + "step": 476880 + }, + { + "epoch": 0.9633479720584849, + "grad_norm": 247.61936950683594, + "learning_rate": 6.493402964752371e-08, + "loss": 16.2293, + "step": 476890 + }, + { + "epoch": 0.9633681726911687, + "grad_norm": 385.30706787109375, + "learning_rate": 6.487796801272983e-08, + "loss": 15.4351, + "step": 476900 + }, + { + "epoch": 0.9633883733238525, + "grad_norm": 0.0, + "learning_rate": 6.482193043112206e-08, + "loss": 9.617, + "step": 476910 + }, + { + "epoch": 0.9634085739565363, + "grad_norm": 351.1435241699219, + "learning_rate": 6.476591690297407e-08, + "loss": 10.6954, + "step": 476920 + }, + { + "epoch": 0.9634287745892202, + "grad_norm": 229.2272186279297, + "learning_rate": 6.470992742855786e-08, + "loss": 16.816, + "step": 476930 + }, + { + "epoch": 0.963448975221904, + "grad_norm": 295.8770751953125, + "learning_rate": 6.465396200814766e-08, + "loss": 19.5691, + "step": 476940 + }, + { + "epoch": 0.9634691758545878, + "grad_norm": 305.0475769042969, + "learning_rate": 6.459802064201437e-08, + "loss": 19.6025, + "step": 476950 + }, + { + "epoch": 0.9634893764872716, + "grad_norm": 346.71063232421875, + "learning_rate": 6.454210333043275e-08, + "loss": 17.3941, + "step": 476960 + }, + { + "epoch": 0.9635095771199554, + "grad_norm": 132.2841339111328, + "learning_rate": 6.448621007367428e-08, + "loss": 19.3763, + "step": 476970 + }, + { + "epoch": 0.9635297777526393, + "grad_norm": 224.59109497070312, + "learning_rate": 6.443034087201095e-08, + "loss": 21.4958, + "step": 476980 + }, + { + "epoch": 0.9635499783853231, + "grad_norm": 483.6046142578125, + "learning_rate": 6.437449572571586e-08, + "loss": 19.1856, + "step": 476990 + }, + { + "epoch": 0.9635701790180068, + "grad_norm": 29.51258659362793, + "learning_rate": 6.431867463506047e-08, + "loss": 11.6267, + "step": 477000 + }, + { + "epoch": 0.9635903796506906, + "grad_norm": 242.21133422851562, + "learning_rate": 6.426287760031736e-08, + "loss": 15.2442, + "step": 477010 + }, + { + "epoch": 0.9636105802833744, + "grad_norm": 118.39478302001953, + "learning_rate": 6.42071046217585e-08, + "loss": 8.5603, + "step": 477020 + }, + { + "epoch": 0.9636307809160582, + "grad_norm": 108.25749969482422, + "learning_rate": 6.415135569965536e-08, + "loss": 19.3951, + "step": 477030 + }, + { + "epoch": 0.9636509815487421, + "grad_norm": 233.133056640625, + "learning_rate": 6.40956308342805e-08, + "loss": 19.9293, + "step": 477040 + }, + { + "epoch": 0.9636711821814259, + "grad_norm": 435.89239501953125, + "learning_rate": 6.403993002590425e-08, + "loss": 14.1264, + "step": 477050 + }, + { + "epoch": 0.9636913828141097, + "grad_norm": 42.959861755371094, + "learning_rate": 6.398425327479863e-08, + "loss": 12.8804, + "step": 477060 + }, + { + "epoch": 0.9637115834467935, + "grad_norm": 398.9713439941406, + "learning_rate": 6.392860058123506e-08, + "loss": 13.7219, + "step": 477070 + }, + { + "epoch": 0.9637317840794773, + "grad_norm": 423.4179382324219, + "learning_rate": 6.387297194548558e-08, + "loss": 29.0164, + "step": 477080 + }, + { + "epoch": 0.9637519847121612, + "grad_norm": 171.33631896972656, + "learning_rate": 6.381736736781996e-08, + "loss": 12.3808, + "step": 477090 + }, + { + "epoch": 0.963772185344845, + "grad_norm": 136.57261657714844, + "learning_rate": 6.376178684850965e-08, + "loss": 17.7935, + "step": 477100 + }, + { + "epoch": 0.9637923859775288, + "grad_norm": 288.0144348144531, + "learning_rate": 6.370623038782608e-08, + "loss": 19.6789, + "step": 477110 + }, + { + "epoch": 0.9638125866102126, + "grad_norm": 27.331716537475586, + "learning_rate": 6.365069798603962e-08, + "loss": 32.6814, + "step": 477120 + }, + { + "epoch": 0.9638327872428964, + "grad_norm": 219.19517517089844, + "learning_rate": 6.359518964342059e-08, + "loss": 12.6894, + "step": 477130 + }, + { + "epoch": 0.9638529878755803, + "grad_norm": 382.98626708984375, + "learning_rate": 6.353970536024045e-08, + "loss": 19.4926, + "step": 477140 + }, + { + "epoch": 0.9638731885082641, + "grad_norm": 256.29315185546875, + "learning_rate": 6.348424513676898e-08, + "loss": 6.9898, + "step": 477150 + }, + { + "epoch": 0.9638933891409479, + "grad_norm": 256.78692626953125, + "learning_rate": 6.342880897327597e-08, + "loss": 13.9918, + "step": 477160 + }, + { + "epoch": 0.9639135897736317, + "grad_norm": 135.3890838623047, + "learning_rate": 6.337339687003286e-08, + "loss": 14.0374, + "step": 477170 + }, + { + "epoch": 0.9639337904063155, + "grad_norm": 377.88525390625, + "learning_rate": 6.331800882730887e-08, + "loss": 18.6535, + "step": 477180 + }, + { + "epoch": 0.9639539910389994, + "grad_norm": 97.87631225585938, + "learning_rate": 6.326264484537437e-08, + "loss": 12.3394, + "step": 477190 + }, + { + "epoch": 0.9639741916716832, + "grad_norm": 488.96820068359375, + "learning_rate": 6.3207304924498e-08, + "loss": 23.6453, + "step": 477200 + }, + { + "epoch": 0.963994392304367, + "grad_norm": 314.15008544921875, + "learning_rate": 6.315198906495179e-08, + "loss": 21.1307, + "step": 477210 + }, + { + "epoch": 0.9640145929370508, + "grad_norm": 405.3951110839844, + "learning_rate": 6.30966972670033e-08, + "loss": 23.4745, + "step": 477220 + }, + { + "epoch": 0.9640347935697346, + "grad_norm": 431.6580810546875, + "learning_rate": 6.304142953092285e-08, + "loss": 18.9896, + "step": 477230 + }, + { + "epoch": 0.9640549942024185, + "grad_norm": 366.255126953125, + "learning_rate": 6.298618585697968e-08, + "loss": 14.078, + "step": 477240 + }, + { + "epoch": 0.9640751948351023, + "grad_norm": 146.75399780273438, + "learning_rate": 6.293096624544304e-08, + "loss": 8.5715, + "step": 477250 + }, + { + "epoch": 0.964095395467786, + "grad_norm": 382.1266784667969, + "learning_rate": 6.287577069658213e-08, + "loss": 10.4208, + "step": 477260 + }, + { + "epoch": 0.9641155961004698, + "grad_norm": 202.35650634765625, + "learning_rate": 6.282059921066564e-08, + "loss": 13.6529, + "step": 477270 + }, + { + "epoch": 0.9641357967331536, + "grad_norm": 148.67845153808594, + "learning_rate": 6.276545178796333e-08, + "loss": 9.3581, + "step": 477280 + }, + { + "epoch": 0.9641559973658375, + "grad_norm": 226.326171875, + "learning_rate": 6.271032842874281e-08, + "loss": 26.5209, + "step": 477290 + }, + { + "epoch": 0.9641761979985213, + "grad_norm": 175.58291625976562, + "learning_rate": 6.265522913327326e-08, + "loss": 9.0418, + "step": 477300 + }, + { + "epoch": 0.9641963986312051, + "grad_norm": 995.6618041992188, + "learning_rate": 6.260015390182395e-08, + "loss": 31.1935, + "step": 477310 + }, + { + "epoch": 0.9642165992638889, + "grad_norm": 40.515159606933594, + "learning_rate": 6.254510273466186e-08, + "loss": 14.612, + "step": 477320 + }, + { + "epoch": 0.9642367998965727, + "grad_norm": 415.0593566894531, + "learning_rate": 6.249007563205679e-08, + "loss": 34.3038, + "step": 477330 + }, + { + "epoch": 0.9642570005292566, + "grad_norm": 590.2013549804688, + "learning_rate": 6.243507259427628e-08, + "loss": 21.9941, + "step": 477340 + }, + { + "epoch": 0.9642772011619404, + "grad_norm": 278.40460205078125, + "learning_rate": 6.238009362158793e-08, + "loss": 16.9624, + "step": 477350 + }, + { + "epoch": 0.9642974017946242, + "grad_norm": 152.50302124023438, + "learning_rate": 6.232513871426038e-08, + "loss": 18.1703, + "step": 477360 + }, + { + "epoch": 0.964317602427308, + "grad_norm": 78.60377502441406, + "learning_rate": 6.227020787256122e-08, + "loss": 14.3266, + "step": 477370 + }, + { + "epoch": 0.9643378030599918, + "grad_norm": 281.2479248046875, + "learning_rate": 6.2215301096758e-08, + "loss": 10.5865, + "step": 477380 + }, + { + "epoch": 0.9643580036926757, + "grad_norm": 558.7536010742188, + "learning_rate": 6.216041838711828e-08, + "loss": 17.3697, + "step": 477390 + }, + { + "epoch": 0.9643782043253595, + "grad_norm": 75.66012573242188, + "learning_rate": 6.210555974391075e-08, + "loss": 28.0086, + "step": 477400 + }, + { + "epoch": 0.9643984049580433, + "grad_norm": 0.0, + "learning_rate": 6.205072516740129e-08, + "loss": 8.1656, + "step": 477410 + }, + { + "epoch": 0.9644186055907271, + "grad_norm": 288.9216613769531, + "learning_rate": 6.199591465785748e-08, + "loss": 10.5967, + "step": 477420 + }, + { + "epoch": 0.9644388062234109, + "grad_norm": 414.6328125, + "learning_rate": 6.194112821554687e-08, + "loss": 27.4639, + "step": 477430 + }, + { + "epoch": 0.9644590068560948, + "grad_norm": 488.18212890625, + "learning_rate": 6.188636584073648e-08, + "loss": 19.7257, + "step": 477440 + }, + { + "epoch": 0.9644792074887786, + "grad_norm": 48.27674102783203, + "learning_rate": 6.183162753369221e-08, + "loss": 11.156, + "step": 477450 + }, + { + "epoch": 0.9644994081214624, + "grad_norm": 413.32781982421875, + "learning_rate": 6.177691329468217e-08, + "loss": 21.3233, + "step": 477460 + }, + { + "epoch": 0.9645196087541462, + "grad_norm": 91.32913970947266, + "learning_rate": 6.17222231239728e-08, + "loss": 13.5473, + "step": 477470 + }, + { + "epoch": 0.96453980938683, + "grad_norm": 288.6951904296875, + "learning_rate": 6.166755702183058e-08, + "loss": 21.0884, + "step": 477480 + }, + { + "epoch": 0.9645600100195139, + "grad_norm": 11.18078327178955, + "learning_rate": 6.161291498852084e-08, + "loss": 17.6018, + "step": 477490 + }, + { + "epoch": 0.9645802106521977, + "grad_norm": 481.84539794921875, + "learning_rate": 6.15582970243117e-08, + "loss": 16.2343, + "step": 477500 + }, + { + "epoch": 0.9646004112848814, + "grad_norm": 32.75508499145508, + "learning_rate": 6.150370312946797e-08, + "loss": 6.4613, + "step": 477510 + }, + { + "epoch": 0.9646206119175652, + "grad_norm": 283.7071533203125, + "learning_rate": 6.144913330425606e-08, + "loss": 23.7083, + "step": 477520 + }, + { + "epoch": 0.964640812550249, + "grad_norm": 204.29095458984375, + "learning_rate": 6.139458754894245e-08, + "loss": 19.4765, + "step": 477530 + }, + { + "epoch": 0.9646610131829328, + "grad_norm": 514.5524291992188, + "learning_rate": 6.134006586379249e-08, + "loss": 17.3997, + "step": 477540 + }, + { + "epoch": 0.9646812138156167, + "grad_norm": 426.7435607910156, + "learning_rate": 6.128556824907205e-08, + "loss": 20.9471, + "step": 477550 + }, + { + "epoch": 0.9647014144483005, + "grad_norm": 450.2146301269531, + "learning_rate": 6.12310947050465e-08, + "loss": 10.5752, + "step": 477560 + }, + { + "epoch": 0.9647216150809843, + "grad_norm": 241.98593139648438, + "learning_rate": 6.11766452319823e-08, + "loss": 14.8117, + "step": 477570 + }, + { + "epoch": 0.9647418157136681, + "grad_norm": 138.69134521484375, + "learning_rate": 6.112221983014366e-08, + "loss": 9.8652, + "step": 477580 + }, + { + "epoch": 0.964762016346352, + "grad_norm": 202.12562561035156, + "learning_rate": 6.106781849979648e-08, + "loss": 13.2307, + "step": 477590 + }, + { + "epoch": 0.9647822169790358, + "grad_norm": 251.06512451171875, + "learning_rate": 6.101344124120557e-08, + "loss": 28.6517, + "step": 477600 + }, + { + "epoch": 0.9648024176117196, + "grad_norm": 472.91064453125, + "learning_rate": 6.095908805463624e-08, + "loss": 28.1801, + "step": 477610 + }, + { + "epoch": 0.9648226182444034, + "grad_norm": 192.46087646484375, + "learning_rate": 6.09047589403533e-08, + "loss": 22.7737, + "step": 477620 + }, + { + "epoch": 0.9648428188770872, + "grad_norm": 327.7209777832031, + "learning_rate": 6.085045389862154e-08, + "loss": 19.1767, + "step": 477630 + }, + { + "epoch": 0.964863019509771, + "grad_norm": 281.77117919921875, + "learning_rate": 6.079617292970519e-08, + "loss": 9.5608, + "step": 477640 + }, + { + "epoch": 0.9648832201424549, + "grad_norm": 411.31976318359375, + "learning_rate": 6.074191603386958e-08, + "loss": 24.7571, + "step": 477650 + }, + { + "epoch": 0.9649034207751387, + "grad_norm": 374.9639587402344, + "learning_rate": 6.068768321137897e-08, + "loss": 11.2828, + "step": 477660 + }, + { + "epoch": 0.9649236214078225, + "grad_norm": 3.5396199226379395, + "learning_rate": 6.0633474462497e-08, + "loss": 5.726, + "step": 477670 + }, + { + "epoch": 0.9649438220405063, + "grad_norm": 147.08334350585938, + "learning_rate": 6.057928978748906e-08, + "loss": 9.2, + "step": 477680 + }, + { + "epoch": 0.9649640226731901, + "grad_norm": 285.4568176269531, + "learning_rate": 6.052512918661879e-08, + "loss": 15.6435, + "step": 477690 + }, + { + "epoch": 0.964984223305874, + "grad_norm": 93.05352783203125, + "learning_rate": 6.047099266014877e-08, + "loss": 21.0014, + "step": 477700 + }, + { + "epoch": 0.9650044239385578, + "grad_norm": 378.9557189941406, + "learning_rate": 6.041688020834491e-08, + "loss": 19.2169, + "step": 477710 + }, + { + "epoch": 0.9650246245712416, + "grad_norm": 6.093135833740234, + "learning_rate": 6.036279183146975e-08, + "loss": 18.4705, + "step": 477720 + }, + { + "epoch": 0.9650448252039254, + "grad_norm": 305.2705993652344, + "learning_rate": 6.030872752978756e-08, + "loss": 16.7821, + "step": 477730 + }, + { + "epoch": 0.9650650258366092, + "grad_norm": 249.75381469726562, + "learning_rate": 6.025468730356144e-08, + "loss": 16.2057, + "step": 477740 + }, + { + "epoch": 0.9650852264692931, + "grad_norm": 3.7394824028015137, + "learning_rate": 6.020067115305451e-08, + "loss": 24.2809, + "step": 477750 + }, + { + "epoch": 0.9651054271019769, + "grad_norm": 169.91648864746094, + "learning_rate": 6.0146679078531e-08, + "loss": 7.8114, + "step": 477760 + }, + { + "epoch": 0.9651256277346606, + "grad_norm": 184.37428283691406, + "learning_rate": 6.009271108025294e-08, + "loss": 9.9487, + "step": 477770 + }, + { + "epoch": 0.9651458283673444, + "grad_norm": 389.8765563964844, + "learning_rate": 6.003876715848345e-08, + "loss": 15.8888, + "step": 477780 + }, + { + "epoch": 0.9651660290000282, + "grad_norm": 524.4351806640625, + "learning_rate": 5.998484731348675e-08, + "loss": 13.3921, + "step": 477790 + }, + { + "epoch": 0.9651862296327121, + "grad_norm": 99.16090393066406, + "learning_rate": 5.993095154552431e-08, + "loss": 11.3058, + "step": 477800 + }, + { + "epoch": 0.9652064302653959, + "grad_norm": 250.9905548095703, + "learning_rate": 5.987707985485925e-08, + "loss": 23.6366, + "step": 477810 + }, + { + "epoch": 0.9652266308980797, + "grad_norm": 243.146240234375, + "learning_rate": 5.982323224175468e-08, + "loss": 12.6277, + "step": 477820 + }, + { + "epoch": 0.9652468315307635, + "grad_norm": 102.24500274658203, + "learning_rate": 5.976940870647207e-08, + "loss": 25.9193, + "step": 477830 + }, + { + "epoch": 0.9652670321634473, + "grad_norm": 115.31416320800781, + "learning_rate": 5.9715609249274e-08, + "loss": 15.9024, + "step": 477840 + }, + { + "epoch": 0.9652872327961312, + "grad_norm": 155.22604370117188, + "learning_rate": 5.966183387042246e-08, + "loss": 22.6824, + "step": 477850 + }, + { + "epoch": 0.965307433428815, + "grad_norm": 185.16635131835938, + "learning_rate": 5.960808257018113e-08, + "loss": 18.6027, + "step": 477860 + }, + { + "epoch": 0.9653276340614988, + "grad_norm": 362.2665710449219, + "learning_rate": 5.955435534881038e-08, + "loss": 20.9415, + "step": 477870 + }, + { + "epoch": 0.9653478346941826, + "grad_norm": 119.57987976074219, + "learning_rate": 5.950065220657164e-08, + "loss": 5.1876, + "step": 477880 + }, + { + "epoch": 0.9653680353268664, + "grad_norm": 383.5547180175781, + "learning_rate": 5.9446973143728605e-08, + "loss": 20.0983, + "step": 477890 + }, + { + "epoch": 0.9653882359595503, + "grad_norm": 17.12600326538086, + "learning_rate": 5.939331816054161e-08, + "loss": 20.9111, + "step": 477900 + }, + { + "epoch": 0.9654084365922341, + "grad_norm": 361.46722412109375, + "learning_rate": 5.9339687257272126e-08, + "loss": 27.1896, + "step": 477910 + }, + { + "epoch": 0.9654286372249179, + "grad_norm": 139.56591796875, + "learning_rate": 5.92860804341816e-08, + "loss": 17.6758, + "step": 477920 + }, + { + "epoch": 0.9654488378576017, + "grad_norm": 302.6304626464844, + "learning_rate": 5.9232497691531496e-08, + "loss": 20.5391, + "step": 477930 + }, + { + "epoch": 0.9654690384902855, + "grad_norm": 114.1827621459961, + "learning_rate": 5.917893902958327e-08, + "loss": 15.3628, + "step": 477940 + }, + { + "epoch": 0.9654892391229694, + "grad_norm": 422.5317077636719, + "learning_rate": 5.9125404448597825e-08, + "loss": 9.8448, + "step": 477950 + }, + { + "epoch": 0.9655094397556532, + "grad_norm": 388.36810302734375, + "learning_rate": 5.9071893948835505e-08, + "loss": 26.9694, + "step": 477960 + }, + { + "epoch": 0.965529640388337, + "grad_norm": 124.12110900878906, + "learning_rate": 5.901840753055776e-08, + "loss": 34.7576, + "step": 477970 + }, + { + "epoch": 0.9655498410210208, + "grad_norm": 166.75607299804688, + "learning_rate": 5.896494519402496e-08, + "loss": 14.2673, + "step": 477980 + }, + { + "epoch": 0.9655700416537046, + "grad_norm": 239.36776733398438, + "learning_rate": 5.891150693949743e-08, + "loss": 9.9732, + "step": 477990 + }, + { + "epoch": 0.9655902422863885, + "grad_norm": 527.8530883789062, + "learning_rate": 5.8858092767236084e-08, + "loss": 29.0657, + "step": 478000 + }, + { + "epoch": 0.9656104429190723, + "grad_norm": 128.67298889160156, + "learning_rate": 5.880470267750127e-08, + "loss": 28.3848, + "step": 478010 + }, + { + "epoch": 0.9656306435517561, + "grad_norm": 322.1239929199219, + "learning_rate": 5.8751336670552775e-08, + "loss": 19.7528, + "step": 478020 + }, + { + "epoch": 0.9656508441844398, + "grad_norm": 436.1974182128906, + "learning_rate": 5.8697994746650946e-08, + "loss": 25.9408, + "step": 478030 + }, + { + "epoch": 0.9656710448171236, + "grad_norm": 1870.925537109375, + "learning_rate": 5.864467690605613e-08, + "loss": 25.4245, + "step": 478040 + }, + { + "epoch": 0.9656912454498074, + "grad_norm": 183.9953155517578, + "learning_rate": 5.8591383149028126e-08, + "loss": 28.0338, + "step": 478050 + }, + { + "epoch": 0.9657114460824913, + "grad_norm": 78.80882263183594, + "learning_rate": 5.8538113475825606e-08, + "loss": 23.307, + "step": 478060 + }, + { + "epoch": 0.9657316467151751, + "grad_norm": 145.49549865722656, + "learning_rate": 5.848486788670893e-08, + "loss": 20.6319, + "step": 478070 + }, + { + "epoch": 0.9657518473478589, + "grad_norm": 355.9044189453125, + "learning_rate": 5.843164638193899e-08, + "loss": 12.1357, + "step": 478080 + }, + { + "epoch": 0.9657720479805427, + "grad_norm": 185.50003051757812, + "learning_rate": 5.837844896177225e-08, + "loss": 16.2037, + "step": 478090 + }, + { + "epoch": 0.9657922486132265, + "grad_norm": 202.33474731445312, + "learning_rate": 5.8325275626470166e-08, + "loss": 13.9172, + "step": 478100 + }, + { + "epoch": 0.9658124492459104, + "grad_norm": 0.0, + "learning_rate": 5.827212637629198e-08, + "loss": 14.8709, + "step": 478110 + }, + { + "epoch": 0.9658326498785942, + "grad_norm": 349.94708251953125, + "learning_rate": 5.821900121149582e-08, + "loss": 31.447, + "step": 478120 + }, + { + "epoch": 0.965852850511278, + "grad_norm": 59.9437141418457, + "learning_rate": 5.8165900132340356e-08, + "loss": 25.3907, + "step": 478130 + }, + { + "epoch": 0.9658730511439618, + "grad_norm": 432.94598388671875, + "learning_rate": 5.8112823139085396e-08, + "loss": 22.3759, + "step": 478140 + }, + { + "epoch": 0.9658932517766456, + "grad_norm": 230.91351318359375, + "learning_rate": 5.80597702319885e-08, + "loss": 17.5498, + "step": 478150 + }, + { + "epoch": 0.9659134524093295, + "grad_norm": 357.3666076660156, + "learning_rate": 5.800674141130946e-08, + "loss": 19.6385, + "step": 478160 + }, + { + "epoch": 0.9659336530420133, + "grad_norm": 388.39483642578125, + "learning_rate": 5.795373667730586e-08, + "loss": 20.044, + "step": 478170 + }, + { + "epoch": 0.9659538536746971, + "grad_norm": 66.93562316894531, + "learning_rate": 5.7900756030236924e-08, + "loss": 17.7336, + "step": 478180 + }, + { + "epoch": 0.9659740543073809, + "grad_norm": 332.9247741699219, + "learning_rate": 5.7847799470360236e-08, + "loss": 11.2676, + "step": 478190 + }, + { + "epoch": 0.9659942549400647, + "grad_norm": 193.0968780517578, + "learning_rate": 5.7794866997933355e-08, + "loss": 12.7786, + "step": 478200 + }, + { + "epoch": 0.9660144555727486, + "grad_norm": 276.2154235839844, + "learning_rate": 5.774195861321552e-08, + "loss": 36.7903, + "step": 478210 + }, + { + "epoch": 0.9660346562054324, + "grad_norm": 274.4327697753906, + "learning_rate": 5.76890743164632e-08, + "loss": 14.1745, + "step": 478220 + }, + { + "epoch": 0.9660548568381162, + "grad_norm": 235.90101623535156, + "learning_rate": 5.763621410793563e-08, + "loss": 27.2531, + "step": 478230 + }, + { + "epoch": 0.9660750574708, + "grad_norm": 61.51955032348633, + "learning_rate": 5.758337798788982e-08, + "loss": 10.9076, + "step": 478240 + }, + { + "epoch": 0.9660952581034838, + "grad_norm": 436.40478515625, + "learning_rate": 5.753056595658224e-08, + "loss": 17.9183, + "step": 478250 + }, + { + "epoch": 0.9661154587361677, + "grad_norm": 154.50656127929688, + "learning_rate": 5.7477778014272124e-08, + "loss": 22.435, + "step": 478260 + }, + { + "epoch": 0.9661356593688515, + "grad_norm": 288.6327209472656, + "learning_rate": 5.7425014161215375e-08, + "loss": 23.1011, + "step": 478270 + }, + { + "epoch": 0.9661558600015352, + "grad_norm": 638.116455078125, + "learning_rate": 5.737227439766957e-08, + "loss": 18.3505, + "step": 478280 + }, + { + "epoch": 0.966176060634219, + "grad_norm": 116.31895446777344, + "learning_rate": 5.7319558723892275e-08, + "loss": 11.1959, + "step": 478290 + }, + { + "epoch": 0.9661962612669028, + "grad_norm": 597.60498046875, + "learning_rate": 5.726686714013996e-08, + "loss": 22.2574, + "step": 478300 + }, + { + "epoch": 0.9662164618995867, + "grad_norm": 80.69058990478516, + "learning_rate": 5.7214199646669076e-08, + "loss": 29.2931, + "step": 478310 + }, + { + "epoch": 0.9662366625322705, + "grad_norm": 150.7561798095703, + "learning_rate": 5.716155624373665e-08, + "loss": 16.8851, + "step": 478320 + }, + { + "epoch": 0.9662568631649543, + "grad_norm": 103.19973754882812, + "learning_rate": 5.710893693159969e-08, + "loss": 17.6151, + "step": 478330 + }, + { + "epoch": 0.9662770637976381, + "grad_norm": 207.61679077148438, + "learning_rate": 5.705634171051411e-08, + "loss": 18.4357, + "step": 478340 + }, + { + "epoch": 0.9662972644303219, + "grad_norm": 57.48310852050781, + "learning_rate": 5.700377058073636e-08, + "loss": 27.1168, + "step": 478350 + }, + { + "epoch": 0.9663174650630058, + "grad_norm": 227.81890869140625, + "learning_rate": 5.6951223542522915e-08, + "loss": 33.3957, + "step": 478360 + }, + { + "epoch": 0.9663376656956896, + "grad_norm": 342.69482421875, + "learning_rate": 5.6898700596129674e-08, + "loss": 9.5855, + "step": 478370 + }, + { + "epoch": 0.9663578663283734, + "grad_norm": 184.25108337402344, + "learning_rate": 5.684620174181255e-08, + "loss": 14.7649, + "step": 478380 + }, + { + "epoch": 0.9663780669610572, + "grad_norm": 163.93824768066406, + "learning_rate": 5.679372697982688e-08, + "loss": 16.5577, + "step": 478390 + }, + { + "epoch": 0.966398267593741, + "grad_norm": 257.0531921386719, + "learning_rate": 5.674127631043025e-08, + "loss": 11.9526, + "step": 478400 + }, + { + "epoch": 0.9664184682264249, + "grad_norm": 1.74449622631073, + "learning_rate": 5.668884973387634e-08, + "loss": 12.227, + "step": 478410 + }, + { + "epoch": 0.9664386688591087, + "grad_norm": 440.2716369628906, + "learning_rate": 5.663644725042161e-08, + "loss": 29.5727, + "step": 478420 + }, + { + "epoch": 0.9664588694917925, + "grad_norm": 326.8595275878906, + "learning_rate": 5.658406886032142e-08, + "loss": 17.7575, + "step": 478430 + }, + { + "epoch": 0.9664790701244763, + "grad_norm": 476.65045166015625, + "learning_rate": 5.653171456383055e-08, + "loss": 22.3876, + "step": 478440 + }, + { + "epoch": 0.9664992707571601, + "grad_norm": 235.6327362060547, + "learning_rate": 5.647938436120437e-08, + "loss": 10.9182, + "step": 478450 + }, + { + "epoch": 0.966519471389844, + "grad_norm": 751.4415893554688, + "learning_rate": 5.642707825269822e-08, + "loss": 22.4554, + "step": 478460 + }, + { + "epoch": 0.9665396720225278, + "grad_norm": 132.41456604003906, + "learning_rate": 5.637479623856745e-08, + "loss": 19.5785, + "step": 478470 + }, + { + "epoch": 0.9665598726552116, + "grad_norm": 219.9902801513672, + "learning_rate": 5.632253831906631e-08, + "loss": 19.1593, + "step": 478480 + }, + { + "epoch": 0.9665800732878954, + "grad_norm": 184.803955078125, + "learning_rate": 5.6270304494449035e-08, + "loss": 20.6811, + "step": 478490 + }, + { + "epoch": 0.9666002739205792, + "grad_norm": 40.44912338256836, + "learning_rate": 5.621809476497098e-08, + "loss": 32.0742, + "step": 478500 + }, + { + "epoch": 0.966620474553263, + "grad_norm": 190.54283142089844, + "learning_rate": 5.616590913088638e-08, + "loss": 16.1784, + "step": 478510 + }, + { + "epoch": 0.9666406751859469, + "grad_norm": 149.3194122314453, + "learning_rate": 5.611374759244892e-08, + "loss": 11.1327, + "step": 478520 + }, + { + "epoch": 0.9666608758186307, + "grad_norm": 491.3622741699219, + "learning_rate": 5.6061610149913957e-08, + "loss": 37.307, + "step": 478530 + }, + { + "epoch": 0.9666810764513144, + "grad_norm": 163.3978729248047, + "learning_rate": 5.6009496803534624e-08, + "loss": 24.0111, + "step": 478540 + }, + { + "epoch": 0.9667012770839982, + "grad_norm": 292.5745544433594, + "learning_rate": 5.595740755356627e-08, + "loss": 16.4687, + "step": 478550 + }, + { + "epoch": 0.966721477716682, + "grad_norm": 4066.260009765625, + "learning_rate": 5.590534240026146e-08, + "loss": 43.7808, + "step": 478560 + }, + { + "epoch": 0.9667416783493659, + "grad_norm": 19.287261962890625, + "learning_rate": 5.58533013438739e-08, + "loss": 18.9646, + "step": 478570 + }, + { + "epoch": 0.9667618789820497, + "grad_norm": 186.73593139648438, + "learning_rate": 5.580128438465837e-08, + "loss": 11.4306, + "step": 478580 + }, + { + "epoch": 0.9667820796147335, + "grad_norm": 394.5108642578125, + "learning_rate": 5.574929152286745e-08, + "loss": 15.8368, + "step": 478590 + }, + { + "epoch": 0.9668022802474173, + "grad_norm": 170.31736755371094, + "learning_rate": 5.569732275875428e-08, + "loss": 18.6165, + "step": 478600 + }, + { + "epoch": 0.9668224808801011, + "grad_norm": 207.41769409179688, + "learning_rate": 5.5645378092573085e-08, + "loss": 35.4603, + "step": 478610 + }, + { + "epoch": 0.966842681512785, + "grad_norm": 209.5960235595703, + "learning_rate": 5.559345752457701e-08, + "loss": 13.7902, + "step": 478620 + }, + { + "epoch": 0.9668628821454688, + "grad_norm": 342.3749084472656, + "learning_rate": 5.554156105501862e-08, + "loss": 37.1281, + "step": 478630 + }, + { + "epoch": 0.9668830827781526, + "grad_norm": 2.9106838703155518, + "learning_rate": 5.54896886841505e-08, + "loss": 21.7941, + "step": 478640 + }, + { + "epoch": 0.9669032834108364, + "grad_norm": 124.96565246582031, + "learning_rate": 5.543784041222633e-08, + "loss": 10.9223, + "step": 478650 + }, + { + "epoch": 0.9669234840435202, + "grad_norm": 252.74948120117188, + "learning_rate": 5.538601623949869e-08, + "loss": 12.1952, + "step": 478660 + }, + { + "epoch": 0.9669436846762041, + "grad_norm": 360.6080627441406, + "learning_rate": 5.533421616621903e-08, + "loss": 22.9077, + "step": 478670 + }, + { + "epoch": 0.9669638853088879, + "grad_norm": 282.87744140625, + "learning_rate": 5.528244019264106e-08, + "loss": 17.5386, + "step": 478680 + }, + { + "epoch": 0.9669840859415717, + "grad_norm": 116.16239929199219, + "learning_rate": 5.5230688319017344e-08, + "loss": 18.8524, + "step": 478690 + }, + { + "epoch": 0.9670042865742555, + "grad_norm": 491.3843078613281, + "learning_rate": 5.517896054559879e-08, + "loss": 21.7241, + "step": 478700 + }, + { + "epoch": 0.9670244872069393, + "grad_norm": 352.613037109375, + "learning_rate": 5.512725687263853e-08, + "loss": 16.5981, + "step": 478710 + }, + { + "epoch": 0.9670446878396232, + "grad_norm": 576.197998046875, + "learning_rate": 5.507557730038859e-08, + "loss": 14.4004, + "step": 478720 + }, + { + "epoch": 0.967064888472307, + "grad_norm": 394.2955322265625, + "learning_rate": 5.5023921829100434e-08, + "loss": 25.8643, + "step": 478730 + }, + { + "epoch": 0.9670850891049908, + "grad_norm": 498.21356201171875, + "learning_rate": 5.497229045902552e-08, + "loss": 32.8693, + "step": 478740 + }, + { + "epoch": 0.9671052897376746, + "grad_norm": 5.832070350646973, + "learning_rate": 5.492068319041588e-08, + "loss": 34.8002, + "step": 478750 + }, + { + "epoch": 0.9671254903703584, + "grad_norm": 367.8793029785156, + "learning_rate": 5.4869100023523526e-08, + "loss": 18.0233, + "step": 478760 + }, + { + "epoch": 0.9671456910030423, + "grad_norm": 101.5944595336914, + "learning_rate": 5.4817540958598814e-08, + "loss": 5.5856, + "step": 478770 + }, + { + "epoch": 0.9671658916357261, + "grad_norm": 380.5604553222656, + "learning_rate": 5.476600599589377e-08, + "loss": 24.7774, + "step": 478780 + }, + { + "epoch": 0.9671860922684098, + "grad_norm": 149.87095642089844, + "learning_rate": 5.471449513565985e-08, + "loss": 21.6804, + "step": 478790 + }, + { + "epoch": 0.9672062929010936, + "grad_norm": 147.40972900390625, + "learning_rate": 5.466300837814797e-08, + "loss": 16.2145, + "step": 478800 + }, + { + "epoch": 0.9672264935337774, + "grad_norm": 272.8534240722656, + "learning_rate": 5.461154572360794e-08, + "loss": 21.2751, + "step": 478810 + }, + { + "epoch": 0.9672466941664613, + "grad_norm": 401.5909729003906, + "learning_rate": 5.456010717229177e-08, + "loss": 22.2067, + "step": 478820 + }, + { + "epoch": 0.9672668947991451, + "grad_norm": 242.17041015625, + "learning_rate": 5.4508692724449806e-08, + "loss": 18.2951, + "step": 478830 + }, + { + "epoch": 0.9672870954318289, + "grad_norm": 166.62091064453125, + "learning_rate": 5.445730238033298e-08, + "loss": 7.8322, + "step": 478840 + }, + { + "epoch": 0.9673072960645127, + "grad_norm": 176.90423583984375, + "learning_rate": 5.440593614019107e-08, + "loss": 12.1154, + "step": 478850 + }, + { + "epoch": 0.9673274966971965, + "grad_norm": 189.25193786621094, + "learning_rate": 5.435459400427501e-08, + "loss": 14.5552, + "step": 478860 + }, + { + "epoch": 0.9673476973298804, + "grad_norm": 443.8551330566406, + "learning_rate": 5.4303275972834577e-08, + "loss": 23.8017, + "step": 478870 + }, + { + "epoch": 0.9673678979625642, + "grad_norm": 216.1768341064453, + "learning_rate": 5.42519820461207e-08, + "loss": 10.8594, + "step": 478880 + }, + { + "epoch": 0.967388098595248, + "grad_norm": 56.928367614746094, + "learning_rate": 5.4200712224382056e-08, + "loss": 10.5989, + "step": 478890 + }, + { + "epoch": 0.9674082992279318, + "grad_norm": 219.41473388671875, + "learning_rate": 5.414946650786957e-08, + "loss": 10.5286, + "step": 478900 + }, + { + "epoch": 0.9674284998606156, + "grad_norm": 177.71142578125, + "learning_rate": 5.409824489683247e-08, + "loss": 29.7072, + "step": 478910 + }, + { + "epoch": 0.9674487004932995, + "grad_norm": 189.0150604248047, + "learning_rate": 5.4047047391521114e-08, + "loss": 22.9577, + "step": 478920 + }, + { + "epoch": 0.9674689011259833, + "grad_norm": 148.5370330810547, + "learning_rate": 5.39958739921842e-08, + "loss": 19.9695, + "step": 478930 + }, + { + "epoch": 0.9674891017586671, + "grad_norm": 355.3103942871094, + "learning_rate": 5.394472469907208e-08, + "loss": 25.7296, + "step": 478940 + }, + { + "epoch": 0.9675093023913509, + "grad_norm": 356.6740417480469, + "learning_rate": 5.389359951243345e-08, + "loss": 9.252, + "step": 478950 + }, + { + "epoch": 0.9675295030240347, + "grad_norm": 148.18887329101562, + "learning_rate": 5.3842498432516986e-08, + "loss": 13.152, + "step": 478960 + }, + { + "epoch": 0.9675497036567186, + "grad_norm": 0.0, + "learning_rate": 5.3791421459571947e-08, + "loss": 11.5646, + "step": 478970 + }, + { + "epoch": 0.9675699042894024, + "grad_norm": 124.81631469726562, + "learning_rate": 5.374036859384868e-08, + "loss": 10.3315, + "step": 478980 + }, + { + "epoch": 0.9675901049220862, + "grad_norm": 374.0554504394531, + "learning_rate": 5.3689339835594215e-08, + "loss": 12.807, + "step": 478990 + }, + { + "epoch": 0.96761030555477, + "grad_norm": 206.81459045410156, + "learning_rate": 5.363833518505834e-08, + "loss": 8.8482, + "step": 479000 + }, + { + "epoch": 0.9676305061874538, + "grad_norm": 415.3966369628906, + "learning_rate": 5.358735464248921e-08, + "loss": 16.134, + "step": 479010 + }, + { + "epoch": 0.9676507068201377, + "grad_norm": 365.6966247558594, + "learning_rate": 5.3536398208135495e-08, + "loss": 21.1034, + "step": 479020 + }, + { + "epoch": 0.9676709074528215, + "grad_norm": 42.270973205566406, + "learning_rate": 5.348546588224535e-08, + "loss": 21.1337, + "step": 479030 + }, + { + "epoch": 0.9676911080855053, + "grad_norm": 120.83961486816406, + "learning_rate": 5.343455766506689e-08, + "loss": 21.1428, + "step": 479040 + }, + { + "epoch": 0.967711308718189, + "grad_norm": 136.2984619140625, + "learning_rate": 5.338367355684881e-08, + "loss": 21.5983, + "step": 479050 + }, + { + "epoch": 0.9677315093508728, + "grad_norm": 217.37240600585938, + "learning_rate": 5.33328135578387e-08, + "loss": 25.2889, + "step": 479060 + }, + { + "epoch": 0.9677517099835566, + "grad_norm": 302.94403076171875, + "learning_rate": 5.3281977668284136e-08, + "loss": 19.4852, + "step": 479070 + }, + { + "epoch": 0.9677719106162405, + "grad_norm": 233.90975952148438, + "learning_rate": 5.323116588843324e-08, + "loss": 15.5937, + "step": 479080 + }, + { + "epoch": 0.9677921112489243, + "grad_norm": 372.6578674316406, + "learning_rate": 5.318037821853417e-08, + "loss": 18.6789, + "step": 479090 + }, + { + "epoch": 0.9678123118816081, + "grad_norm": 291.18841552734375, + "learning_rate": 5.312961465883393e-08, + "loss": 16.7501, + "step": 479100 + }, + { + "epoch": 0.9678325125142919, + "grad_norm": 382.44415283203125, + "learning_rate": 5.307887520957955e-08, + "loss": 10.8481, + "step": 479110 + }, + { + "epoch": 0.9678527131469757, + "grad_norm": 246.8311309814453, + "learning_rate": 5.302815987101917e-08, + "loss": 10.9505, + "step": 479120 + }, + { + "epoch": 0.9678729137796596, + "grad_norm": 475.86279296875, + "learning_rate": 5.2977468643399254e-08, + "loss": 22.5141, + "step": 479130 + }, + { + "epoch": 0.9678931144123434, + "grad_norm": 458.67535400390625, + "learning_rate": 5.292680152696739e-08, + "loss": 19.024, + "step": 479140 + }, + { + "epoch": 0.9679133150450272, + "grad_norm": 102.90484619140625, + "learning_rate": 5.2876158521969476e-08, + "loss": 20.0739, + "step": 479150 + }, + { + "epoch": 0.967933515677711, + "grad_norm": 497.6946105957031, + "learning_rate": 5.282553962865422e-08, + "loss": 11.9773, + "step": 479160 + }, + { + "epoch": 0.9679537163103948, + "grad_norm": 279.33380126953125, + "learning_rate": 5.2774944847266976e-08, + "loss": 16.3153, + "step": 479170 + }, + { + "epoch": 0.9679739169430787, + "grad_norm": 376.793701171875, + "learning_rate": 5.27243741780542e-08, + "loss": 18.424, + "step": 479180 + }, + { + "epoch": 0.9679941175757625, + "grad_norm": 335.0399475097656, + "learning_rate": 5.267382762126294e-08, + "loss": 19.7251, + "step": 479190 + }, + { + "epoch": 0.9680143182084463, + "grad_norm": 216.72479248046875, + "learning_rate": 5.262330517713965e-08, + "loss": 9.3309, + "step": 479200 + }, + { + "epoch": 0.9680345188411301, + "grad_norm": 217.37112426757812, + "learning_rate": 5.2572806845930244e-08, + "loss": 23.5566, + "step": 479210 + }, + { + "epoch": 0.9680547194738139, + "grad_norm": 208.50352478027344, + "learning_rate": 5.252233262788065e-08, + "loss": 14.9861, + "step": 479220 + }, + { + "epoch": 0.9680749201064978, + "grad_norm": 129.575927734375, + "learning_rate": 5.247188252323787e-08, + "loss": 12.8478, + "step": 479230 + }, + { + "epoch": 0.9680951207391816, + "grad_norm": 295.2030029296875, + "learning_rate": 5.242145653224673e-08, + "loss": 13.1718, + "step": 479240 + }, + { + "epoch": 0.9681153213718654, + "grad_norm": 169.93435668945312, + "learning_rate": 5.237105465515258e-08, + "loss": 17.8442, + "step": 479250 + }, + { + "epoch": 0.9681355220045492, + "grad_norm": 218.6941375732422, + "learning_rate": 5.2320676892202996e-08, + "loss": 17.0933, + "step": 479260 + }, + { + "epoch": 0.968155722637233, + "grad_norm": 210.277099609375, + "learning_rate": 5.227032324364167e-08, + "loss": 16.3555, + "step": 479270 + }, + { + "epoch": 0.9681759232699169, + "grad_norm": 180.5042724609375, + "learning_rate": 5.2219993709714535e-08, + "loss": 11.6651, + "step": 479280 + }, + { + "epoch": 0.9681961239026007, + "grad_norm": 233.86984252929688, + "learning_rate": 5.2169688290667485e-08, + "loss": 18.3108, + "step": 479290 + }, + { + "epoch": 0.9682163245352845, + "grad_norm": 415.8732604980469, + "learning_rate": 5.2119406986745336e-08, + "loss": 16.1397, + "step": 479300 + }, + { + "epoch": 0.9682365251679682, + "grad_norm": 219.26419067382812, + "learning_rate": 5.206914979819289e-08, + "loss": 19.1014, + "step": 479310 + }, + { + "epoch": 0.968256725800652, + "grad_norm": 318.5694274902344, + "learning_rate": 5.2018916725254945e-08, + "loss": 31.2966, + "step": 479320 + }, + { + "epoch": 0.9682769264333358, + "grad_norm": 189.90823364257812, + "learning_rate": 5.196870776817742e-08, + "loss": 17.2804, + "step": 479330 + }, + { + "epoch": 0.9682971270660197, + "grad_norm": 182.29135131835938, + "learning_rate": 5.191852292720401e-08, + "loss": 30.5925, + "step": 479340 + }, + { + "epoch": 0.9683173276987035, + "grad_norm": 503.1014099121094, + "learning_rate": 5.186836220257951e-08, + "loss": 19.8871, + "step": 479350 + }, + { + "epoch": 0.9683375283313873, + "grad_norm": 464.2020263671875, + "learning_rate": 5.1818225594548185e-08, + "loss": 12.8837, + "step": 479360 + }, + { + "epoch": 0.9683577289640711, + "grad_norm": 62.10761642456055, + "learning_rate": 5.176811310335539e-08, + "loss": 19.4416, + "step": 479370 + }, + { + "epoch": 0.968377929596755, + "grad_norm": 142.53768920898438, + "learning_rate": 5.17180247292437e-08, + "loss": 10.611, + "step": 479380 + }, + { + "epoch": 0.9683981302294388, + "grad_norm": 214.14083862304688, + "learning_rate": 5.1667960472459034e-08, + "loss": 13.5588, + "step": 479390 + }, + { + "epoch": 0.9684183308621226, + "grad_norm": 101.42478942871094, + "learning_rate": 5.161792033324398e-08, + "loss": 12.2453, + "step": 479400 + }, + { + "epoch": 0.9684385314948064, + "grad_norm": 175.5721435546875, + "learning_rate": 5.1567904311843886e-08, + "loss": 15.277, + "step": 479410 + }, + { + "epoch": 0.9684587321274902, + "grad_norm": 202.87779235839844, + "learning_rate": 5.151791240850079e-08, + "loss": 15.1492, + "step": 479420 + }, + { + "epoch": 0.968478932760174, + "grad_norm": 276.27227783203125, + "learning_rate": 5.14679446234595e-08, + "loss": 41.4851, + "step": 479430 + }, + { + "epoch": 0.9684991333928579, + "grad_norm": 384.14471435546875, + "learning_rate": 5.14180009569637e-08, + "loss": 22.8071, + "step": 479440 + }, + { + "epoch": 0.9685193340255417, + "grad_norm": 278.8869323730469, + "learning_rate": 5.136808140925542e-08, + "loss": 13.2723, + "step": 479450 + }, + { + "epoch": 0.9685395346582255, + "grad_norm": 376.0140686035156, + "learning_rate": 5.131818598057947e-08, + "loss": 17.5005, + "step": 479460 + }, + { + "epoch": 0.9685597352909093, + "grad_norm": 220.8706817626953, + "learning_rate": 5.126831467117843e-08, + "loss": 23.8557, + "step": 479470 + }, + { + "epoch": 0.9685799359235931, + "grad_norm": 190.7847137451172, + "learning_rate": 5.121846748129544e-08, + "loss": 20.3755, + "step": 479480 + }, + { + "epoch": 0.968600136556277, + "grad_norm": 310.7080078125, + "learning_rate": 5.116864441117364e-08, + "loss": 21.222, + "step": 479490 + }, + { + "epoch": 0.9686203371889608, + "grad_norm": 335.2868957519531, + "learning_rate": 5.111884546105506e-08, + "loss": 18.6405, + "step": 479500 + }, + { + "epoch": 0.9686405378216446, + "grad_norm": 477.43853759765625, + "learning_rate": 5.106907063118394e-08, + "loss": 15.0737, + "step": 479510 + }, + { + "epoch": 0.9686607384543284, + "grad_norm": 352.6179504394531, + "learning_rate": 5.10193199218012e-08, + "loss": 23.3457, + "step": 479520 + }, + { + "epoch": 0.9686809390870122, + "grad_norm": 299.53912353515625, + "learning_rate": 5.0969593333149994e-08, + "loss": 15.7751, + "step": 479530 + }, + { + "epoch": 0.9687011397196961, + "grad_norm": 234.4359893798828, + "learning_rate": 5.091989086547289e-08, + "loss": 20.7437, + "step": 479540 + }, + { + "epoch": 0.9687213403523799, + "grad_norm": 18.35843849182129, + "learning_rate": 5.0870212519012477e-08, + "loss": 15.5351, + "step": 479550 + }, + { + "epoch": 0.9687415409850636, + "grad_norm": 217.72280883789062, + "learning_rate": 5.082055829400967e-08, + "loss": 17.0471, + "step": 479560 + }, + { + "epoch": 0.9687617416177474, + "grad_norm": 457.1471252441406, + "learning_rate": 5.077092819070761e-08, + "loss": 9.496, + "step": 479570 + }, + { + "epoch": 0.9687819422504312, + "grad_norm": 162.7769775390625, + "learning_rate": 5.072132220934722e-08, + "loss": 23.2226, + "step": 479580 + }, + { + "epoch": 0.9688021428831151, + "grad_norm": 402.1329345703125, + "learning_rate": 5.067174035017164e-08, + "loss": 11.5721, + "step": 479590 + }, + { + "epoch": 0.9688223435157989, + "grad_norm": 137.4611358642578, + "learning_rate": 5.062218261342122e-08, + "loss": 15.9592, + "step": 479600 + }, + { + "epoch": 0.9688425441484827, + "grad_norm": 252.6926727294922, + "learning_rate": 5.0572648999338e-08, + "loss": 10.2002, + "step": 479610 + }, + { + "epoch": 0.9688627447811665, + "grad_norm": 322.97845458984375, + "learning_rate": 5.052313950816401e-08, + "loss": 13.9055, + "step": 479620 + }, + { + "epoch": 0.9688829454138503, + "grad_norm": 331.7187194824219, + "learning_rate": 5.0473654140139604e-08, + "loss": 22.4178, + "step": 479630 + }, + { + "epoch": 0.9689031460465342, + "grad_norm": 489.8714904785156, + "learning_rate": 5.042419289550571e-08, + "loss": 10.4482, + "step": 479640 + }, + { + "epoch": 0.968923346679218, + "grad_norm": 760.1708984375, + "learning_rate": 5.0374755774504346e-08, + "loss": 22.504, + "step": 479650 + }, + { + "epoch": 0.9689435473119018, + "grad_norm": 482.96337890625, + "learning_rate": 5.032534277737644e-08, + "loss": 15.7896, + "step": 479660 + }, + { + "epoch": 0.9689637479445856, + "grad_norm": 342.7398986816406, + "learning_rate": 5.027595390436235e-08, + "loss": 13.5051, + "step": 479670 + }, + { + "epoch": 0.9689839485772694, + "grad_norm": 422.38238525390625, + "learning_rate": 5.0226589155702445e-08, + "loss": 16.7095, + "step": 479680 + }, + { + "epoch": 0.9690041492099533, + "grad_norm": 458.7433776855469, + "learning_rate": 5.017724853163819e-08, + "loss": 28.2092, + "step": 479690 + }, + { + "epoch": 0.9690243498426371, + "grad_norm": 295.3371887207031, + "learning_rate": 5.012793203240995e-08, + "loss": 13.9451, + "step": 479700 + }, + { + "epoch": 0.9690445504753209, + "grad_norm": 325.6849365234375, + "learning_rate": 5.007863965825754e-08, + "loss": 15.4571, + "step": 479710 + }, + { + "epoch": 0.9690647511080047, + "grad_norm": 232.40927124023438, + "learning_rate": 5.002937140942132e-08, + "loss": 9.5497, + "step": 479720 + }, + { + "epoch": 0.9690849517406885, + "grad_norm": 129.34600830078125, + "learning_rate": 4.998012728614221e-08, + "loss": 8.8086, + "step": 479730 + }, + { + "epoch": 0.9691051523733724, + "grad_norm": 312.6191101074219, + "learning_rate": 4.99309072886589e-08, + "loss": 19.7745, + "step": 479740 + }, + { + "epoch": 0.9691253530060562, + "grad_norm": 6.171020030975342, + "learning_rate": 4.988171141721232e-08, + "loss": 14.1907, + "step": 479750 + }, + { + "epoch": 0.96914555363874, + "grad_norm": 263.8418273925781, + "learning_rate": 4.983253967204171e-08, + "loss": 15.5684, + "step": 479760 + }, + { + "epoch": 0.9691657542714238, + "grad_norm": 302.4642028808594, + "learning_rate": 4.9783392053386894e-08, + "loss": 16.7071, + "step": 479770 + }, + { + "epoch": 0.9691859549041076, + "grad_norm": 372.0815124511719, + "learning_rate": 4.9734268561487665e-08, + "loss": 15.0816, + "step": 479780 + }, + { + "epoch": 0.9692061555367915, + "grad_norm": 524.9464721679688, + "learning_rate": 4.968516919658328e-08, + "loss": 10.3388, + "step": 479790 + }, + { + "epoch": 0.9692263561694753, + "grad_norm": 223.0327606201172, + "learning_rate": 4.9636093958913e-08, + "loss": 19.6309, + "step": 479800 + }, + { + "epoch": 0.9692465568021591, + "grad_norm": 181.9541778564453, + "learning_rate": 4.958704284871552e-08, + "loss": 16.8944, + "step": 479810 + }, + { + "epoch": 0.9692667574348428, + "grad_norm": 213.51571655273438, + "learning_rate": 4.9538015866230636e-08, + "loss": 18.4977, + "step": 479820 + }, + { + "epoch": 0.9692869580675266, + "grad_norm": 245.20925903320312, + "learning_rate": 4.948901301169706e-08, + "loss": 9.1325, + "step": 479830 + }, + { + "epoch": 0.9693071587002104, + "grad_norm": 173.84434509277344, + "learning_rate": 4.944003428535349e-08, + "loss": 17.6009, + "step": 479840 + }, + { + "epoch": 0.9693273593328943, + "grad_norm": 501.1705322265625, + "learning_rate": 4.939107968743917e-08, + "loss": 17.0873, + "step": 479850 + }, + { + "epoch": 0.9693475599655781, + "grad_norm": 115.13642120361328, + "learning_rate": 4.9342149218191694e-08, + "loss": 10.3066, + "step": 479860 + }, + { + "epoch": 0.9693677605982619, + "grad_norm": 156.52928161621094, + "learning_rate": 4.9293242877850866e-08, + "loss": 14.234, + "step": 479870 + }, + { + "epoch": 0.9693879612309457, + "grad_norm": 215.22109985351562, + "learning_rate": 4.9244360666653724e-08, + "loss": 42.7303, + "step": 479880 + }, + { + "epoch": 0.9694081618636295, + "grad_norm": 345.40728759765625, + "learning_rate": 4.9195502584839516e-08, + "loss": 27.9623, + "step": 479890 + }, + { + "epoch": 0.9694283624963134, + "grad_norm": 420.7540283203125, + "learning_rate": 4.914666863264528e-08, + "loss": 13.216, + "step": 479900 + }, + { + "epoch": 0.9694485631289972, + "grad_norm": 490.6384582519531, + "learning_rate": 4.9097858810310815e-08, + "loss": 19.9933, + "step": 479910 + }, + { + "epoch": 0.969468763761681, + "grad_norm": 205.8202667236328, + "learning_rate": 4.9049073118072057e-08, + "loss": 22.8096, + "step": 479920 + }, + { + "epoch": 0.9694889643943648, + "grad_norm": 358.317626953125, + "learning_rate": 4.900031155616769e-08, + "loss": 16.5017, + "step": 479930 + }, + { + "epoch": 0.9695091650270486, + "grad_norm": 63.70272445678711, + "learning_rate": 4.8951574124835865e-08, + "loss": 12.9545, + "step": 479940 + }, + { + "epoch": 0.9695293656597325, + "grad_norm": 191.5247344970703, + "learning_rate": 4.890286082431306e-08, + "loss": 31.304, + "step": 479950 + }, + { + "epoch": 0.9695495662924163, + "grad_norm": 293.09033203125, + "learning_rate": 4.885417165483741e-08, + "loss": 13.7888, + "step": 479960 + }, + { + "epoch": 0.9695697669251001, + "grad_norm": 275.1098937988281, + "learning_rate": 4.880550661664541e-08, + "loss": 21.4353, + "step": 479970 + }, + { + "epoch": 0.9695899675577839, + "grad_norm": 267.2708435058594, + "learning_rate": 4.8756865709976284e-08, + "loss": 15.3204, + "step": 479980 + }, + { + "epoch": 0.9696101681904677, + "grad_norm": 473.5832214355469, + "learning_rate": 4.8708248935064315e-08, + "loss": 23.8947, + "step": 479990 + }, + { + "epoch": 0.9696303688231516, + "grad_norm": 218.5522918701172, + "learning_rate": 4.865965629214819e-08, + "loss": 16.9317, + "step": 480000 + }, + { + "epoch": 0.9696505694558354, + "grad_norm": 79.72660064697266, + "learning_rate": 4.861108778146495e-08, + "loss": 13.4541, + "step": 480010 + }, + { + "epoch": 0.9696707700885192, + "grad_norm": 496.3027648925781, + "learning_rate": 4.856254340325051e-08, + "loss": 17.6127, + "step": 480020 + }, + { + "epoch": 0.969690970721203, + "grad_norm": 453.5667724609375, + "learning_rate": 4.851402315774134e-08, + "loss": 18.3573, + "step": 480030 + }, + { + "epoch": 0.9697111713538868, + "grad_norm": 251.51531982421875, + "learning_rate": 4.846552704517449e-08, + "loss": 18.1281, + "step": 480040 + }, + { + "epoch": 0.9697313719865707, + "grad_norm": 126.87972259521484, + "learning_rate": 4.841705506578587e-08, + "loss": 11.8783, + "step": 480050 + }, + { + "epoch": 0.9697515726192545, + "grad_norm": 150.7003173828125, + "learning_rate": 4.836860721981196e-08, + "loss": 16.3773, + "step": 480060 + }, + { + "epoch": 0.9697717732519382, + "grad_norm": 121.2275161743164, + "learning_rate": 4.8320183507489236e-08, + "loss": 14.113, + "step": 480070 + }, + { + "epoch": 0.969791973884622, + "grad_norm": 378.8006286621094, + "learning_rate": 4.827178392905307e-08, + "loss": 18.2608, + "step": 480080 + }, + { + "epoch": 0.9698121745173058, + "grad_norm": 131.00286865234375, + "learning_rate": 4.822340848473994e-08, + "loss": 24.1414, + "step": 480090 + }, + { + "epoch": 0.9698323751499897, + "grad_norm": 132.82247924804688, + "learning_rate": 4.8175057174785766e-08, + "loss": 13.6934, + "step": 480100 + }, + { + "epoch": 0.9698525757826735, + "grad_norm": 195.75389099121094, + "learning_rate": 4.81267299994248e-08, + "loss": 8.3021, + "step": 480110 + }, + { + "epoch": 0.9698727764153573, + "grad_norm": 514.996826171875, + "learning_rate": 4.807842695889409e-08, + "loss": 21.4127, + "step": 480120 + }, + { + "epoch": 0.9698929770480411, + "grad_norm": 240.45498657226562, + "learning_rate": 4.8030148053428424e-08, + "loss": 16.6076, + "step": 480130 + }, + { + "epoch": 0.9699131776807249, + "grad_norm": 174.4800567626953, + "learning_rate": 4.798189328326319e-08, + "loss": 18.3415, + "step": 480140 + }, + { + "epoch": 0.9699333783134088, + "grad_norm": 132.678955078125, + "learning_rate": 4.793366264863375e-08, + "loss": 15.2421, + "step": 480150 + }, + { + "epoch": 0.9699535789460926, + "grad_norm": 133.9465789794922, + "learning_rate": 4.788545614977491e-08, + "loss": 17.5808, + "step": 480160 + }, + { + "epoch": 0.9699737795787764, + "grad_norm": 667.6593627929688, + "learning_rate": 4.783727378692205e-08, + "loss": 23.5546, + "step": 480170 + }, + { + "epoch": 0.9699939802114602, + "grad_norm": 340.83990478515625, + "learning_rate": 4.778911556030885e-08, + "loss": 15.1011, + "step": 480180 + }, + { + "epoch": 0.970014180844144, + "grad_norm": 253.79537963867188, + "learning_rate": 4.774098147017181e-08, + "loss": 29.0629, + "step": 480190 + }, + { + "epoch": 0.9700343814768279, + "grad_norm": 189.45553588867188, + "learning_rate": 4.769287151674407e-08, + "loss": 30.27, + "step": 480200 + }, + { + "epoch": 0.9700545821095117, + "grad_norm": 0.0, + "learning_rate": 4.764478570026043e-08, + "loss": 15.7602, + "step": 480210 + }, + { + "epoch": 0.9700747827421955, + "grad_norm": 187.9842987060547, + "learning_rate": 4.759672402095572e-08, + "loss": 16.245, + "step": 480220 + }, + { + "epoch": 0.9700949833748793, + "grad_norm": 37.108055114746094, + "learning_rate": 4.754868647906419e-08, + "loss": 19.7174, + "step": 480230 + }, + { + "epoch": 0.9701151840075631, + "grad_norm": 51.12001037597656, + "learning_rate": 4.750067307481954e-08, + "loss": 14.801, + "step": 480240 + }, + { + "epoch": 0.970135384640247, + "grad_norm": 278.0903625488281, + "learning_rate": 4.7452683808456026e-08, + "loss": 15.2198, + "step": 480250 + }, + { + "epoch": 0.9701555852729308, + "grad_norm": 429.378662109375, + "learning_rate": 4.740471868020735e-08, + "loss": 13.9195, + "step": 480260 + }, + { + "epoch": 0.9701757859056146, + "grad_norm": 566.3360595703125, + "learning_rate": 4.735677769030722e-08, + "loss": 33.8871, + "step": 480270 + }, + { + "epoch": 0.9701959865382984, + "grad_norm": 303.39007568359375, + "learning_rate": 4.730886083898989e-08, + "loss": 21.8571, + "step": 480280 + }, + { + "epoch": 0.9702161871709822, + "grad_norm": 248.40341186523438, + "learning_rate": 4.726096812648795e-08, + "loss": 17.8176, + "step": 480290 + }, + { + "epoch": 0.9702363878036661, + "grad_norm": 363.1632995605469, + "learning_rate": 4.7213099553035655e-08, + "loss": 12.4366, + "step": 480300 + }, + { + "epoch": 0.9702565884363499, + "grad_norm": 203.74185180664062, + "learning_rate": 4.716525511886616e-08, + "loss": 29.5093, + "step": 480310 + }, + { + "epoch": 0.9702767890690337, + "grad_norm": 107.11116790771484, + "learning_rate": 4.711743482421205e-08, + "loss": 21.8832, + "step": 480320 + }, + { + "epoch": 0.9702969897017174, + "grad_norm": 164.6458740234375, + "learning_rate": 4.7069638669307026e-08, + "loss": 16.5345, + "step": 480330 + }, + { + "epoch": 0.9703171903344012, + "grad_norm": 377.70587158203125, + "learning_rate": 4.702186665438424e-08, + "loss": 14.0841, + "step": 480340 + }, + { + "epoch": 0.970337390967085, + "grad_norm": 159.20343017578125, + "learning_rate": 4.697411877967573e-08, + "loss": 21.3772, + "step": 480350 + }, + { + "epoch": 0.9703575915997689, + "grad_norm": 12.814987182617188, + "learning_rate": 4.692639504541518e-08, + "loss": 7.7102, + "step": 480360 + }, + { + "epoch": 0.9703777922324527, + "grad_norm": 264.45574951171875, + "learning_rate": 4.68786954518341e-08, + "loss": 11.9716, + "step": 480370 + }, + { + "epoch": 0.9703979928651365, + "grad_norm": 302.3787841796875, + "learning_rate": 4.683101999916562e-08, + "loss": 6.1376, + "step": 480380 + }, + { + "epoch": 0.9704181934978203, + "grad_norm": 133.24697875976562, + "learning_rate": 4.6783368687642325e-08, + "loss": 10.4917, + "step": 480390 + }, + { + "epoch": 0.9704383941305041, + "grad_norm": 347.9091491699219, + "learning_rate": 4.6735741517495715e-08, + "loss": 23.6633, + "step": 480400 + }, + { + "epoch": 0.970458594763188, + "grad_norm": 191.81300354003906, + "learning_rate": 4.668813848895837e-08, + "loss": 11.3855, + "step": 480410 + }, + { + "epoch": 0.9704787953958718, + "grad_norm": 149.15115356445312, + "learning_rate": 4.6640559602262325e-08, + "loss": 13.42, + "step": 480420 + }, + { + "epoch": 0.9704989960285556, + "grad_norm": 151.28768920898438, + "learning_rate": 4.6593004857639627e-08, + "loss": 7.9655, + "step": 480430 + }, + { + "epoch": 0.9705191966612394, + "grad_norm": 55.116512298583984, + "learning_rate": 4.654547425532119e-08, + "loss": 12.9752, + "step": 480440 + }, + { + "epoch": 0.9705393972939232, + "grad_norm": 340.17279052734375, + "learning_rate": 4.649796779554016e-08, + "loss": 15.622, + "step": 480450 + }, + { + "epoch": 0.9705595979266071, + "grad_norm": 204.43618774414062, + "learning_rate": 4.645048547852693e-08, + "loss": 16.4864, + "step": 480460 + }, + { + "epoch": 0.9705797985592909, + "grad_norm": 267.153564453125, + "learning_rate": 4.6403027304513513e-08, + "loss": 16.2877, + "step": 480470 + }, + { + "epoch": 0.9705999991919747, + "grad_norm": 366.7547302246094, + "learning_rate": 4.635559327373029e-08, + "loss": 14.5811, + "step": 480480 + }, + { + "epoch": 0.9706201998246585, + "grad_norm": 135.94874572753906, + "learning_rate": 4.6308183386409855e-08, + "loss": 22.3305, + "step": 480490 + }, + { + "epoch": 0.9706404004573423, + "grad_norm": 62.06936264038086, + "learning_rate": 4.626079764278202e-08, + "loss": 16.6228, + "step": 480500 + }, + { + "epoch": 0.9706606010900262, + "grad_norm": 637.1981201171875, + "learning_rate": 4.621343604307826e-08, + "loss": 34.2254, + "step": 480510 + }, + { + "epoch": 0.97068080172271, + "grad_norm": 411.60650634765625, + "learning_rate": 4.616609858753007e-08, + "loss": 15.6918, + "step": 480520 + }, + { + "epoch": 0.9707010023553938, + "grad_norm": 251.51487731933594, + "learning_rate": 4.6118785276366706e-08, + "loss": 19.9691, + "step": 480530 + }, + { + "epoch": 0.9707212029880776, + "grad_norm": 50.386932373046875, + "learning_rate": 4.6071496109819643e-08, + "loss": 15.63, + "step": 480540 + }, + { + "epoch": 0.9707414036207614, + "grad_norm": 380.03997802734375, + "learning_rate": 4.6024231088119266e-08, + "loss": 16.9728, + "step": 480550 + }, + { + "epoch": 0.9707616042534453, + "grad_norm": 331.0513610839844, + "learning_rate": 4.597699021149649e-08, + "loss": 19.1525, + "step": 480560 + }, + { + "epoch": 0.9707818048861291, + "grad_norm": 81.25759887695312, + "learning_rate": 4.592977348018002e-08, + "loss": 15.16, + "step": 480570 + }, + { + "epoch": 0.9708020055188128, + "grad_norm": 463.4545593261719, + "learning_rate": 4.588258089440134e-08, + "loss": 11.7949, + "step": 480580 + }, + { + "epoch": 0.9708222061514966, + "grad_norm": 289.548828125, + "learning_rate": 4.5835412454390823e-08, + "loss": 17.104, + "step": 480590 + }, + { + "epoch": 0.9708424067841804, + "grad_norm": 256.2091064453125, + "learning_rate": 4.578826816037718e-08, + "loss": 27.8903, + "step": 480600 + }, + { + "epoch": 0.9708626074168643, + "grad_norm": 278.6357727050781, + "learning_rate": 4.574114801259022e-08, + "loss": 13.8457, + "step": 480610 + }, + { + "epoch": 0.9708828080495481, + "grad_norm": 317.54296875, + "learning_rate": 4.569405201126087e-08, + "loss": 31.4359, + "step": 480620 + }, + { + "epoch": 0.9709030086822319, + "grad_norm": 362.89520263671875, + "learning_rate": 4.5646980156617284e-08, + "loss": 15.1028, + "step": 480630 + }, + { + "epoch": 0.9709232093149157, + "grad_norm": 528.4024658203125, + "learning_rate": 4.5599932448889276e-08, + "loss": 23.333, + "step": 480640 + }, + { + "epoch": 0.9709434099475995, + "grad_norm": 327.55322265625, + "learning_rate": 4.5552908888306654e-08, + "loss": 13.7529, + "step": 480650 + }, + { + "epoch": 0.9709636105802834, + "grad_norm": 340.1185607910156, + "learning_rate": 4.5505909475098144e-08, + "loss": 18.9533, + "step": 480660 + }, + { + "epoch": 0.9709838112129672, + "grad_norm": 237.61825561523438, + "learning_rate": 4.545893420949299e-08, + "loss": 16.5598, + "step": 480670 + }, + { + "epoch": 0.971004011845651, + "grad_norm": 207.75701904296875, + "learning_rate": 4.5411983091719905e-08, + "loss": 10.5763, + "step": 480680 + }, + { + "epoch": 0.9710242124783348, + "grad_norm": 124.75130462646484, + "learning_rate": 4.5365056122007586e-08, + "loss": 21.4576, + "step": 480690 + }, + { + "epoch": 0.9710444131110186, + "grad_norm": 91.90564727783203, + "learning_rate": 4.531815330058586e-08, + "loss": 21.2379, + "step": 480700 + }, + { + "epoch": 0.9710646137437025, + "grad_norm": 179.62643432617188, + "learning_rate": 4.527127462768233e-08, + "loss": 13.1461, + "step": 480710 + }, + { + "epoch": 0.9710848143763863, + "grad_norm": 373.83477783203125, + "learning_rate": 4.5224420103525125e-08, + "loss": 19.0828, + "step": 480720 + }, + { + "epoch": 0.9711050150090701, + "grad_norm": 281.1725769042969, + "learning_rate": 4.517758972834352e-08, + "loss": 21.119, + "step": 480730 + }, + { + "epoch": 0.9711252156417539, + "grad_norm": 74.29496765136719, + "learning_rate": 4.5130783502365106e-08, + "loss": 15.3173, + "step": 480740 + }, + { + "epoch": 0.9711454162744377, + "grad_norm": 257.2310485839844, + "learning_rate": 4.508400142581859e-08, + "loss": 16.1936, + "step": 480750 + }, + { + "epoch": 0.9711656169071216, + "grad_norm": 362.8109130859375, + "learning_rate": 4.503724349893157e-08, + "loss": 21.5927, + "step": 480760 + }, + { + "epoch": 0.9711858175398054, + "grad_norm": 440.2646179199219, + "learning_rate": 4.49905097219322e-08, + "loss": 21.8716, + "step": 480770 + }, + { + "epoch": 0.9712060181724892, + "grad_norm": 479.6381530761719, + "learning_rate": 4.4943800095048615e-08, + "loss": 17.2833, + "step": 480780 + }, + { + "epoch": 0.971226218805173, + "grad_norm": 459.6658630371094, + "learning_rate": 4.4897114618506765e-08, + "loss": 19.8731, + "step": 480790 + }, + { + "epoch": 0.9712464194378568, + "grad_norm": 211.4539031982422, + "learning_rate": 4.485045329253646e-08, + "loss": 17.9922, + "step": 480800 + }, + { + "epoch": 0.9712666200705407, + "grad_norm": 464.1227722167969, + "learning_rate": 4.480381611736362e-08, + "loss": 14.6579, + "step": 480810 + }, + { + "epoch": 0.9712868207032245, + "grad_norm": 337.0411376953125, + "learning_rate": 4.4757203093215854e-08, + "loss": 11.4597, + "step": 480820 + }, + { + "epoch": 0.9713070213359083, + "grad_norm": 152.1861572265625, + "learning_rate": 4.4710614220320746e-08, + "loss": 10.3488, + "step": 480830 + }, + { + "epoch": 0.971327221968592, + "grad_norm": 182.34927368164062, + "learning_rate": 4.4664049498904796e-08, + "loss": 14.679, + "step": 480840 + }, + { + "epoch": 0.9713474226012758, + "grad_norm": 255.8133087158203, + "learning_rate": 4.4617508929195585e-08, + "loss": 12.6503, + "step": 480850 + }, + { + "epoch": 0.9713676232339596, + "grad_norm": 177.95912170410156, + "learning_rate": 4.457099251141961e-08, + "loss": 7.0826, + "step": 480860 + }, + { + "epoch": 0.9713878238666435, + "grad_norm": 165.25167846679688, + "learning_rate": 4.4524500245803346e-08, + "loss": 13.9274, + "step": 480870 + }, + { + "epoch": 0.9714080244993273, + "grad_norm": 273.8017883300781, + "learning_rate": 4.4478032132573845e-08, + "loss": 19.7758, + "step": 480880 + }, + { + "epoch": 0.9714282251320111, + "grad_norm": 380.6207580566406, + "learning_rate": 4.443158817195703e-08, + "loss": 35.096, + "step": 480890 + }, + { + "epoch": 0.9714484257646949, + "grad_norm": 8.3333158493042, + "learning_rate": 4.438516836417994e-08, + "loss": 20.3232, + "step": 480900 + }, + { + "epoch": 0.9714686263973787, + "grad_norm": 380.8330383300781, + "learning_rate": 4.4338772709468514e-08, + "loss": 14.3037, + "step": 480910 + }, + { + "epoch": 0.9714888270300626, + "grad_norm": 159.7582550048828, + "learning_rate": 4.429240120804923e-08, + "loss": 31.5011, + "step": 480920 + }, + { + "epoch": 0.9715090276627464, + "grad_norm": 370.4774475097656, + "learning_rate": 4.424605386014691e-08, + "loss": 27.2993, + "step": 480930 + }, + { + "epoch": 0.9715292282954302, + "grad_norm": 386.0980224609375, + "learning_rate": 4.4199730665988594e-08, + "loss": 15.8645, + "step": 480940 + }, + { + "epoch": 0.971549428928114, + "grad_norm": 37.42185974121094, + "learning_rate": 4.415343162580022e-08, + "loss": 9.6183, + "step": 480950 + }, + { + "epoch": 0.9715696295607978, + "grad_norm": 93.18374633789062, + "learning_rate": 4.4107156739806037e-08, + "loss": 14.5817, + "step": 480960 + }, + { + "epoch": 0.9715898301934817, + "grad_norm": 8.321996688842773, + "learning_rate": 4.40609060082331e-08, + "loss": 10.9445, + "step": 480970 + }, + { + "epoch": 0.9716100308261655, + "grad_norm": 190.19419860839844, + "learning_rate": 4.401467943130622e-08, + "loss": 12.5028, + "step": 480980 + }, + { + "epoch": 0.9716302314588493, + "grad_norm": 273.48779296875, + "learning_rate": 4.3968477009250775e-08, + "loss": 20.0074, + "step": 480990 + }, + { + "epoch": 0.9716504320915331, + "grad_norm": 71.3777084350586, + "learning_rate": 4.392229874229159e-08, + "loss": 15.8897, + "step": 481000 + }, + { + "epoch": 0.9716706327242169, + "grad_norm": 194.49368286132812, + "learning_rate": 4.387614463065404e-08, + "loss": 25.8925, + "step": 481010 + }, + { + "epoch": 0.9716908333569008, + "grad_norm": 579.4498291015625, + "learning_rate": 4.383001467456294e-08, + "loss": 22.2444, + "step": 481020 + }, + { + "epoch": 0.9717110339895846, + "grad_norm": 1078.0789794921875, + "learning_rate": 4.378390887424366e-08, + "loss": 22.6205, + "step": 481030 + }, + { + "epoch": 0.9717312346222684, + "grad_norm": 369.1848449707031, + "learning_rate": 4.3737827229919926e-08, + "loss": 14.4202, + "step": 481040 + }, + { + "epoch": 0.9717514352549522, + "grad_norm": 144.8754119873047, + "learning_rate": 4.36917697418171e-08, + "loss": 13.1191, + "step": 481050 + }, + { + "epoch": 0.971771635887636, + "grad_norm": 74.88249206542969, + "learning_rate": 4.364573641016001e-08, + "loss": 15.3583, + "step": 481060 + }, + { + "epoch": 0.9717918365203199, + "grad_norm": 296.56024169921875, + "learning_rate": 4.359972723517236e-08, + "loss": 23.6246, + "step": 481070 + }, + { + "epoch": 0.9718120371530037, + "grad_norm": 265.19952392578125, + "learning_rate": 4.3553742217077866e-08, + "loss": 16.3429, + "step": 481080 + }, + { + "epoch": 0.9718322377856875, + "grad_norm": 111.01641845703125, + "learning_rate": 4.350778135610134e-08, + "loss": 11.5564, + "step": 481090 + }, + { + "epoch": 0.9718524384183712, + "grad_norm": 1.675919771194458, + "learning_rate": 4.346184465246761e-08, + "loss": 16.0173, + "step": 481100 + }, + { + "epoch": 0.971872639051055, + "grad_norm": 253.8666534423828, + "learning_rate": 4.3415932106398715e-08, + "loss": 17.6637, + "step": 481110 + }, + { + "epoch": 0.9718928396837389, + "grad_norm": 267.0229187011719, + "learning_rate": 4.3370043718119484e-08, + "loss": 13.4649, + "step": 481120 + }, + { + "epoch": 0.9719130403164227, + "grad_norm": 304.34234619140625, + "learning_rate": 4.332417948785417e-08, + "loss": 22.6865, + "step": 481130 + }, + { + "epoch": 0.9719332409491065, + "grad_norm": 282.455322265625, + "learning_rate": 4.327833941582538e-08, + "loss": 17.5974, + "step": 481140 + }, + { + "epoch": 0.9719534415817903, + "grad_norm": 153.42041015625, + "learning_rate": 4.3232523502256264e-08, + "loss": 28.9189, + "step": 481150 + }, + { + "epoch": 0.9719736422144741, + "grad_norm": 110.81672668457031, + "learning_rate": 4.318673174737109e-08, + "loss": 8.7016, + "step": 481160 + }, + { + "epoch": 0.971993842847158, + "grad_norm": 320.25115966796875, + "learning_rate": 4.3140964151393015e-08, + "loss": 9.325, + "step": 481170 + }, + { + "epoch": 0.9720140434798418, + "grad_norm": 245.3419189453125, + "learning_rate": 4.3095220714544084e-08, + "loss": 13.8234, + "step": 481180 + }, + { + "epoch": 0.9720342441125256, + "grad_norm": 194.27566528320312, + "learning_rate": 4.304950143704745e-08, + "loss": 12.9082, + "step": 481190 + }, + { + "epoch": 0.9720544447452094, + "grad_norm": 192.88880920410156, + "learning_rate": 4.3003806319127376e-08, + "loss": 13.6647, + "step": 481200 + }, + { + "epoch": 0.9720746453778932, + "grad_norm": 271.20098876953125, + "learning_rate": 4.2958135361004794e-08, + "loss": 15.0171, + "step": 481210 + }, + { + "epoch": 0.972094846010577, + "grad_norm": 308.206298828125, + "learning_rate": 4.291248856290342e-08, + "loss": 23.1732, + "step": 481220 + }, + { + "epoch": 0.9721150466432609, + "grad_norm": 272.736083984375, + "learning_rate": 4.28668659250453e-08, + "loss": 18.8223, + "step": 481230 + }, + { + "epoch": 0.9721352472759447, + "grad_norm": 189.8990020751953, + "learning_rate": 4.282126744765247e-08, + "loss": 12.1868, + "step": 481240 + }, + { + "epoch": 0.9721554479086285, + "grad_norm": 373.6046142578125, + "learning_rate": 4.2775693130948094e-08, + "loss": 32.1939, + "step": 481250 + }, + { + "epoch": 0.9721756485413123, + "grad_norm": 0.28666916489601135, + "learning_rate": 4.2730142975153654e-08, + "loss": 22.4644, + "step": 481260 + }, + { + "epoch": 0.9721958491739962, + "grad_norm": 201.35943603515625, + "learning_rate": 4.26846169804912e-08, + "loss": 6.796, + "step": 481270 + }, + { + "epoch": 0.97221604980668, + "grad_norm": 151.1863555908203, + "learning_rate": 4.263911514718222e-08, + "loss": 22.4178, + "step": 481280 + }, + { + "epoch": 0.9722362504393638, + "grad_norm": 756.0543212890625, + "learning_rate": 4.259363747544931e-08, + "loss": 24.6007, + "step": 481290 + }, + { + "epoch": 0.9722564510720476, + "grad_norm": 299.47601318359375, + "learning_rate": 4.2548183965513415e-08, + "loss": 18.0124, + "step": 481300 + }, + { + "epoch": 0.9722766517047314, + "grad_norm": 165.666015625, + "learning_rate": 4.250275461759712e-08, + "loss": 19.1593, + "step": 481310 + }, + { + "epoch": 0.9722968523374153, + "grad_norm": 210.62261962890625, + "learning_rate": 4.245734943192081e-08, + "loss": 13.7901, + "step": 481320 + }, + { + "epoch": 0.9723170529700991, + "grad_norm": 216.73516845703125, + "learning_rate": 4.241196840870598e-08, + "loss": 14.5709, + "step": 481330 + }, + { + "epoch": 0.9723372536027829, + "grad_norm": 185.23233032226562, + "learning_rate": 4.236661154817412e-08, + "loss": 6.3514, + "step": 481340 + }, + { + "epoch": 0.9723574542354666, + "grad_norm": 309.74249267578125, + "learning_rate": 4.23212788505456e-08, + "loss": 16.0866, + "step": 481350 + }, + { + "epoch": 0.9723776548681504, + "grad_norm": 250.5777130126953, + "learning_rate": 4.227597031604247e-08, + "loss": 12.1945, + "step": 481360 + }, + { + "epoch": 0.9723978555008342, + "grad_norm": 496.88690185546875, + "learning_rate": 4.2230685944884554e-08, + "loss": 31.1498, + "step": 481370 + }, + { + "epoch": 0.9724180561335181, + "grad_norm": 275.16107177734375, + "learning_rate": 4.218542573729334e-08, + "loss": 19.4856, + "step": 481380 + }, + { + "epoch": 0.9724382567662019, + "grad_norm": 202.57310485839844, + "learning_rate": 4.2140189693488654e-08, + "loss": 28.5615, + "step": 481390 + }, + { + "epoch": 0.9724584573988857, + "grad_norm": 298.203369140625, + "learning_rate": 4.209497781369143e-08, + "loss": 6.3217, + "step": 481400 + }, + { + "epoch": 0.9724786580315695, + "grad_norm": 309.79852294921875, + "learning_rate": 4.20497900981226e-08, + "loss": 19.4717, + "step": 481410 + }, + { + "epoch": 0.9724988586642533, + "grad_norm": 44.44679260253906, + "learning_rate": 4.2004626547000885e-08, + "loss": 12.0639, + "step": 481420 + }, + { + "epoch": 0.9725190592969372, + "grad_norm": 304.7922668457031, + "learning_rate": 4.195948716054776e-08, + "loss": 17.1214, + "step": 481430 + }, + { + "epoch": 0.972539259929621, + "grad_norm": 404.0693359375, + "learning_rate": 4.191437193898251e-08, + "loss": 19.0104, + "step": 481440 + }, + { + "epoch": 0.9725594605623048, + "grad_norm": 262.6856689453125, + "learning_rate": 4.1869280882525506e-08, + "loss": 34.2173, + "step": 481450 + }, + { + "epoch": 0.9725796611949886, + "grad_norm": 88.3481674194336, + "learning_rate": 4.1824213991396024e-08, + "loss": 10.9159, + "step": 481460 + }, + { + "epoch": 0.9725998618276724, + "grad_norm": 86.42558288574219, + "learning_rate": 4.1779171265814435e-08, + "loss": 10.9482, + "step": 481470 + }, + { + "epoch": 0.9726200624603563, + "grad_norm": 194.77195739746094, + "learning_rate": 4.173415270599945e-08, + "loss": 21.5173, + "step": 481480 + }, + { + "epoch": 0.9726402630930401, + "grad_norm": 155.04457092285156, + "learning_rate": 4.168915831217091e-08, + "loss": 14.087, + "step": 481490 + }, + { + "epoch": 0.9726604637257239, + "grad_norm": 245.21212768554688, + "learning_rate": 4.164418808454806e-08, + "loss": 13.5881, + "step": 481500 + }, + { + "epoch": 0.9726806643584077, + "grad_norm": 213.82957458496094, + "learning_rate": 4.159924202334964e-08, + "loss": 21.7162, + "step": 481510 + }, + { + "epoch": 0.9727008649910915, + "grad_norm": 65.3963851928711, + "learning_rate": 4.1554320128795455e-08, + "loss": 13.6899, + "step": 481520 + }, + { + "epoch": 0.9727210656237754, + "grad_norm": 262.2989501953125, + "learning_rate": 4.150942240110478e-08, + "loss": 12.5275, + "step": 481530 + }, + { + "epoch": 0.9727412662564592, + "grad_norm": 379.6789245605469, + "learning_rate": 4.146454884049467e-08, + "loss": 22.427, + "step": 481540 + }, + { + "epoch": 0.972761466889143, + "grad_norm": 369.7518615722656, + "learning_rate": 4.1419699447186045e-08, + "loss": 53.0007, + "step": 481550 + }, + { + "epoch": 0.9727816675218268, + "grad_norm": 351.9305114746094, + "learning_rate": 4.137487422139541e-08, + "loss": 24.0868, + "step": 481560 + }, + { + "epoch": 0.9728018681545106, + "grad_norm": 163.4134521484375, + "learning_rate": 4.133007316334259e-08, + "loss": 14.4129, + "step": 481570 + }, + { + "epoch": 0.9728220687871945, + "grad_norm": 90.66841888427734, + "learning_rate": 4.128529627324573e-08, + "loss": 18.7434, + "step": 481580 + }, + { + "epoch": 0.9728422694198783, + "grad_norm": 265.9713134765625, + "learning_rate": 4.124054355132301e-08, + "loss": 12.7847, + "step": 481590 + }, + { + "epoch": 0.9728624700525621, + "grad_norm": 357.07513427734375, + "learning_rate": 4.1195814997792014e-08, + "loss": 11.0891, + "step": 481600 + }, + { + "epoch": 0.9728826706852458, + "grad_norm": 363.5975646972656, + "learning_rate": 4.1151110612872023e-08, + "loss": 18.9386, + "step": 481610 + }, + { + "epoch": 0.9729028713179296, + "grad_norm": 348.9988708496094, + "learning_rate": 4.1106430396778974e-08, + "loss": 29.8349, + "step": 481620 + }, + { + "epoch": 0.9729230719506134, + "grad_norm": 173.11012268066406, + "learning_rate": 4.1061774349732686e-08, + "loss": 11.3092, + "step": 481630 + }, + { + "epoch": 0.9729432725832973, + "grad_norm": 240.25750732421875, + "learning_rate": 4.10171424719491e-08, + "loss": 23.8165, + "step": 481640 + }, + { + "epoch": 0.9729634732159811, + "grad_norm": 191.71826171875, + "learning_rate": 4.097253476364693e-08, + "loss": 34.259, + "step": 481650 + }, + { + "epoch": 0.9729836738486649, + "grad_norm": 164.4589385986328, + "learning_rate": 4.092795122504323e-08, + "loss": 17.5541, + "step": 481660 + }, + { + "epoch": 0.9730038744813487, + "grad_norm": 423.4584045410156, + "learning_rate": 4.088339185635504e-08, + "loss": 13.9292, + "step": 481670 + }, + { + "epoch": 0.9730240751140325, + "grad_norm": 274.4292297363281, + "learning_rate": 4.083885665779996e-08, + "loss": 22.5548, + "step": 481680 + }, + { + "epoch": 0.9730442757467164, + "grad_norm": 42.46436309814453, + "learning_rate": 4.07943456295945e-08, + "loss": 25.2314, + "step": 481690 + }, + { + "epoch": 0.9730644763794002, + "grad_norm": 12.420757293701172, + "learning_rate": 4.0749858771956253e-08, + "loss": 11.2472, + "step": 481700 + }, + { + "epoch": 0.973084677012084, + "grad_norm": 459.44866943359375, + "learning_rate": 4.070539608510171e-08, + "loss": 26.4822, + "step": 481710 + }, + { + "epoch": 0.9731048776447678, + "grad_norm": 560.3666381835938, + "learning_rate": 4.066095756924682e-08, + "loss": 19.9428, + "step": 481720 + }, + { + "epoch": 0.9731250782774516, + "grad_norm": 41.60747528076172, + "learning_rate": 4.061654322460973e-08, + "loss": 19.0391, + "step": 481730 + }, + { + "epoch": 0.9731452789101355, + "grad_norm": 78.79512786865234, + "learning_rate": 4.0572153051406383e-08, + "loss": 15.3612, + "step": 481740 + }, + { + "epoch": 0.9731654795428193, + "grad_norm": 173.41709899902344, + "learning_rate": 4.052778704985216e-08, + "loss": 27.3176, + "step": 481750 + }, + { + "epoch": 0.9731856801755031, + "grad_norm": 292.3283996582031, + "learning_rate": 4.048344522016356e-08, + "loss": 22.1398, + "step": 481760 + }, + { + "epoch": 0.9732058808081869, + "grad_norm": 491.2066650390625, + "learning_rate": 4.043912756255819e-08, + "loss": 19.1171, + "step": 481770 + }, + { + "epoch": 0.9732260814408707, + "grad_norm": 164.1918487548828, + "learning_rate": 4.039483407725031e-08, + "loss": 16.1173, + "step": 481780 + }, + { + "epoch": 0.9732462820735546, + "grad_norm": 205.3018035888672, + "learning_rate": 4.035056476445698e-08, + "loss": 25.6681, + "step": 481790 + }, + { + "epoch": 0.9732664827062384, + "grad_norm": 479.06378173828125, + "learning_rate": 4.030631962439302e-08, + "loss": 13.227, + "step": 481800 + }, + { + "epoch": 0.9732866833389222, + "grad_norm": 158.11305236816406, + "learning_rate": 4.026209865727493e-08, + "loss": 15.4445, + "step": 481810 + }, + { + "epoch": 0.973306883971606, + "grad_norm": 41.378910064697266, + "learning_rate": 4.0217901863317534e-08, + "loss": 12.1698, + "step": 481820 + }, + { + "epoch": 0.9733270846042898, + "grad_norm": 473.0097961425781, + "learning_rate": 4.017372924273621e-08, + "loss": 24.0611, + "step": 481830 + }, + { + "epoch": 0.9733472852369737, + "grad_norm": 71.91669464111328, + "learning_rate": 4.012958079574747e-08, + "loss": 17.8506, + "step": 481840 + }, + { + "epoch": 0.9733674858696575, + "grad_norm": 120.09349060058594, + "learning_rate": 4.008545652256502e-08, + "loss": 15.7919, + "step": 481850 + }, + { + "epoch": 0.9733876865023412, + "grad_norm": 81.92552947998047, + "learning_rate": 4.004135642340423e-08, + "loss": 15.3709, + "step": 481860 + }, + { + "epoch": 0.973407887135025, + "grad_norm": 131.71946716308594, + "learning_rate": 3.999728049848106e-08, + "loss": 12.4301, + "step": 481870 + }, + { + "epoch": 0.9734280877677088, + "grad_norm": 386.7486267089844, + "learning_rate": 3.995322874800922e-08, + "loss": 25.5857, + "step": 481880 + }, + { + "epoch": 0.9734482884003927, + "grad_norm": 182.57400512695312, + "learning_rate": 3.9909201172203537e-08, + "loss": 16.8112, + "step": 481890 + }, + { + "epoch": 0.9734684890330765, + "grad_norm": 60.308738708496094, + "learning_rate": 3.986519777127884e-08, + "loss": 9.5926, + "step": 481900 + }, + { + "epoch": 0.9734886896657603, + "grad_norm": 39.768798828125, + "learning_rate": 3.9821218545449956e-08, + "loss": 9.1046, + "step": 481910 + }, + { + "epoch": 0.9735088902984441, + "grad_norm": 139.36248779296875, + "learning_rate": 3.977726349493061e-08, + "loss": 17.739, + "step": 481920 + }, + { + "epoch": 0.9735290909311279, + "grad_norm": 206.23165893554688, + "learning_rate": 3.973333261993506e-08, + "loss": 30.9089, + "step": 481930 + }, + { + "epoch": 0.9735492915638118, + "grad_norm": 3.162886381149292, + "learning_rate": 3.9689425920678146e-08, + "loss": 31.4828, + "step": 481940 + }, + { + "epoch": 0.9735694921964956, + "grad_norm": 146.00070190429688, + "learning_rate": 3.964554339737303e-08, + "loss": 43.0, + "step": 481950 + }, + { + "epoch": 0.9735896928291794, + "grad_norm": 696.09912109375, + "learning_rate": 3.960168505023343e-08, + "loss": 30.9772, + "step": 481960 + }, + { + "epoch": 0.9736098934618632, + "grad_norm": 657.7053833007812, + "learning_rate": 3.955785087947473e-08, + "loss": 16.9947, + "step": 481970 + }, + { + "epoch": 0.973630094094547, + "grad_norm": 394.7059020996094, + "learning_rate": 3.951404088530841e-08, + "loss": 20.1505, + "step": 481980 + }, + { + "epoch": 0.9736502947272309, + "grad_norm": 224.27499389648438, + "learning_rate": 3.947025506794933e-08, + "loss": 24.3254, + "step": 481990 + }, + { + "epoch": 0.9736704953599147, + "grad_norm": 219.9850311279297, + "learning_rate": 3.9426493427611177e-08, + "loss": 8.4789, + "step": 482000 + }, + { + "epoch": 0.9736906959925985, + "grad_norm": 308.3173828125, + "learning_rate": 3.938275596450603e-08, + "loss": 19.5833, + "step": 482010 + }, + { + "epoch": 0.9737108966252823, + "grad_norm": 520.0945434570312, + "learning_rate": 3.933904267884758e-08, + "loss": 14.8363, + "step": 482020 + }, + { + "epoch": 0.9737310972579661, + "grad_norm": 293.06292724609375, + "learning_rate": 3.929535357084957e-08, + "loss": 29.0102, + "step": 482030 + }, + { + "epoch": 0.97375129789065, + "grad_norm": 195.4599151611328, + "learning_rate": 3.925168864072348e-08, + "loss": 12.4343, + "step": 482040 + }, + { + "epoch": 0.9737714985233338, + "grad_norm": 312.1951599121094, + "learning_rate": 3.9208047888683597e-08, + "loss": 7.9112, + "step": 482050 + }, + { + "epoch": 0.9737916991560176, + "grad_norm": 153.21640014648438, + "learning_rate": 3.9164431314941965e-08, + "loss": 18.9792, + "step": 482060 + }, + { + "epoch": 0.9738118997887014, + "grad_norm": 307.2727355957031, + "learning_rate": 3.912083891971119e-08, + "loss": 13.3604, + "step": 482070 + }, + { + "epoch": 0.9738321004213852, + "grad_norm": 439.34478759765625, + "learning_rate": 3.907727070320389e-08, + "loss": 16.1297, + "step": 482080 + }, + { + "epoch": 0.9738523010540691, + "grad_norm": 197.19056701660156, + "learning_rate": 3.9033726665632096e-08, + "loss": 10.431, + "step": 482090 + }, + { + "epoch": 0.9738725016867529, + "grad_norm": 474.0113830566406, + "learning_rate": 3.899020680720844e-08, + "loss": 18.9224, + "step": 482100 + }, + { + "epoch": 0.9738927023194367, + "grad_norm": 226.4248504638672, + "learning_rate": 3.894671112814441e-08, + "loss": 12.1161, + "step": 482110 + }, + { + "epoch": 0.9739129029521204, + "grad_norm": 391.0127868652344, + "learning_rate": 3.8903239628652615e-08, + "loss": 12.1335, + "step": 482120 + }, + { + "epoch": 0.9739331035848042, + "grad_norm": 180.11514282226562, + "learning_rate": 3.88597923089451e-08, + "loss": 18.9657, + "step": 482130 + }, + { + "epoch": 0.973953304217488, + "grad_norm": 699.8246459960938, + "learning_rate": 3.881636916923281e-08, + "loss": 19.0863, + "step": 482140 + }, + { + "epoch": 0.9739735048501719, + "grad_norm": 391.5164794921875, + "learning_rate": 3.877297020972781e-08, + "loss": 24.0634, + "step": 482150 + }, + { + "epoch": 0.9739937054828557, + "grad_norm": 138.33474731445312, + "learning_rate": 3.8729595430641586e-08, + "loss": 17.5885, + "step": 482160 + }, + { + "epoch": 0.9740139061155395, + "grad_norm": 428.3031921386719, + "learning_rate": 3.868624483218619e-08, + "loss": 25.3389, + "step": 482170 + }, + { + "epoch": 0.9740341067482233, + "grad_norm": 43.16802978515625, + "learning_rate": 3.864291841457146e-08, + "loss": 21.3216, + "step": 482180 + }, + { + "epoch": 0.9740543073809071, + "grad_norm": 304.3694152832031, + "learning_rate": 3.859961617801e-08, + "loss": 16.826, + "step": 482190 + }, + { + "epoch": 0.974074508013591, + "grad_norm": 891.7839965820312, + "learning_rate": 3.855633812271165e-08, + "loss": 22.545, + "step": 482200 + }, + { + "epoch": 0.9740947086462748, + "grad_norm": 354.1850891113281, + "learning_rate": 3.8513084248888445e-08, + "loss": 19.9415, + "step": 482210 + }, + { + "epoch": 0.9741149092789586, + "grad_norm": 306.1986389160156, + "learning_rate": 3.8469854556750785e-08, + "loss": 15.7159, + "step": 482220 + }, + { + "epoch": 0.9741351099116424, + "grad_norm": 362.8030090332031, + "learning_rate": 3.842664904650906e-08, + "loss": 17.7414, + "step": 482230 + }, + { + "epoch": 0.9741553105443262, + "grad_norm": 280.6147155761719, + "learning_rate": 3.83834677183742e-08, + "loss": 28.3863, + "step": 482240 + }, + { + "epoch": 0.9741755111770101, + "grad_norm": 64.55226135253906, + "learning_rate": 3.83403105725566e-08, + "loss": 11.1159, + "step": 482250 + }, + { + "epoch": 0.9741957118096939, + "grad_norm": 270.72265625, + "learning_rate": 3.82971776092661e-08, + "loss": 16.8554, + "step": 482260 + }, + { + "epoch": 0.9742159124423777, + "grad_norm": 394.1881103515625, + "learning_rate": 3.825406882871363e-08, + "loss": 12.6277, + "step": 482270 + }, + { + "epoch": 0.9742361130750615, + "grad_norm": 564.988525390625, + "learning_rate": 3.8210984231109583e-08, + "loss": 16.2614, + "step": 482280 + }, + { + "epoch": 0.9742563137077453, + "grad_norm": 417.332275390625, + "learning_rate": 3.816792381666268e-08, + "loss": 11.6937, + "step": 482290 + }, + { + "epoch": 0.9742765143404292, + "grad_norm": 386.9827880859375, + "learning_rate": 3.812488758558386e-08, + "loss": 38.7971, + "step": 482300 + }, + { + "epoch": 0.974296714973113, + "grad_norm": 383.1602478027344, + "learning_rate": 3.8081875538082404e-08, + "loss": 18.8922, + "step": 482310 + }, + { + "epoch": 0.9743169156057968, + "grad_norm": 426.4156188964844, + "learning_rate": 3.8038887674368697e-08, + "loss": 17.9166, + "step": 482320 + }, + { + "epoch": 0.9743371162384806, + "grad_norm": 380.5213928222656, + "learning_rate": 3.799592399465091e-08, + "loss": 16.2061, + "step": 482330 + }, + { + "epoch": 0.9743573168711644, + "grad_norm": 279.58721923828125, + "learning_rate": 3.7952984499138864e-08, + "loss": 20.234, + "step": 482340 + }, + { + "epoch": 0.9743775175038483, + "grad_norm": 25.954132080078125, + "learning_rate": 3.791006918804296e-08, + "loss": 16.3434, + "step": 482350 + }, + { + "epoch": 0.9743977181365321, + "grad_norm": 141.3274383544922, + "learning_rate": 3.786717806157136e-08, + "loss": 17.5097, + "step": 482360 + }, + { + "epoch": 0.9744179187692159, + "grad_norm": 140.3556671142578, + "learning_rate": 3.782431111993279e-08, + "loss": 25.5609, + "step": 482370 + }, + { + "epoch": 0.9744381194018996, + "grad_norm": 329.6993408203125, + "learning_rate": 3.778146836333707e-08, + "loss": 18.3772, + "step": 482380 + }, + { + "epoch": 0.9744583200345834, + "grad_norm": 277.1619567871094, + "learning_rate": 3.7738649791992934e-08, + "loss": 16.9295, + "step": 482390 + }, + { + "epoch": 0.9744785206672673, + "grad_norm": 514.8152465820312, + "learning_rate": 3.769585540610799e-08, + "loss": 24.5608, + "step": 482400 + }, + { + "epoch": 0.9744987212999511, + "grad_norm": 675.7714233398438, + "learning_rate": 3.765308520589206e-08, + "loss": 59.0566, + "step": 482410 + }, + { + "epoch": 0.9745189219326349, + "grad_norm": 189.63316345214844, + "learning_rate": 3.761033919155333e-08, + "loss": 17.8879, + "step": 482420 + }, + { + "epoch": 0.9745391225653187, + "grad_norm": 252.22926330566406, + "learning_rate": 3.7567617363299945e-08, + "loss": 18.0837, + "step": 482430 + }, + { + "epoch": 0.9745593231980025, + "grad_norm": 249.82164001464844, + "learning_rate": 3.7524919721339535e-08, + "loss": 15.6312, + "step": 482440 + }, + { + "epoch": 0.9745795238306864, + "grad_norm": 305.08734130859375, + "learning_rate": 3.748224626588137e-08, + "loss": 24.5202, + "step": 482450 + }, + { + "epoch": 0.9745997244633702, + "grad_norm": 464.5709533691406, + "learning_rate": 3.743959699713251e-08, + "loss": 42.0495, + "step": 482460 + }, + { + "epoch": 0.974619925096054, + "grad_norm": 447.7132568359375, + "learning_rate": 3.739697191530112e-08, + "loss": 20.2405, + "step": 482470 + }, + { + "epoch": 0.9746401257287378, + "grad_norm": 107.67266845703125, + "learning_rate": 3.735437102059536e-08, + "loss": 14.533, + "step": 482480 + }, + { + "epoch": 0.9746603263614216, + "grad_norm": 549.6131591796875, + "learning_rate": 3.731179431322285e-08, + "loss": 22.1725, + "step": 482490 + }, + { + "epoch": 0.9746805269941055, + "grad_norm": 124.27351379394531, + "learning_rate": 3.726924179339009e-08, + "loss": 15.0744, + "step": 482500 + }, + { + "epoch": 0.9747007276267893, + "grad_norm": 82.30006408691406, + "learning_rate": 3.7226713461305245e-08, + "loss": 24.8883, + "step": 482510 + }, + { + "epoch": 0.9747209282594731, + "grad_norm": 521.1300048828125, + "learning_rate": 3.7184209317175366e-08, + "loss": 35.9421, + "step": 482520 + }, + { + "epoch": 0.9747411288921569, + "grad_norm": 336.57330322265625, + "learning_rate": 3.714172936120808e-08, + "loss": 20.7084, + "step": 482530 + }, + { + "epoch": 0.9747613295248407, + "grad_norm": 152.64459228515625, + "learning_rate": 3.7099273593609316e-08, + "loss": 13.1511, + "step": 482540 + }, + { + "epoch": 0.9747815301575246, + "grad_norm": 277.96343994140625, + "learning_rate": 3.7056842014587815e-08, + "loss": 22.9116, + "step": 482550 + }, + { + "epoch": 0.9748017307902084, + "grad_norm": 312.4891357421875, + "learning_rate": 3.701443462434895e-08, + "loss": 9.7763, + "step": 482560 + }, + { + "epoch": 0.9748219314228922, + "grad_norm": 162.68878173828125, + "learning_rate": 3.697205142309923e-08, + "loss": 20.6659, + "step": 482570 + }, + { + "epoch": 0.974842132055576, + "grad_norm": 412.9062805175781, + "learning_rate": 3.692969241104683e-08, + "loss": 14.0268, + "step": 482580 + }, + { + "epoch": 0.9748623326882598, + "grad_norm": 354.30987548828125, + "learning_rate": 3.688735758839601e-08, + "loss": 8.6916, + "step": 482590 + }, + { + "epoch": 0.9748825333209437, + "grad_norm": 423.56805419921875, + "learning_rate": 3.684504695535496e-08, + "loss": 20.0358, + "step": 482600 + }, + { + "epoch": 0.9749027339536275, + "grad_norm": 550.2533569335938, + "learning_rate": 3.680276051212961e-08, + "loss": 20.1011, + "step": 482610 + }, + { + "epoch": 0.9749229345863113, + "grad_norm": 139.30455017089844, + "learning_rate": 3.67604982589248e-08, + "loss": 16.5859, + "step": 482620 + }, + { + "epoch": 0.974943135218995, + "grad_norm": 63.412986755371094, + "learning_rate": 3.6718260195947594e-08, + "loss": 8.6873, + "step": 482630 + }, + { + "epoch": 0.9749633358516788, + "grad_norm": 589.2445068359375, + "learning_rate": 3.6676046323403934e-08, + "loss": 24.3236, + "step": 482640 + }, + { + "epoch": 0.9749835364843626, + "grad_norm": 531.0650634765625, + "learning_rate": 3.663385664149866e-08, + "loss": 22.7496, + "step": 482650 + }, + { + "epoch": 0.9750037371170465, + "grad_norm": 83.04975891113281, + "learning_rate": 3.659169115043826e-08, + "loss": 16.2014, + "step": 482660 + }, + { + "epoch": 0.9750239377497303, + "grad_norm": 270.0564270019531, + "learning_rate": 3.654954985042869e-08, + "loss": 17.2266, + "step": 482670 + }, + { + "epoch": 0.9750441383824141, + "grad_norm": 254.4864501953125, + "learning_rate": 3.650743274167368e-08, + "loss": 11.1291, + "step": 482680 + }, + { + "epoch": 0.9750643390150979, + "grad_norm": 194.707275390625, + "learning_rate": 3.6465339824379165e-08, + "loss": 17.9651, + "step": 482690 + }, + { + "epoch": 0.9750845396477817, + "grad_norm": 518.6030883789062, + "learning_rate": 3.642327109875166e-08, + "loss": 25.6451, + "step": 482700 + }, + { + "epoch": 0.9751047402804656, + "grad_norm": 322.35125732421875, + "learning_rate": 3.638122656499432e-08, + "loss": 19.7594, + "step": 482710 + }, + { + "epoch": 0.9751249409131494, + "grad_norm": 366.0865783691406, + "learning_rate": 3.633920622331311e-08, + "loss": 22.1807, + "step": 482720 + }, + { + "epoch": 0.9751451415458332, + "grad_norm": 35.25457763671875, + "learning_rate": 3.629721007391229e-08, + "loss": 25.9606, + "step": 482730 + }, + { + "epoch": 0.975165342178517, + "grad_norm": 201.05673217773438, + "learning_rate": 3.625523811699727e-08, + "loss": 14.9553, + "step": 482740 + }, + { + "epoch": 0.9751855428112008, + "grad_norm": 395.165283203125, + "learning_rate": 3.621329035277232e-08, + "loss": 12.1448, + "step": 482750 + }, + { + "epoch": 0.9752057434438847, + "grad_norm": 15.634329795837402, + "learning_rate": 3.617136678144173e-08, + "loss": 14.2171, + "step": 482760 + }, + { + "epoch": 0.9752259440765685, + "grad_norm": 799.6106567382812, + "learning_rate": 3.612946740320977e-08, + "loss": 23.4363, + "step": 482770 + }, + { + "epoch": 0.9752461447092523, + "grad_norm": 428.73382568359375, + "learning_rate": 3.608759221828073e-08, + "loss": 19.0898, + "step": 482780 + }, + { + "epoch": 0.9752663453419361, + "grad_norm": 378.638671875, + "learning_rate": 3.604574122685833e-08, + "loss": 21.3059, + "step": 482790 + }, + { + "epoch": 0.97528654597462, + "grad_norm": 383.9864807128906, + "learning_rate": 3.600391442914741e-08, + "loss": 17.5286, + "step": 482800 + }, + { + "epoch": 0.9753067466073038, + "grad_norm": 409.30499267578125, + "learning_rate": 3.5962111825350585e-08, + "loss": 20.0298, + "step": 482810 + }, + { + "epoch": 0.9753269472399876, + "grad_norm": 84.33053588867188, + "learning_rate": 3.592033341567325e-08, + "loss": 12.0383, + "step": 482820 + }, + { + "epoch": 0.9753471478726714, + "grad_norm": 243.53125, + "learning_rate": 3.5878579200318006e-08, + "loss": 26.7664, + "step": 482830 + }, + { + "epoch": 0.9753673485053552, + "grad_norm": 306.3808898925781, + "learning_rate": 3.583684917948804e-08, + "loss": 19.9252, + "step": 482840 + }, + { + "epoch": 0.975387549138039, + "grad_norm": 224.66900634765625, + "learning_rate": 3.579514335338763e-08, + "loss": 19.7794, + "step": 482850 + }, + { + "epoch": 0.9754077497707229, + "grad_norm": 243.73361206054688, + "learning_rate": 3.575346172221939e-08, + "loss": 16.3464, + "step": 482860 + }, + { + "epoch": 0.9754279504034067, + "grad_norm": 265.2950439453125, + "learning_rate": 3.5711804286187035e-08, + "loss": 7.8652, + "step": 482870 + }, + { + "epoch": 0.9754481510360905, + "grad_norm": 117.79085540771484, + "learning_rate": 3.5670171045492643e-08, + "loss": 9.7502, + "step": 482880 + }, + { + "epoch": 0.9754683516687742, + "grad_norm": 1.972617506980896, + "learning_rate": 3.5628562000339925e-08, + "loss": 12.1673, + "step": 482890 + }, + { + "epoch": 0.975488552301458, + "grad_norm": 28.19820213317871, + "learning_rate": 3.558697715093207e-08, + "loss": 20.6444, + "step": 482900 + }, + { + "epoch": 0.9755087529341419, + "grad_norm": 151.6971435546875, + "learning_rate": 3.554541649747056e-08, + "loss": 20.8036, + "step": 482910 + }, + { + "epoch": 0.9755289535668257, + "grad_norm": 152.75494384765625, + "learning_rate": 3.5503880040158586e-08, + "loss": 19.0316, + "step": 482920 + }, + { + "epoch": 0.9755491541995095, + "grad_norm": 201.63723754882812, + "learning_rate": 3.546236777919876e-08, + "loss": 9.6726, + "step": 482930 + }, + { + "epoch": 0.9755693548321933, + "grad_norm": 138.88731384277344, + "learning_rate": 3.542087971479313e-08, + "loss": 11.0982, + "step": 482940 + }, + { + "epoch": 0.9755895554648771, + "grad_norm": 332.2503662109375, + "learning_rate": 3.5379415847143775e-08, + "loss": 16.3735, + "step": 482950 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 223.29818725585938, + "learning_rate": 3.5337976176453845e-08, + "loss": 14.105, + "step": 482960 + }, + { + "epoch": 0.9756299567302448, + "grad_norm": 162.20608520507812, + "learning_rate": 3.529656070292375e-08, + "loss": 16.5648, + "step": 482970 + }, + { + "epoch": 0.9756501573629286, + "grad_norm": 467.5368347167969, + "learning_rate": 3.525516942675611e-08, + "loss": 24.0874, + "step": 482980 + }, + { + "epoch": 0.9756703579956124, + "grad_norm": 158.80160522460938, + "learning_rate": 3.521380234815297e-08, + "loss": 28.2517, + "step": 482990 + }, + { + "epoch": 0.9756905586282962, + "grad_norm": 111.56636047363281, + "learning_rate": 3.517245946731529e-08, + "loss": 33.4182, + "step": 483000 + }, + { + "epoch": 0.97571075926098, + "grad_norm": 202.2498321533203, + "learning_rate": 3.513114078444513e-08, + "loss": 6.0337, + "step": 483010 + }, + { + "epoch": 0.9757309598936639, + "grad_norm": 439.6343688964844, + "learning_rate": 3.508984629974288e-08, + "loss": 22.1437, + "step": 483020 + }, + { + "epoch": 0.9757511605263477, + "grad_norm": 589.834228515625, + "learning_rate": 3.504857601341172e-08, + "loss": 14.595, + "step": 483030 + }, + { + "epoch": 0.9757713611590315, + "grad_norm": 25.5278377532959, + "learning_rate": 3.5007329925650925e-08, + "loss": 45.8825, + "step": 483040 + }, + { + "epoch": 0.9757915617917153, + "grad_norm": 383.9505920410156, + "learning_rate": 3.4966108036662006e-08, + "loss": 10.8613, + "step": 483050 + }, + { + "epoch": 0.9758117624243992, + "grad_norm": 198.6356201171875, + "learning_rate": 3.4924910346647024e-08, + "loss": 16.7055, + "step": 483060 + }, + { + "epoch": 0.975831963057083, + "grad_norm": 397.1332092285156, + "learning_rate": 3.488373685580526e-08, + "loss": 17.8031, + "step": 483070 + }, + { + "epoch": 0.9758521636897668, + "grad_norm": 426.4501647949219, + "learning_rate": 3.4842587564337674e-08, + "loss": 9.4932, + "step": 483080 + }, + { + "epoch": 0.9758723643224506, + "grad_norm": 208.44989013671875, + "learning_rate": 3.48014624724452e-08, + "loss": 9.9579, + "step": 483090 + }, + { + "epoch": 0.9758925649551344, + "grad_norm": 3.690246343612671, + "learning_rate": 3.47603615803288e-08, + "loss": 18.2543, + "step": 483100 + }, + { + "epoch": 0.9759127655878183, + "grad_norm": 264.77227783203125, + "learning_rate": 3.471928488818776e-08, + "loss": 9.9958, + "step": 483110 + }, + { + "epoch": 0.9759329662205021, + "grad_norm": 3.3299927711486816, + "learning_rate": 3.467823239622248e-08, + "loss": 17.3776, + "step": 483120 + }, + { + "epoch": 0.9759531668531859, + "grad_norm": 327.7098388671875, + "learning_rate": 3.463720410463334e-08, + "loss": 24.4381, + "step": 483130 + }, + { + "epoch": 0.9759733674858696, + "grad_norm": 290.3511962890625, + "learning_rate": 3.459620001362074e-08, + "loss": 15.9041, + "step": 483140 + }, + { + "epoch": 0.9759935681185534, + "grad_norm": 330.9452819824219, + "learning_rate": 3.4555220123383416e-08, + "loss": 12.4754, + "step": 483150 + }, + { + "epoch": 0.9760137687512372, + "grad_norm": 116.95169067382812, + "learning_rate": 3.451426443412231e-08, + "loss": 7.8998, + "step": 483160 + }, + { + "epoch": 0.9760339693839211, + "grad_norm": 242.63372802734375, + "learning_rate": 3.4473332946036164e-08, + "loss": 20.3625, + "step": 483170 + }, + { + "epoch": 0.9760541700166049, + "grad_norm": 418.365966796875, + "learning_rate": 3.443242565932481e-08, + "loss": 18.6768, + "step": 483180 + }, + { + "epoch": 0.9760743706492887, + "grad_norm": 186.45838928222656, + "learning_rate": 3.439154257418753e-08, + "loss": 14.3481, + "step": 483190 + }, + { + "epoch": 0.9760945712819725, + "grad_norm": 366.9158935546875, + "learning_rate": 3.435068369082306e-08, + "loss": 9.6534, + "step": 483200 + }, + { + "epoch": 0.9761147719146563, + "grad_norm": 214.03306579589844, + "learning_rate": 3.4309849009431794e-08, + "loss": 21.3093, + "step": 483210 + }, + { + "epoch": 0.9761349725473402, + "grad_norm": 165.76834106445312, + "learning_rate": 3.4269038530211906e-08, + "loss": 14.8489, + "step": 483220 + }, + { + "epoch": 0.976155173180024, + "grad_norm": 163.30471801757812, + "learning_rate": 3.4228252253362683e-08, + "loss": 12.1658, + "step": 483230 + }, + { + "epoch": 0.9761753738127078, + "grad_norm": 0.0, + "learning_rate": 3.41874901790823e-08, + "loss": 20.5138, + "step": 483240 + }, + { + "epoch": 0.9761955744453916, + "grad_norm": 317.5463562011719, + "learning_rate": 3.414675230757003e-08, + "loss": 12.4586, + "step": 483250 + }, + { + "epoch": 0.9762157750780754, + "grad_norm": 253.27920532226562, + "learning_rate": 3.410603863902406e-08, + "loss": 19.113, + "step": 483260 + }, + { + "epoch": 0.9762359757107593, + "grad_norm": 36.075157165527344, + "learning_rate": 3.406534917364257e-08, + "loss": 12.2031, + "step": 483270 + }, + { + "epoch": 0.9762561763434431, + "grad_norm": 303.51995849609375, + "learning_rate": 3.402468391162539e-08, + "loss": 16.7368, + "step": 483280 + }, + { + "epoch": 0.9762763769761269, + "grad_norm": 268.71087646484375, + "learning_rate": 3.398404285316847e-08, + "loss": 8.2577, + "step": 483290 + }, + { + "epoch": 0.9762965776088107, + "grad_norm": 149.17491149902344, + "learning_rate": 3.394342599847111e-08, + "loss": 13.4999, + "step": 483300 + }, + { + "epoch": 0.9763167782414945, + "grad_norm": 492.189208984375, + "learning_rate": 3.390283334773203e-08, + "loss": 19.87, + "step": 483310 + }, + { + "epoch": 0.9763369788741784, + "grad_norm": 318.0566711425781, + "learning_rate": 3.3862264901147745e-08, + "loss": 26.1718, + "step": 483320 + }, + { + "epoch": 0.9763571795068622, + "grad_norm": 219.8817901611328, + "learning_rate": 3.3821720658916426e-08, + "loss": 6.3677, + "step": 483330 + }, + { + "epoch": 0.976377380139546, + "grad_norm": 5.527459621429443, + "learning_rate": 3.378120062123569e-08, + "loss": 11.1907, + "step": 483340 + }, + { + "epoch": 0.9763975807722298, + "grad_norm": 123.34063720703125, + "learning_rate": 3.374070478830316e-08, + "loss": 11.3013, + "step": 483350 + }, + { + "epoch": 0.9764177814049136, + "grad_norm": 655.8611450195312, + "learning_rate": 3.3700233160315897e-08, + "loss": 13.955, + "step": 483360 + }, + { + "epoch": 0.9764379820375975, + "grad_norm": 287.165771484375, + "learning_rate": 3.365978573747153e-08, + "loss": 29.42, + "step": 483370 + }, + { + "epoch": 0.9764581826702813, + "grad_norm": 611.0186157226562, + "learning_rate": 3.361936251996711e-08, + "loss": 27.2618, + "step": 483380 + }, + { + "epoch": 0.9764783833029651, + "grad_norm": 84.76065826416016, + "learning_rate": 3.357896350799916e-08, + "loss": 14.0287, + "step": 483390 + }, + { + "epoch": 0.9764985839356488, + "grad_norm": 339.7453918457031, + "learning_rate": 3.3538588701765296e-08, + "loss": 13.6125, + "step": 483400 + }, + { + "epoch": 0.9765187845683326, + "grad_norm": 176.95620727539062, + "learning_rate": 3.349823810146202e-08, + "loss": 8.7821, + "step": 483410 + }, + { + "epoch": 0.9765389852010165, + "grad_norm": 102.81352233886719, + "learning_rate": 3.34579117072864e-08, + "loss": 15.8232, + "step": 483420 + }, + { + "epoch": 0.9765591858337003, + "grad_norm": 199.88491821289062, + "learning_rate": 3.341760951943385e-08, + "loss": 19.5629, + "step": 483430 + }, + { + "epoch": 0.9765793864663841, + "grad_norm": 346.9388427734375, + "learning_rate": 3.337733153810141e-08, + "loss": 16.255, + "step": 483440 + }, + { + "epoch": 0.9765995870990679, + "grad_norm": 196.13929748535156, + "learning_rate": 3.3337077763485605e-08, + "loss": 23.9133, + "step": 483450 + }, + { + "epoch": 0.9766197877317517, + "grad_norm": 377.25042724609375, + "learning_rate": 3.329684819578294e-08, + "loss": 24.0601, + "step": 483460 + }, + { + "epoch": 0.9766399883644356, + "grad_norm": 276.38189697265625, + "learning_rate": 3.3256642835188816e-08, + "loss": 15.3663, + "step": 483470 + }, + { + "epoch": 0.9766601889971194, + "grad_norm": 101.42993927001953, + "learning_rate": 3.321646168189918e-08, + "loss": 8.4814, + "step": 483480 + }, + { + "epoch": 0.9766803896298032, + "grad_norm": 300.20281982421875, + "learning_rate": 3.317630473611055e-08, + "loss": 25.3813, + "step": 483490 + }, + { + "epoch": 0.976700590262487, + "grad_norm": 5.163257122039795, + "learning_rate": 3.313617199801777e-08, + "loss": 15.0118, + "step": 483500 + }, + { + "epoch": 0.9767207908951708, + "grad_norm": 482.6654968261719, + "learning_rate": 3.309606346781735e-08, + "loss": 15.7841, + "step": 483510 + }, + { + "epoch": 0.9767409915278547, + "grad_norm": 362.0157470703125, + "learning_rate": 3.305597914570413e-08, + "loss": 14.2689, + "step": 483520 + }, + { + "epoch": 0.9767611921605385, + "grad_norm": 400.5704650878906, + "learning_rate": 3.301591903187351e-08, + "loss": 13.4012, + "step": 483530 + }, + { + "epoch": 0.9767813927932223, + "grad_norm": 249.0880889892578, + "learning_rate": 3.297588312652089e-08, + "loss": 22.4317, + "step": 483540 + }, + { + "epoch": 0.9768015934259061, + "grad_norm": 182.11021423339844, + "learning_rate": 3.2935871429841116e-08, + "loss": 15.1609, + "step": 483550 + }, + { + "epoch": 0.9768217940585899, + "grad_norm": 5.8800554275512695, + "learning_rate": 3.289588394203014e-08, + "loss": 6.4429, + "step": 483560 + }, + { + "epoch": 0.9768419946912738, + "grad_norm": 215.13284301757812, + "learning_rate": 3.285592066328169e-08, + "loss": 14.1888, + "step": 483570 + }, + { + "epoch": 0.9768621953239576, + "grad_norm": 642.272216796875, + "learning_rate": 3.281598159379118e-08, + "loss": 12.5785, + "step": 483580 + }, + { + "epoch": 0.9768823959566414, + "grad_norm": 282.8641052246094, + "learning_rate": 3.277606673375289e-08, + "loss": 15.3044, + "step": 483590 + }, + { + "epoch": 0.9769025965893252, + "grad_norm": 169.6785125732422, + "learning_rate": 3.2736176083362216e-08, + "loss": 18.1839, + "step": 483600 + }, + { + "epoch": 0.976922797222009, + "grad_norm": 431.96875, + "learning_rate": 3.2696309642812344e-08, + "loss": 13.4903, + "step": 483610 + }, + { + "epoch": 0.9769429978546929, + "grad_norm": 76.10205078125, + "learning_rate": 3.2656467412298665e-08, + "loss": 22.2909, + "step": 483620 + }, + { + "epoch": 0.9769631984873767, + "grad_norm": 135.01373291015625, + "learning_rate": 3.261664939201436e-08, + "loss": 20.6173, + "step": 483630 + }, + { + "epoch": 0.9769833991200605, + "grad_norm": 430.282470703125, + "learning_rate": 3.2576855582154844e-08, + "loss": 24.4488, + "step": 483640 + }, + { + "epoch": 0.9770035997527442, + "grad_norm": 136.57626342773438, + "learning_rate": 3.253708598291272e-08, + "loss": 7.0277, + "step": 483650 + }, + { + "epoch": 0.977023800385428, + "grad_norm": 239.49517822265625, + "learning_rate": 3.2497340594482284e-08, + "loss": 11.4957, + "step": 483660 + }, + { + "epoch": 0.9770440010181118, + "grad_norm": 198.54737854003906, + "learning_rate": 3.245761941705727e-08, + "loss": 11.8537, + "step": 483670 + }, + { + "epoch": 0.9770642016507957, + "grad_norm": 344.41021728515625, + "learning_rate": 3.241792245083142e-08, + "loss": 8.878, + "step": 483680 + }, + { + "epoch": 0.9770844022834795, + "grad_norm": 233.0062255859375, + "learning_rate": 3.237824969599845e-08, + "loss": 14.1195, + "step": 483690 + }, + { + "epoch": 0.9771046029161633, + "grad_norm": 190.5237274169922, + "learning_rate": 3.2338601152751e-08, + "loss": 20.4171, + "step": 483700 + }, + { + "epoch": 0.9771248035488471, + "grad_norm": 245.7359619140625, + "learning_rate": 3.2298976821282804e-08, + "loss": 26.0637, + "step": 483710 + }, + { + "epoch": 0.9771450041815309, + "grad_norm": 424.45123291015625, + "learning_rate": 3.2259376701787025e-08, + "loss": 16.3542, + "step": 483720 + }, + { + "epoch": 0.9771652048142148, + "grad_norm": 154.21754455566406, + "learning_rate": 3.2219800794456304e-08, + "loss": 22.9416, + "step": 483730 + }, + { + "epoch": 0.9771854054468986, + "grad_norm": 137.80577087402344, + "learning_rate": 3.2180249099483806e-08, + "loss": 7.9407, + "step": 483740 + }, + { + "epoch": 0.9772056060795824, + "grad_norm": 429.020751953125, + "learning_rate": 3.214072161706272e-08, + "loss": 18.3285, + "step": 483750 + }, + { + "epoch": 0.9772258067122662, + "grad_norm": 167.385986328125, + "learning_rate": 3.210121834738456e-08, + "loss": 24.7815, + "step": 483760 + }, + { + "epoch": 0.97724600734495, + "grad_norm": 19.635986328125, + "learning_rate": 3.206173929064304e-08, + "loss": 20.0423, + "step": 483770 + }, + { + "epoch": 0.9772662079776339, + "grad_norm": 102.77245330810547, + "learning_rate": 3.20222844470297e-08, + "loss": 9.3504, + "step": 483780 + }, + { + "epoch": 0.9772864086103177, + "grad_norm": 527.6898193359375, + "learning_rate": 3.198285381673716e-08, + "loss": 29.21, + "step": 483790 + }, + { + "epoch": 0.9773066092430015, + "grad_norm": 661.2086181640625, + "learning_rate": 3.194344739995803e-08, + "loss": 25.6792, + "step": 483800 + }, + { + "epoch": 0.9773268098756853, + "grad_norm": 232.419677734375, + "learning_rate": 3.1904065196883825e-08, + "loss": 16.9828, + "step": 483810 + }, + { + "epoch": 0.9773470105083691, + "grad_norm": 89.22576141357422, + "learning_rate": 3.1864707207706624e-08, + "loss": 5.4055, + "step": 483820 + }, + { + "epoch": 0.977367211141053, + "grad_norm": 634.2050170898438, + "learning_rate": 3.182537343261849e-08, + "loss": 20.296, + "step": 483830 + }, + { + "epoch": 0.9773874117737368, + "grad_norm": 289.4164123535156, + "learning_rate": 3.178606387181038e-08, + "loss": 28.411, + "step": 483840 + }, + { + "epoch": 0.9774076124064206, + "grad_norm": 297.56500244140625, + "learning_rate": 3.1746778525474916e-08, + "loss": 10.1111, + "step": 483850 + }, + { + "epoch": 0.9774278130391044, + "grad_norm": 54.85966491699219, + "learning_rate": 3.1707517393803064e-08, + "loss": 8.5428, + "step": 483860 + }, + { + "epoch": 0.9774480136717882, + "grad_norm": 586.3662719726562, + "learning_rate": 3.166828047698578e-08, + "loss": 12.225, + "step": 483870 + }, + { + "epoch": 0.9774682143044721, + "grad_norm": 279.25689697265625, + "learning_rate": 3.1629067775214575e-08, + "loss": 23.4355, + "step": 483880 + }, + { + "epoch": 0.9774884149371559, + "grad_norm": 198.25270080566406, + "learning_rate": 3.158987928868151e-08, + "loss": 13.6672, + "step": 483890 + }, + { + "epoch": 0.9775086155698397, + "grad_norm": 197.78375244140625, + "learning_rate": 3.1550715017575895e-08, + "loss": 14.6742, + "step": 483900 + }, + { + "epoch": 0.9775288162025234, + "grad_norm": 312.73980712890625, + "learning_rate": 3.151157496208979e-08, + "loss": 10.7456, + "step": 483910 + }, + { + "epoch": 0.9775490168352072, + "grad_norm": 0.0, + "learning_rate": 3.1472459122414144e-08, + "loss": 12.6866, + "step": 483920 + }, + { + "epoch": 0.977569217467891, + "grad_norm": 26.30738067626953, + "learning_rate": 3.143336749873882e-08, + "loss": 14.7024, + "step": 483930 + }, + { + "epoch": 0.9775894181005749, + "grad_norm": 153.23471069335938, + "learning_rate": 3.139430009125477e-08, + "loss": 17.0624, + "step": 483940 + }, + { + "epoch": 0.9776096187332587, + "grad_norm": 626.3602294921875, + "learning_rate": 3.135525690015184e-08, + "loss": 22.0167, + "step": 483950 + }, + { + "epoch": 0.9776298193659425, + "grad_norm": 165.5670928955078, + "learning_rate": 3.131623792562155e-08, + "loss": 14.3687, + "step": 483960 + }, + { + "epoch": 0.9776500199986263, + "grad_norm": 11.01375675201416, + "learning_rate": 3.127724316785263e-08, + "loss": 18.0701, + "step": 483970 + }, + { + "epoch": 0.9776702206313101, + "grad_norm": 94.835693359375, + "learning_rate": 3.1238272627035494e-08, + "loss": 13.9082, + "step": 483980 + }, + { + "epoch": 0.977690421263994, + "grad_norm": 383.6202087402344, + "learning_rate": 3.119932630336109e-08, + "loss": 27.2308, + "step": 483990 + }, + { + "epoch": 0.9777106218966778, + "grad_norm": 350.61016845703125, + "learning_rate": 3.1160404197018155e-08, + "loss": 17.0211, + "step": 484000 + }, + { + "epoch": 0.9777308225293616, + "grad_norm": 131.5062713623047, + "learning_rate": 3.11215063081971e-08, + "loss": 27.6387, + "step": 484010 + }, + { + "epoch": 0.9777510231620454, + "grad_norm": 182.68927001953125, + "learning_rate": 3.108263263708666e-08, + "loss": 7.6781, + "step": 484020 + }, + { + "epoch": 0.9777712237947292, + "grad_norm": 101.00978088378906, + "learning_rate": 3.104378318387724e-08, + "loss": 12.5815, + "step": 484030 + }, + { + "epoch": 0.9777914244274131, + "grad_norm": 145.689697265625, + "learning_rate": 3.1004957948757576e-08, + "loss": 15.6676, + "step": 484040 + }, + { + "epoch": 0.9778116250600969, + "grad_norm": 278.1912841796875, + "learning_rate": 3.0966156931916955e-08, + "loss": 29.9499, + "step": 484050 + }, + { + "epoch": 0.9778318256927807, + "grad_norm": 280.1376647949219, + "learning_rate": 3.092738013354468e-08, + "loss": 13.4762, + "step": 484060 + }, + { + "epoch": 0.9778520263254645, + "grad_norm": 198.26275634765625, + "learning_rate": 3.088862755383004e-08, + "loss": 16.3791, + "step": 484070 + }, + { + "epoch": 0.9778722269581483, + "grad_norm": 275.3869323730469, + "learning_rate": 3.084989919296122e-08, + "loss": 23.9532, + "step": 484080 + }, + { + "epoch": 0.9778924275908322, + "grad_norm": 134.90830993652344, + "learning_rate": 3.081119505112751e-08, + "loss": 18.2092, + "step": 484090 + }, + { + "epoch": 0.977912628223516, + "grad_norm": 222.60604858398438, + "learning_rate": 3.077251512851709e-08, + "loss": 24.5073, + "step": 484100 + }, + { + "epoch": 0.9779328288561998, + "grad_norm": 744.3571166992188, + "learning_rate": 3.07338594253187e-08, + "loss": 22.8677, + "step": 484110 + }, + { + "epoch": 0.9779530294888836, + "grad_norm": 480.4717712402344, + "learning_rate": 3.069522794172109e-08, + "loss": 19.43, + "step": 484120 + }, + { + "epoch": 0.9779732301215674, + "grad_norm": 225.44764709472656, + "learning_rate": 3.0656620677911867e-08, + "loss": 11.4545, + "step": 484130 + }, + { + "epoch": 0.9779934307542513, + "grad_norm": 434.86517333984375, + "learning_rate": 3.061803763408033e-08, + "loss": 22.5593, + "step": 484140 + }, + { + "epoch": 0.9780136313869351, + "grad_norm": 923.6597900390625, + "learning_rate": 3.057947881041301e-08, + "loss": 24.499, + "step": 484150 + }, + { + "epoch": 0.9780338320196189, + "grad_norm": 247.2659912109375, + "learning_rate": 3.054094420709863e-08, + "loss": 14.029, + "step": 484160 + }, + { + "epoch": 0.9780540326523026, + "grad_norm": 263.4147644042969, + "learning_rate": 3.050243382432483e-08, + "loss": 16.9114, + "step": 484170 + }, + { + "epoch": 0.9780742332849864, + "grad_norm": 355.8857116699219, + "learning_rate": 3.046394766228034e-08, + "loss": 8.9725, + "step": 484180 + }, + { + "epoch": 0.9780944339176703, + "grad_norm": 217.84544372558594, + "learning_rate": 3.0425485721151115e-08, + "loss": 24.7696, + "step": 484190 + }, + { + "epoch": 0.9781146345503541, + "grad_norm": 119.02053833007812, + "learning_rate": 3.038704800112535e-08, + "loss": 8.661, + "step": 484200 + }, + { + "epoch": 0.9781348351830379, + "grad_norm": 208.46368408203125, + "learning_rate": 3.034863450239067e-08, + "loss": 26.7875, + "step": 484210 + }, + { + "epoch": 0.9781550358157217, + "grad_norm": 436.1865234375, + "learning_rate": 3.0310245225133595e-08, + "loss": 19.8382, + "step": 484220 + }, + { + "epoch": 0.9781752364484055, + "grad_norm": 403.9502258300781, + "learning_rate": 3.027188016954175e-08, + "loss": 20.4445, + "step": 484230 + }, + { + "epoch": 0.9781954370810894, + "grad_norm": 261.0536193847656, + "learning_rate": 3.0233539335802195e-08, + "loss": 20.8463, + "step": 484240 + }, + { + "epoch": 0.9782156377137732, + "grad_norm": 551.9981079101562, + "learning_rate": 3.019522272410202e-08, + "loss": 16.055, + "step": 484250 + }, + { + "epoch": 0.978235838346457, + "grad_norm": 362.7401428222656, + "learning_rate": 3.0156930334626633e-08, + "loss": 13.623, + "step": 484260 + }, + { + "epoch": 0.9782560389791408, + "grad_norm": 222.0453643798828, + "learning_rate": 3.0118662167564205e-08, + "loss": 13.3153, + "step": 484270 + }, + { + "epoch": 0.9782762396118246, + "grad_norm": 234.4801025390625, + "learning_rate": 3.008041822310015e-08, + "loss": 13.9276, + "step": 484280 + }, + { + "epoch": 0.9782964402445085, + "grad_norm": 12.658368110656738, + "learning_rate": 3.004219850142209e-08, + "loss": 25.2537, + "step": 484290 + }, + { + "epoch": 0.9783166408771923, + "grad_norm": 452.21368408203125, + "learning_rate": 3.0004003002714886e-08, + "loss": 33.7096, + "step": 484300 + }, + { + "epoch": 0.9783368415098761, + "grad_norm": 105.99453735351562, + "learning_rate": 2.9965831727165603e-08, + "loss": 14.9214, + "step": 484310 + }, + { + "epoch": 0.9783570421425599, + "grad_norm": 125.94039916992188, + "learning_rate": 2.992768467496021e-08, + "loss": 19.0695, + "step": 484320 + }, + { + "epoch": 0.9783772427752437, + "grad_norm": 77.85628509521484, + "learning_rate": 2.988956184628411e-08, + "loss": 19.3725, + "step": 484330 + }, + { + "epoch": 0.9783974434079276, + "grad_norm": 37.931915283203125, + "learning_rate": 2.985146324132438e-08, + "loss": 37.1462, + "step": 484340 + }, + { + "epoch": 0.9784176440406114, + "grad_norm": 354.00482177734375, + "learning_rate": 2.981338886026475e-08, + "loss": 13.8811, + "step": 484350 + }, + { + "epoch": 0.9784378446732952, + "grad_norm": 192.91014099121094, + "learning_rate": 2.97753387032923e-08, + "loss": 19.4121, + "step": 484360 + }, + { + "epoch": 0.978458045305979, + "grad_norm": 75.15606689453125, + "learning_rate": 2.9737312770591887e-08, + "loss": 19.2399, + "step": 484370 + }, + { + "epoch": 0.9784782459386628, + "grad_norm": 247.0068817138672, + "learning_rate": 2.9699311062349467e-08, + "loss": 16.0864, + "step": 484380 + }, + { + "epoch": 0.9784984465713467, + "grad_norm": 25.100473403930664, + "learning_rate": 2.966133357874934e-08, + "loss": 19.2274, + "step": 484390 + }, + { + "epoch": 0.9785186472040305, + "grad_norm": 25.9183292388916, + "learning_rate": 2.9623380319976912e-08, + "loss": 13.2397, + "step": 484400 + }, + { + "epoch": 0.9785388478367143, + "grad_norm": 208.7423095703125, + "learning_rate": 2.9585451286217593e-08, + "loss": 14.6802, + "step": 484410 + }, + { + "epoch": 0.978559048469398, + "grad_norm": 132.13888549804688, + "learning_rate": 2.954754647765623e-08, + "loss": 14.4208, + "step": 484420 + }, + { + "epoch": 0.9785792491020818, + "grad_norm": 559.3355712890625, + "learning_rate": 2.950966589447657e-08, + "loss": 19.5717, + "step": 484430 + }, + { + "epoch": 0.9785994497347656, + "grad_norm": 304.85235595703125, + "learning_rate": 2.947180953686457e-08, + "loss": 17.2576, + "step": 484440 + }, + { + "epoch": 0.9786196503674495, + "grad_norm": 345.2542419433594, + "learning_rate": 2.9433977405003976e-08, + "loss": 23.9686, + "step": 484450 + }, + { + "epoch": 0.9786398510001333, + "grad_norm": 329.9510498046875, + "learning_rate": 2.9396169499079087e-08, + "loss": 16.6701, + "step": 484460 + }, + { + "epoch": 0.9786600516328171, + "grad_norm": 353.3564758300781, + "learning_rate": 2.935838581927475e-08, + "loss": 33.0322, + "step": 484470 + }, + { + "epoch": 0.9786802522655009, + "grad_norm": 130.25843811035156, + "learning_rate": 2.9320626365774153e-08, + "loss": 9.726, + "step": 484480 + }, + { + "epoch": 0.9787004528981847, + "grad_norm": 17.38633918762207, + "learning_rate": 2.9282891138762148e-08, + "loss": 15.5292, + "step": 484490 + }, + { + "epoch": 0.9787206535308686, + "grad_norm": 326.0066223144531, + "learning_rate": 2.9245180138423033e-08, + "loss": 37.6101, + "step": 484500 + }, + { + "epoch": 0.9787408541635524, + "grad_norm": 419.8305358886719, + "learning_rate": 2.920749336494e-08, + "loss": 13.2768, + "step": 484510 + }, + { + "epoch": 0.9787610547962362, + "grad_norm": 286.7938232421875, + "learning_rate": 2.9169830818496226e-08, + "loss": 25.2753, + "step": 484520 + }, + { + "epoch": 0.97878125542892, + "grad_norm": 140.8557891845703, + "learning_rate": 2.9132192499276014e-08, + "loss": 19.5638, + "step": 484530 + }, + { + "epoch": 0.9788014560616038, + "grad_norm": 13.008999824523926, + "learning_rate": 2.9094578407462547e-08, + "loss": 11.8918, + "step": 484540 + }, + { + "epoch": 0.9788216566942877, + "grad_norm": 198.89772033691406, + "learning_rate": 2.9056988543239018e-08, + "loss": 7.4577, + "step": 484550 + }, + { + "epoch": 0.9788418573269715, + "grad_norm": 13.655198097229004, + "learning_rate": 2.9019422906789162e-08, + "loss": 11.6218, + "step": 484560 + }, + { + "epoch": 0.9788620579596553, + "grad_norm": 289.172119140625, + "learning_rate": 2.8981881498295616e-08, + "loss": 30.0727, + "step": 484570 + }, + { + "epoch": 0.9788822585923391, + "grad_norm": 75.4028091430664, + "learning_rate": 2.8944364317941564e-08, + "loss": 10.8886, + "step": 484580 + }, + { + "epoch": 0.978902459225023, + "grad_norm": 394.84210205078125, + "learning_rate": 2.8906871365909638e-08, + "loss": 14.4291, + "step": 484590 + }, + { + "epoch": 0.9789226598577068, + "grad_norm": 444.9529724121094, + "learning_rate": 2.8869402642382473e-08, + "loss": 13.397, + "step": 484600 + }, + { + "epoch": 0.9789428604903906, + "grad_norm": 577.3967895507812, + "learning_rate": 2.8831958147543805e-08, + "loss": 17.5643, + "step": 484610 + }, + { + "epoch": 0.9789630611230744, + "grad_norm": 170.24781799316406, + "learning_rate": 2.8794537881574046e-08, + "loss": 22.3157, + "step": 484620 + }, + { + "epoch": 0.9789832617557582, + "grad_norm": 422.06024169921875, + "learning_rate": 2.87571418446575e-08, + "loss": 20.9791, + "step": 484630 + }, + { + "epoch": 0.979003462388442, + "grad_norm": 428.97283935546875, + "learning_rate": 2.871977003697568e-08, + "loss": 21.446, + "step": 484640 + }, + { + "epoch": 0.9790236630211259, + "grad_norm": 288.517578125, + "learning_rate": 2.8682422458710667e-08, + "loss": 15.7553, + "step": 484650 + }, + { + "epoch": 0.9790438636538097, + "grad_norm": 124.12348175048828, + "learning_rate": 2.864509911004454e-08, + "loss": 13.6223, + "step": 484660 + }, + { + "epoch": 0.9790640642864935, + "grad_norm": 20.6627254486084, + "learning_rate": 2.8607799991159368e-08, + "loss": 9.575, + "step": 484670 + }, + { + "epoch": 0.9790842649191772, + "grad_norm": 465.4810791015625, + "learning_rate": 2.857052510223668e-08, + "loss": 17.2106, + "step": 484680 + }, + { + "epoch": 0.979104465551861, + "grad_norm": 258.0783386230469, + "learning_rate": 2.853327444345799e-08, + "loss": 15.3026, + "step": 484690 + }, + { + "epoch": 0.9791246661845449, + "grad_norm": 550.4926147460938, + "learning_rate": 2.8496048015005385e-08, + "loss": 21.0773, + "step": 484700 + }, + { + "epoch": 0.9791448668172287, + "grad_norm": 139.730224609375, + "learning_rate": 2.8458845817060376e-08, + "loss": 13.0492, + "step": 484710 + }, + { + "epoch": 0.9791650674499125, + "grad_norm": 444.1943664550781, + "learning_rate": 2.8421667849803937e-08, + "loss": 14.2267, + "step": 484720 + }, + { + "epoch": 0.9791852680825963, + "grad_norm": 17.33098602294922, + "learning_rate": 2.8384514113417026e-08, + "loss": 14.1109, + "step": 484730 + }, + { + "epoch": 0.9792054687152801, + "grad_norm": 0.0, + "learning_rate": 2.8347384608081173e-08, + "loss": 24.8455, + "step": 484740 + }, + { + "epoch": 0.979225669347964, + "grad_norm": 1.657724380493164, + "learning_rate": 2.8310279333976786e-08, + "loss": 21.6012, + "step": 484750 + }, + { + "epoch": 0.9792458699806478, + "grad_norm": 104.38908386230469, + "learning_rate": 2.827319829128594e-08, + "loss": 12.692, + "step": 484760 + }, + { + "epoch": 0.9792660706133316, + "grad_norm": 258.1601257324219, + "learning_rate": 2.823614148018794e-08, + "loss": 14.0637, + "step": 484770 + }, + { + "epoch": 0.9792862712460154, + "grad_norm": 429.6944274902344, + "learning_rate": 2.819910890086375e-08, + "loss": 24.2677, + "step": 484780 + }, + { + "epoch": 0.9793064718786992, + "grad_norm": 248.7170867919922, + "learning_rate": 2.8162100553494887e-08, + "loss": 17.2839, + "step": 484790 + }, + { + "epoch": 0.9793266725113831, + "grad_norm": 518.0184326171875, + "learning_rate": 2.8125116438260104e-08, + "loss": 12.0953, + "step": 484800 + }, + { + "epoch": 0.9793468731440669, + "grad_norm": 375.1488952636719, + "learning_rate": 2.8088156555340916e-08, + "loss": 26.6397, + "step": 484810 + }, + { + "epoch": 0.9793670737767507, + "grad_norm": 318.00823974609375, + "learning_rate": 2.805122090491719e-08, + "loss": 15.0593, + "step": 484820 + }, + { + "epoch": 0.9793872744094345, + "grad_norm": 125.44718933105469, + "learning_rate": 2.801430948716821e-08, + "loss": 17.0579, + "step": 484830 + }, + { + "epoch": 0.9794074750421183, + "grad_norm": 273.11883544921875, + "learning_rate": 2.797742230227496e-08, + "loss": 14.5143, + "step": 484840 + }, + { + "epoch": 0.9794276756748022, + "grad_norm": 272.05291748046875, + "learning_rate": 2.794055935041673e-08, + "loss": 14.1976, + "step": 484850 + }, + { + "epoch": 0.979447876307486, + "grad_norm": 555.6732788085938, + "learning_rate": 2.7903720631772824e-08, + "loss": 24.0573, + "step": 484860 + }, + { + "epoch": 0.9794680769401698, + "grad_norm": 186.22152709960938, + "learning_rate": 2.7866906146523098e-08, + "loss": 19.1456, + "step": 484870 + }, + { + "epoch": 0.9794882775728536, + "grad_norm": 398.9652404785156, + "learning_rate": 2.783011589484741e-08, + "loss": 26.9826, + "step": 484880 + }, + { + "epoch": 0.9795084782055374, + "grad_norm": 167.0987091064453, + "learning_rate": 2.7793349876924503e-08, + "loss": 24.4843, + "step": 484890 + }, + { + "epoch": 0.9795286788382213, + "grad_norm": 268.7541198730469, + "learning_rate": 2.7756608092933678e-08, + "loss": 18.577, + "step": 484900 + }, + { + "epoch": 0.9795488794709051, + "grad_norm": 201.6580352783203, + "learning_rate": 2.771989054305424e-08, + "loss": 10.9767, + "step": 484910 + }, + { + "epoch": 0.9795690801035889, + "grad_norm": 141.82345581054688, + "learning_rate": 2.768319722746493e-08, + "loss": 10.678, + "step": 484920 + }, + { + "epoch": 0.9795892807362726, + "grad_norm": 262.8535461425781, + "learning_rate": 2.7646528146345053e-08, + "loss": 12.9, + "step": 484930 + }, + { + "epoch": 0.9796094813689564, + "grad_norm": 435.3656005859375, + "learning_rate": 2.760988329987224e-08, + "loss": 13.6356, + "step": 484940 + }, + { + "epoch": 0.9796296820016402, + "grad_norm": 510.5507507324219, + "learning_rate": 2.7573262688226355e-08, + "loss": 35.3654, + "step": 484950 + }, + { + "epoch": 0.9796498826343241, + "grad_norm": 341.7468566894531, + "learning_rate": 2.753666631158447e-08, + "loss": 14.537, + "step": 484960 + }, + { + "epoch": 0.9796700832670079, + "grad_norm": 818.6873168945312, + "learning_rate": 2.7500094170126447e-08, + "loss": 12.7979, + "step": 484970 + }, + { + "epoch": 0.9796902838996917, + "grad_norm": 200.34927368164062, + "learning_rate": 2.7463546264029915e-08, + "loss": 10.148, + "step": 484980 + }, + { + "epoch": 0.9797104845323755, + "grad_norm": 480.0416564941406, + "learning_rate": 2.7427022593473074e-08, + "loss": 19.2405, + "step": 484990 + }, + { + "epoch": 0.9797306851650593, + "grad_norm": 407.5085144042969, + "learning_rate": 2.7390523158633552e-08, + "loss": 17.0076, + "step": 485000 + }, + { + "epoch": 0.9797508857977432, + "grad_norm": 249.0325164794922, + "learning_rate": 2.7354047959689543e-08, + "loss": 12.0682, + "step": 485010 + }, + { + "epoch": 0.979771086430427, + "grad_norm": 400.5632629394531, + "learning_rate": 2.7317596996818684e-08, + "loss": 31.2551, + "step": 485020 + }, + { + "epoch": 0.9797912870631108, + "grad_norm": 332.02001953125, + "learning_rate": 2.728117027019861e-08, + "loss": 38.4033, + "step": 485030 + }, + { + "epoch": 0.9798114876957946, + "grad_norm": 482.14398193359375, + "learning_rate": 2.7244767780007507e-08, + "loss": 29.1403, + "step": 485040 + }, + { + "epoch": 0.9798316883284784, + "grad_norm": 236.2940673828125, + "learning_rate": 2.7208389526421907e-08, + "loss": 34.5868, + "step": 485050 + }, + { + "epoch": 0.9798518889611623, + "grad_norm": 228.9615020751953, + "learning_rate": 2.7172035509619442e-08, + "loss": 21.3379, + "step": 485060 + }, + { + "epoch": 0.9798720895938461, + "grad_norm": 248.9374237060547, + "learning_rate": 2.713570572977775e-08, + "loss": 28.5801, + "step": 485070 + }, + { + "epoch": 0.9798922902265299, + "grad_norm": 736.0357666015625, + "learning_rate": 2.7099400187073356e-08, + "loss": 18.1751, + "step": 485080 + }, + { + "epoch": 0.9799124908592137, + "grad_norm": 380.0699768066406, + "learning_rate": 2.7063118881682782e-08, + "loss": 20.2449, + "step": 485090 + }, + { + "epoch": 0.9799326914918975, + "grad_norm": 400.3285827636719, + "learning_rate": 2.7026861813783668e-08, + "loss": 12.1668, + "step": 485100 + }, + { + "epoch": 0.9799528921245814, + "grad_norm": 164.75294494628906, + "learning_rate": 2.6990628983553093e-08, + "loss": 16.418, + "step": 485110 + }, + { + "epoch": 0.9799730927572652, + "grad_norm": 417.77935791015625, + "learning_rate": 2.6954420391166468e-08, + "loss": 14.4959, + "step": 485120 + }, + { + "epoch": 0.979993293389949, + "grad_norm": 394.34307861328125, + "learning_rate": 2.691823603680088e-08, + "loss": 29.6269, + "step": 485130 + }, + { + "epoch": 0.9800134940226328, + "grad_norm": 233.8807830810547, + "learning_rate": 2.6882075920632854e-08, + "loss": 17.3482, + "step": 485140 + }, + { + "epoch": 0.9800336946553166, + "grad_norm": 178.50732421875, + "learning_rate": 2.684594004283836e-08, + "loss": 14.9117, + "step": 485150 + }, + { + "epoch": 0.9800538952880005, + "grad_norm": 410.2407531738281, + "learning_rate": 2.6809828403593363e-08, + "loss": 11.6257, + "step": 485160 + }, + { + "epoch": 0.9800740959206843, + "grad_norm": 47.52455139160156, + "learning_rate": 2.6773741003074394e-08, + "loss": 17.6356, + "step": 485170 + }, + { + "epoch": 0.9800942965533681, + "grad_norm": 598.1161499023438, + "learning_rate": 2.6737677841456867e-08, + "loss": 20.3677, + "step": 485180 + }, + { + "epoch": 0.9801144971860518, + "grad_norm": 373.65185546875, + "learning_rate": 2.670163891891675e-08, + "loss": 18.3767, + "step": 485190 + }, + { + "epoch": 0.9801346978187356, + "grad_norm": 236.4280548095703, + "learning_rate": 2.6665624235629463e-08, + "loss": 15.3819, + "step": 485200 + }, + { + "epoch": 0.9801548984514195, + "grad_norm": 404.3671569824219, + "learning_rate": 2.662963379177097e-08, + "loss": 14.8196, + "step": 485210 + }, + { + "epoch": 0.9801750990841033, + "grad_norm": 1.4682387113571167, + "learning_rate": 2.6593667587516693e-08, + "loss": 17.7185, + "step": 485220 + }, + { + "epoch": 0.9801952997167871, + "grad_norm": 116.38851165771484, + "learning_rate": 2.6557725623041487e-08, + "loss": 17.0778, + "step": 485230 + }, + { + "epoch": 0.9802155003494709, + "grad_norm": 202.219482421875, + "learning_rate": 2.6521807898520214e-08, + "loss": 11.1525, + "step": 485240 + }, + { + "epoch": 0.9802357009821547, + "grad_norm": 249.83407592773438, + "learning_rate": 2.64859144141294e-08, + "loss": 12.2021, + "step": 485250 + }, + { + "epoch": 0.9802559016148386, + "grad_norm": 158.11590576171875, + "learning_rate": 2.6450045170042238e-08, + "loss": 15.9091, + "step": 485260 + }, + { + "epoch": 0.9802761022475224, + "grad_norm": 373.1625671386719, + "learning_rate": 2.6414200166434144e-08, + "loss": 33.0223, + "step": 485270 + }, + { + "epoch": 0.9802963028802062, + "grad_norm": 558.8707275390625, + "learning_rate": 2.6378379403480536e-08, + "loss": 22.0269, + "step": 485280 + }, + { + "epoch": 0.98031650351289, + "grad_norm": 80.94778442382812, + "learning_rate": 2.6342582881355717e-08, + "loss": 11.432, + "step": 485290 + }, + { + "epoch": 0.9803367041455738, + "grad_norm": 447.9439392089844, + "learning_rate": 2.6306810600233435e-08, + "loss": 20.6871, + "step": 485300 + }, + { + "epoch": 0.9803569047782577, + "grad_norm": 212.05914306640625, + "learning_rate": 2.6271062560288552e-08, + "loss": 20.6287, + "step": 485310 + }, + { + "epoch": 0.9803771054109415, + "grad_norm": 454.74493408203125, + "learning_rate": 2.6235338761695372e-08, + "loss": 21.637, + "step": 485320 + }, + { + "epoch": 0.9803973060436253, + "grad_norm": 220.4085693359375, + "learning_rate": 2.6199639204628202e-08, + "loss": 14.0467, + "step": 485330 + }, + { + "epoch": 0.9804175066763091, + "grad_norm": 428.25933837890625, + "learning_rate": 2.6163963889260236e-08, + "loss": 23.0588, + "step": 485340 + }, + { + "epoch": 0.9804377073089929, + "grad_norm": 14.814775466918945, + "learning_rate": 2.6128312815766332e-08, + "loss": 21.864, + "step": 485350 + }, + { + "epoch": 0.9804579079416768, + "grad_norm": 515.070068359375, + "learning_rate": 2.6092685984319134e-08, + "loss": 15.1821, + "step": 485360 + }, + { + "epoch": 0.9804781085743606, + "grad_norm": 180.12730407714844, + "learning_rate": 2.6057083395093495e-08, + "loss": 23.8452, + "step": 485370 + }, + { + "epoch": 0.9804983092070444, + "grad_norm": 219.78884887695312, + "learning_rate": 2.6021505048262062e-08, + "loss": 25.7188, + "step": 485380 + }, + { + "epoch": 0.9805185098397282, + "grad_norm": 4.057182312011719, + "learning_rate": 2.5985950943999137e-08, + "loss": 11.9159, + "step": 485390 + }, + { + "epoch": 0.980538710472412, + "grad_norm": 164.8330535888672, + "learning_rate": 2.5950421082476805e-08, + "loss": 13.588, + "step": 485400 + }, + { + "epoch": 0.9805589111050959, + "grad_norm": 252.8827667236328, + "learning_rate": 2.5914915463868816e-08, + "loss": 21.1826, + "step": 485410 + }, + { + "epoch": 0.9805791117377797, + "grad_norm": 0.5538516640663147, + "learning_rate": 2.5879434088348364e-08, + "loss": 18.9406, + "step": 485420 + }, + { + "epoch": 0.9805993123704635, + "grad_norm": 533.2706909179688, + "learning_rate": 2.584397695608809e-08, + "loss": 34.0056, + "step": 485430 + }, + { + "epoch": 0.9806195130031473, + "grad_norm": 162.6726837158203, + "learning_rate": 2.580854406726174e-08, + "loss": 8.7961, + "step": 485440 + }, + { + "epoch": 0.980639713635831, + "grad_norm": 37.770572662353516, + "learning_rate": 2.5773135422040296e-08, + "loss": 12.204, + "step": 485450 + }, + { + "epoch": 0.9806599142685148, + "grad_norm": 323.77325439453125, + "learning_rate": 2.5737751020598057e-08, + "loss": 8.6064, + "step": 485460 + }, + { + "epoch": 0.9806801149011987, + "grad_norm": 164.14524841308594, + "learning_rate": 2.5702390863105996e-08, + "loss": 7.7936, + "step": 485470 + }, + { + "epoch": 0.9807003155338825, + "grad_norm": 325.83123779296875, + "learning_rate": 2.5667054949737315e-08, + "loss": 13.356, + "step": 485480 + }, + { + "epoch": 0.9807205161665663, + "grad_norm": 206.82696533203125, + "learning_rate": 2.5631743280664643e-08, + "loss": 20.1518, + "step": 485490 + }, + { + "epoch": 0.9807407167992501, + "grad_norm": 266.54156494140625, + "learning_rate": 2.5596455856058966e-08, + "loss": 20.6028, + "step": 485500 + }, + { + "epoch": 0.9807609174319339, + "grad_norm": 263.7402648925781, + "learning_rate": 2.556119267609347e-08, + "loss": 13.1059, + "step": 485510 + }, + { + "epoch": 0.9807811180646178, + "grad_norm": 296.02099609375, + "learning_rate": 2.552595374093858e-08, + "loss": 16.091, + "step": 485520 + }, + { + "epoch": 0.9808013186973016, + "grad_norm": 113.26029205322266, + "learning_rate": 2.5490739050767488e-08, + "loss": 9.6391, + "step": 485530 + }, + { + "epoch": 0.9808215193299854, + "grad_norm": 412.40570068359375, + "learning_rate": 2.5455548605751167e-08, + "loss": 30.8011, + "step": 485540 + }, + { + "epoch": 0.9808417199626692, + "grad_norm": 297.3525085449219, + "learning_rate": 2.5420382406060595e-08, + "loss": 25.4737, + "step": 485550 + }, + { + "epoch": 0.980861920595353, + "grad_norm": 256.57373046875, + "learning_rate": 2.5385240451867853e-08, + "loss": 17.9179, + "step": 485560 + }, + { + "epoch": 0.9808821212280369, + "grad_norm": 345.115478515625, + "learning_rate": 2.5350122743344476e-08, + "loss": 21.138, + "step": 485570 + }, + { + "epoch": 0.9809023218607207, + "grad_norm": 169.43362426757812, + "learning_rate": 2.531502928066143e-08, + "loss": 11.9062, + "step": 485580 + }, + { + "epoch": 0.9809225224934045, + "grad_norm": 351.01409912109375, + "learning_rate": 2.527996006398914e-08, + "loss": 9.3808, + "step": 485590 + }, + { + "epoch": 0.9809427231260883, + "grad_norm": 307.8703918457031, + "learning_rate": 2.5244915093499134e-08, + "loss": 36.9408, + "step": 485600 + }, + { + "epoch": 0.9809629237587721, + "grad_norm": 154.72337341308594, + "learning_rate": 2.5209894369362386e-08, + "loss": 14.547, + "step": 485610 + }, + { + "epoch": 0.980983124391456, + "grad_norm": 193.18045043945312, + "learning_rate": 2.5174897891748762e-08, + "loss": 26.4675, + "step": 485620 + }, + { + "epoch": 0.9810033250241398, + "grad_norm": 170.4246368408203, + "learning_rate": 2.5139925660829233e-08, + "loss": 19.2328, + "step": 485630 + }, + { + "epoch": 0.9810235256568236, + "grad_norm": 236.127197265625, + "learning_rate": 2.5104977676774777e-08, + "loss": 19.9385, + "step": 485640 + }, + { + "epoch": 0.9810437262895074, + "grad_norm": 212.71778869628906, + "learning_rate": 2.5070053939754702e-08, + "loss": 13.2024, + "step": 485650 + }, + { + "epoch": 0.9810639269221912, + "grad_norm": 319.12548828125, + "learning_rate": 2.5035154449940535e-08, + "loss": 27.3948, + "step": 485660 + }, + { + "epoch": 0.9810841275548751, + "grad_norm": 484.36846923828125, + "learning_rate": 2.500027920750103e-08, + "loss": 15.4728, + "step": 485670 + }, + { + "epoch": 0.9811043281875589, + "grad_norm": 314.556884765625, + "learning_rate": 2.496542821260717e-08, + "loss": 13.4917, + "step": 485680 + }, + { + "epoch": 0.9811245288202427, + "grad_norm": 113.44535064697266, + "learning_rate": 2.493060146542825e-08, + "loss": 20.1452, + "step": 485690 + }, + { + "epoch": 0.9811447294529264, + "grad_norm": 352.3360595703125, + "learning_rate": 2.489579896613359e-08, + "loss": 31.2981, + "step": 485700 + }, + { + "epoch": 0.9811649300856102, + "grad_norm": 160.99942016601562, + "learning_rate": 2.4861020714894156e-08, + "loss": 9.934, + "step": 485710 + }, + { + "epoch": 0.981185130718294, + "grad_norm": 139.1692657470703, + "learning_rate": 2.482626671187871e-08, + "loss": 10.0028, + "step": 485720 + }, + { + "epoch": 0.9812053313509779, + "grad_norm": 391.8040771484375, + "learning_rate": 2.4791536957256e-08, + "loss": 15.2026, + "step": 485730 + }, + { + "epoch": 0.9812255319836617, + "grad_norm": 560.1205444335938, + "learning_rate": 2.4756831451196452e-08, + "loss": 24.3444, + "step": 485740 + }, + { + "epoch": 0.9812457326163455, + "grad_norm": 29.000030517578125, + "learning_rate": 2.472215019386881e-08, + "loss": 14.2183, + "step": 485750 + }, + { + "epoch": 0.9812659332490293, + "grad_norm": 220.35426330566406, + "learning_rate": 2.4687493185441836e-08, + "loss": 24.7511, + "step": 485760 + }, + { + "epoch": 0.9812861338817132, + "grad_norm": 254.14060974121094, + "learning_rate": 2.4652860426084278e-08, + "loss": 24.0683, + "step": 485770 + }, + { + "epoch": 0.981306334514397, + "grad_norm": 58.67346954345703, + "learning_rate": 2.46182519159649e-08, + "loss": 10.4478, + "step": 485780 + }, + { + "epoch": 0.9813265351470808, + "grad_norm": 44.45576477050781, + "learning_rate": 2.458366765525355e-08, + "loss": 9.0318, + "step": 485790 + }, + { + "epoch": 0.9813467357797646, + "grad_norm": 409.38818359375, + "learning_rate": 2.4549107644117888e-08, + "loss": 18.5046, + "step": 485800 + }, + { + "epoch": 0.9813669364124484, + "grad_norm": 100.24295043945312, + "learning_rate": 2.4514571882726102e-08, + "loss": 14.8458, + "step": 485810 + }, + { + "epoch": 0.9813871370451323, + "grad_norm": 495.2707214355469, + "learning_rate": 2.448006037124695e-08, + "loss": 24.7137, + "step": 485820 + }, + { + "epoch": 0.9814073376778161, + "grad_norm": 344.15325927734375, + "learning_rate": 2.444557310984863e-08, + "loss": 16.843, + "step": 485830 + }, + { + "epoch": 0.9814275383104999, + "grad_norm": 408.4779052734375, + "learning_rate": 2.441111009869879e-08, + "loss": 25.2231, + "step": 485840 + }, + { + "epoch": 0.9814477389431837, + "grad_norm": 65.52508544921875, + "learning_rate": 2.4376671337966174e-08, + "loss": 17.1064, + "step": 485850 + }, + { + "epoch": 0.9814679395758675, + "grad_norm": 246.01783752441406, + "learning_rate": 2.434225682781788e-08, + "loss": 20.977, + "step": 485860 + }, + { + "epoch": 0.9814881402085514, + "grad_norm": 506.14581298828125, + "learning_rate": 2.43078665684221e-08, + "loss": 46.8623, + "step": 485870 + }, + { + "epoch": 0.9815083408412352, + "grad_norm": 335.97332763671875, + "learning_rate": 2.427350055994593e-08, + "loss": 26.2684, + "step": 485880 + }, + { + "epoch": 0.981528541473919, + "grad_norm": 217.335205078125, + "learning_rate": 2.423915880255756e-08, + "loss": 14.9394, + "step": 485890 + }, + { + "epoch": 0.9815487421066028, + "grad_norm": 121.01840209960938, + "learning_rate": 2.4204841296424086e-08, + "loss": 14.2219, + "step": 485900 + }, + { + "epoch": 0.9815689427392866, + "grad_norm": 415.287109375, + "learning_rate": 2.4170548041712594e-08, + "loss": 19.2851, + "step": 485910 + }, + { + "epoch": 0.9815891433719705, + "grad_norm": 250.50343322753906, + "learning_rate": 2.4136279038590727e-08, + "loss": 18.9241, + "step": 485920 + }, + { + "epoch": 0.9816093440046543, + "grad_norm": 368.9814758300781, + "learning_rate": 2.4102034287224462e-08, + "loss": 12.0449, + "step": 485930 + }, + { + "epoch": 0.9816295446373381, + "grad_norm": 394.1357727050781, + "learning_rate": 2.4067813787782e-08, + "loss": 22.9436, + "step": 485940 + }, + { + "epoch": 0.9816497452700219, + "grad_norm": 175.39022827148438, + "learning_rate": 2.403361754042932e-08, + "loss": 19.2293, + "step": 485950 + }, + { + "epoch": 0.9816699459027056, + "grad_norm": 53.9837646484375, + "learning_rate": 2.3999445545332955e-08, + "loss": 15.5892, + "step": 485960 + }, + { + "epoch": 0.9816901465353894, + "grad_norm": 236.90887451171875, + "learning_rate": 2.3965297802659993e-08, + "loss": 17.771, + "step": 485970 + }, + { + "epoch": 0.9817103471680733, + "grad_norm": 234.25405883789062, + "learning_rate": 2.3931174312576966e-08, + "loss": 19.3174, + "step": 485980 + }, + { + "epoch": 0.9817305478007571, + "grad_norm": 201.67913818359375, + "learning_rate": 2.3897075075249298e-08, + "loss": 16.0098, + "step": 485990 + }, + { + "epoch": 0.9817507484334409, + "grad_norm": 109.33238220214844, + "learning_rate": 2.386300009084408e-08, + "loss": 18.8036, + "step": 486000 + }, + { + "epoch": 0.9817709490661247, + "grad_norm": 4.808259963989258, + "learning_rate": 2.382894935952729e-08, + "loss": 16.8313, + "step": 486010 + }, + { + "epoch": 0.9817911496988085, + "grad_norm": 272.5266418457031, + "learning_rate": 2.3794922881464344e-08, + "loss": 18.3731, + "step": 486020 + }, + { + "epoch": 0.9818113503314924, + "grad_norm": 34.69828414916992, + "learning_rate": 2.3760920656821228e-08, + "loss": 17.4615, + "step": 486030 + }, + { + "epoch": 0.9818315509641762, + "grad_norm": 323.7899169921875, + "learning_rate": 2.3726942685764474e-08, + "loss": 22.6718, + "step": 486040 + }, + { + "epoch": 0.98185175159686, + "grad_norm": 101.85675048828125, + "learning_rate": 2.3692988968458398e-08, + "loss": 30.6321, + "step": 486050 + }, + { + "epoch": 0.9818719522295438, + "grad_norm": 198.76905822753906, + "learning_rate": 2.3659059505069526e-08, + "loss": 26.5358, + "step": 486060 + }, + { + "epoch": 0.9818921528622276, + "grad_norm": 53.76090621948242, + "learning_rate": 2.362515429576273e-08, + "loss": 11.4659, + "step": 486070 + }, + { + "epoch": 0.9819123534949115, + "grad_norm": 252.8328094482422, + "learning_rate": 2.3591273340703436e-08, + "loss": 11.9757, + "step": 486080 + }, + { + "epoch": 0.9819325541275953, + "grad_norm": 793.3980102539062, + "learning_rate": 2.3557416640056507e-08, + "loss": 17.1405, + "step": 486090 + }, + { + "epoch": 0.9819527547602791, + "grad_norm": 195.5869903564453, + "learning_rate": 2.3523584193986816e-08, + "loss": 11.7921, + "step": 486100 + }, + { + "epoch": 0.9819729553929629, + "grad_norm": 510.00006103515625, + "learning_rate": 2.3489776002660337e-08, + "loss": 18.6178, + "step": 486110 + }, + { + "epoch": 0.9819931560256467, + "grad_norm": 270.2353515625, + "learning_rate": 2.3455992066240828e-08, + "loss": 31.8047, + "step": 486120 + }, + { + "epoch": 0.9820133566583306, + "grad_norm": 276.6331481933594, + "learning_rate": 2.342223238489316e-08, + "loss": 24.8707, + "step": 486130 + }, + { + "epoch": 0.9820335572910144, + "grad_norm": 91.86866760253906, + "learning_rate": 2.3388496958782203e-08, + "loss": 20.6946, + "step": 486140 + }, + { + "epoch": 0.9820537579236982, + "grad_norm": 178.90017700195312, + "learning_rate": 2.3354785788072265e-08, + "loss": 8.8982, + "step": 486150 + }, + { + "epoch": 0.982073958556382, + "grad_norm": 476.80303955078125, + "learning_rate": 2.3321098872927107e-08, + "loss": 25.1611, + "step": 486160 + }, + { + "epoch": 0.9820941591890658, + "grad_norm": 338.6235656738281, + "learning_rate": 2.3287436213511038e-08, + "loss": 14.7015, + "step": 486170 + }, + { + "epoch": 0.9821143598217497, + "grad_norm": 253.2498321533203, + "learning_rate": 2.3253797809988933e-08, + "loss": 15.6617, + "step": 486180 + }, + { + "epoch": 0.9821345604544335, + "grad_norm": 255.2903289794922, + "learning_rate": 2.3220183662523986e-08, + "loss": 19.9992, + "step": 486190 + }, + { + "epoch": 0.9821547610871173, + "grad_norm": 241.35653686523438, + "learning_rate": 2.3186593771280518e-08, + "loss": 6.3188, + "step": 486200 + }, + { + "epoch": 0.982174961719801, + "grad_norm": 446.3783874511719, + "learning_rate": 2.3153028136421728e-08, + "loss": 17.9964, + "step": 486210 + }, + { + "epoch": 0.9821951623524848, + "grad_norm": 265.31549072265625, + "learning_rate": 2.3119486758111375e-08, + "loss": 11.47, + "step": 486220 + }, + { + "epoch": 0.9822153629851686, + "grad_norm": 238.12451171875, + "learning_rate": 2.3085969636513217e-08, + "loss": 14.1695, + "step": 486230 + }, + { + "epoch": 0.9822355636178525, + "grad_norm": 0.0, + "learning_rate": 2.3052476771790454e-08, + "loss": 11.3744, + "step": 486240 + }, + { + "epoch": 0.9822557642505363, + "grad_norm": 332.5835876464844, + "learning_rate": 2.301900816410574e-08, + "loss": 12.744, + "step": 486250 + }, + { + "epoch": 0.9822759648832201, + "grad_norm": 273.3670654296875, + "learning_rate": 2.2985563813623378e-08, + "loss": 15.5879, + "step": 486260 + }, + { + "epoch": 0.9822961655159039, + "grad_norm": 128.8788604736328, + "learning_rate": 2.295214372050547e-08, + "loss": 12.1372, + "step": 486270 + }, + { + "epoch": 0.9823163661485877, + "grad_norm": 246.0155792236328, + "learning_rate": 2.2918747884915216e-08, + "loss": 16.4574, + "step": 486280 + }, + { + "epoch": 0.9823365667812716, + "grad_norm": 247.61831665039062, + "learning_rate": 2.2885376307015817e-08, + "loss": 20.4371, + "step": 486290 + }, + { + "epoch": 0.9823567674139554, + "grad_norm": 244.03050231933594, + "learning_rate": 2.285202898696881e-08, + "loss": 19.5125, + "step": 486300 + }, + { + "epoch": 0.9823769680466392, + "grad_norm": 441.6757507324219, + "learning_rate": 2.2818705924937402e-08, + "loss": 19.5062, + "step": 486310 + }, + { + "epoch": 0.982397168679323, + "grad_norm": 158.03622436523438, + "learning_rate": 2.2785407121084236e-08, + "loss": 23.2111, + "step": 486320 + }, + { + "epoch": 0.9824173693120068, + "grad_norm": 217.98892211914062, + "learning_rate": 2.2752132575570852e-08, + "loss": 19.4118, + "step": 486330 + }, + { + "epoch": 0.9824375699446907, + "grad_norm": 376.9085998535156, + "learning_rate": 2.271888228856045e-08, + "loss": 18.4132, + "step": 486340 + }, + { + "epoch": 0.9824577705773745, + "grad_norm": 335.8309631347656, + "learning_rate": 2.268565626021457e-08, + "loss": 42.8806, + "step": 486350 + }, + { + "epoch": 0.9824779712100583, + "grad_norm": 220.08766174316406, + "learning_rate": 2.2652454490694752e-08, + "loss": 19.1188, + "step": 486360 + }, + { + "epoch": 0.9824981718427421, + "grad_norm": 44.7899055480957, + "learning_rate": 2.261927698016364e-08, + "loss": 15.5816, + "step": 486370 + }, + { + "epoch": 0.982518372475426, + "grad_norm": 0.0, + "learning_rate": 2.2586123728781663e-08, + "loss": 16.5778, + "step": 486380 + }, + { + "epoch": 0.9825385731081098, + "grad_norm": 275.6081848144531, + "learning_rate": 2.255299473671202e-08, + "loss": 18.4161, + "step": 486390 + }, + { + "epoch": 0.9825587737407936, + "grad_norm": 154.0469970703125, + "learning_rate": 2.251989000411514e-08, + "loss": 24.1468, + "step": 486400 + }, + { + "epoch": 0.9825789743734774, + "grad_norm": 495.96075439453125, + "learning_rate": 2.2486809531152563e-08, + "loss": 8.1208, + "step": 486410 + }, + { + "epoch": 0.9825991750061612, + "grad_norm": 195.0969696044922, + "learning_rate": 2.2453753317985272e-08, + "loss": 18.1736, + "step": 486420 + }, + { + "epoch": 0.982619375638845, + "grad_norm": 304.2983703613281, + "learning_rate": 2.2420721364775354e-08, + "loss": 23.7621, + "step": 486430 + }, + { + "epoch": 0.9826395762715289, + "grad_norm": 256.87933349609375, + "learning_rate": 2.2387713671682687e-08, + "loss": 25.3219, + "step": 486440 + }, + { + "epoch": 0.9826597769042127, + "grad_norm": 311.9479064941406, + "learning_rate": 2.2354730238868804e-08, + "loss": 21.947, + "step": 486450 + }, + { + "epoch": 0.9826799775368965, + "grad_norm": 153.4099578857422, + "learning_rate": 2.2321771066494137e-08, + "loss": 13.7153, + "step": 486460 + }, + { + "epoch": 0.9827001781695802, + "grad_norm": 1512.3955078125, + "learning_rate": 2.2288836154719663e-08, + "loss": 22.5255, + "step": 486470 + }, + { + "epoch": 0.982720378802264, + "grad_norm": 176.42466735839844, + "learning_rate": 2.2255925503705255e-08, + "loss": 13.9224, + "step": 486480 + }, + { + "epoch": 0.9827405794349479, + "grad_norm": 140.4485626220703, + "learning_rate": 2.22230391136119e-08, + "loss": 11.1048, + "step": 486490 + }, + { + "epoch": 0.9827607800676317, + "grad_norm": 356.75421142578125, + "learning_rate": 2.219017698460002e-08, + "loss": 19.3174, + "step": 486500 + }, + { + "epoch": 0.9827809807003155, + "grad_norm": 216.4697723388672, + "learning_rate": 2.215733911682949e-08, + "loss": 13.9558, + "step": 486510 + }, + { + "epoch": 0.9828011813329993, + "grad_norm": 160.7062530517578, + "learning_rate": 2.2124525510459627e-08, + "loss": 12.9543, + "step": 486520 + }, + { + "epoch": 0.9828213819656831, + "grad_norm": 118.80420684814453, + "learning_rate": 2.2091736165651966e-08, + "loss": 6.4347, + "step": 486530 + }, + { + "epoch": 0.982841582598367, + "grad_norm": 285.1014099121094, + "learning_rate": 2.205897108256472e-08, + "loss": 29.0186, + "step": 486540 + }, + { + "epoch": 0.9828617832310508, + "grad_norm": 67.74488067626953, + "learning_rate": 2.202623026135886e-08, + "loss": 13.3825, + "step": 486550 + }, + { + "epoch": 0.9828819838637346, + "grad_norm": 233.2350311279297, + "learning_rate": 2.1993513702193157e-08, + "loss": 18.1295, + "step": 486560 + }, + { + "epoch": 0.9829021844964184, + "grad_norm": 86.41580200195312, + "learning_rate": 2.1960821405226928e-08, + "loss": 17.6475, + "step": 486570 + }, + { + "epoch": 0.9829223851291022, + "grad_norm": 145.8586883544922, + "learning_rate": 2.1928153370620598e-08, + "loss": 19.6319, + "step": 486580 + }, + { + "epoch": 0.9829425857617861, + "grad_norm": 189.52638244628906, + "learning_rate": 2.1895509598532372e-08, + "loss": 23.1163, + "step": 486590 + }, + { + "epoch": 0.9829627863944699, + "grad_norm": 166.6016082763672, + "learning_rate": 2.1862890089121567e-08, + "loss": 22.5704, + "step": 486600 + }, + { + "epoch": 0.9829829870271537, + "grad_norm": 108.01043701171875, + "learning_rate": 2.1830294842547506e-08, + "loss": 8.6083, + "step": 486610 + }, + { + "epoch": 0.9830031876598375, + "grad_norm": 152.20062255859375, + "learning_rate": 2.1797723858968388e-08, + "loss": 11.4041, + "step": 486620 + }, + { + "epoch": 0.9830233882925213, + "grad_norm": 445.4767761230469, + "learning_rate": 2.1765177138543535e-08, + "loss": 19.9943, + "step": 486630 + }, + { + "epoch": 0.9830435889252052, + "grad_norm": 690.5706176757812, + "learning_rate": 2.173265468143171e-08, + "loss": 34.0933, + "step": 486640 + }, + { + "epoch": 0.983063789557889, + "grad_norm": 203.51470947265625, + "learning_rate": 2.1700156487790557e-08, + "loss": 12.4785, + "step": 486650 + }, + { + "epoch": 0.9830839901905728, + "grad_norm": 166.11949157714844, + "learning_rate": 2.1667682557779958e-08, + "loss": 20.8026, + "step": 486660 + }, + { + "epoch": 0.9831041908232566, + "grad_norm": 349.7200012207031, + "learning_rate": 2.1635232891556446e-08, + "loss": 10.3085, + "step": 486670 + }, + { + "epoch": 0.9831243914559404, + "grad_norm": 654.8858032226562, + "learning_rate": 2.1602807489279344e-08, + "loss": 14.9182, + "step": 486680 + }, + { + "epoch": 0.9831445920886243, + "grad_norm": 538.98876953125, + "learning_rate": 2.1570406351106298e-08, + "loss": 21.2531, + "step": 486690 + }, + { + "epoch": 0.9831647927213081, + "grad_norm": 265.0469970703125, + "learning_rate": 2.1538029477195522e-08, + "loss": 16.0271, + "step": 486700 + }, + { + "epoch": 0.9831849933539919, + "grad_norm": 147.05458068847656, + "learning_rate": 2.1505676867704105e-08, + "loss": 15.6037, + "step": 486710 + }, + { + "epoch": 0.9832051939866756, + "grad_norm": 167.15638732910156, + "learning_rate": 2.1473348522790262e-08, + "loss": 13.1396, + "step": 486720 + }, + { + "epoch": 0.9832253946193594, + "grad_norm": 405.22979736328125, + "learning_rate": 2.1441044442611634e-08, + "loss": 22.5856, + "step": 486730 + }, + { + "epoch": 0.9832455952520432, + "grad_norm": 23.794240951538086, + "learning_rate": 2.1408764627325883e-08, + "loss": 14.2671, + "step": 486740 + }, + { + "epoch": 0.9832657958847271, + "grad_norm": 264.0993957519531, + "learning_rate": 2.1376509077089546e-08, + "loss": 17.3019, + "step": 486750 + }, + { + "epoch": 0.9832859965174109, + "grad_norm": 236.44725036621094, + "learning_rate": 2.1344277792060275e-08, + "loss": 12.0019, + "step": 486760 + }, + { + "epoch": 0.9833061971500947, + "grad_norm": 31.286575317382812, + "learning_rate": 2.1312070772395165e-08, + "loss": 13.8529, + "step": 486770 + }, + { + "epoch": 0.9833263977827785, + "grad_norm": 69.27201080322266, + "learning_rate": 2.1279888018251317e-08, + "loss": 21.4398, + "step": 486780 + }, + { + "epoch": 0.9833465984154623, + "grad_norm": 474.1777038574219, + "learning_rate": 2.1247729529785822e-08, + "loss": 19.1745, + "step": 486790 + }, + { + "epoch": 0.9833667990481462, + "grad_norm": 66.36163330078125, + "learning_rate": 2.1215595307154667e-08, + "loss": 9.6001, + "step": 486800 + }, + { + "epoch": 0.98338699968083, + "grad_norm": 378.3507385253906, + "learning_rate": 2.1183485350514397e-08, + "loss": 12.8704, + "step": 486810 + }, + { + "epoch": 0.9834072003135138, + "grad_norm": 382.94683837890625, + "learning_rate": 2.1151399660022664e-08, + "loss": 16.8565, + "step": 486820 + }, + { + "epoch": 0.9834274009461976, + "grad_norm": 455.8929748535156, + "learning_rate": 2.1119338235834897e-08, + "loss": 13.8287, + "step": 486830 + }, + { + "epoch": 0.9834476015788814, + "grad_norm": 188.6612091064453, + "learning_rate": 2.1087301078107637e-08, + "loss": 18.3839, + "step": 486840 + }, + { + "epoch": 0.9834678022115653, + "grad_norm": 188.607666015625, + "learning_rate": 2.105528818699687e-08, + "loss": 15.9933, + "step": 486850 + }, + { + "epoch": 0.9834880028442491, + "grad_norm": 273.63031005859375, + "learning_rate": 2.1023299562658584e-08, + "loss": 11.8442, + "step": 486860 + }, + { + "epoch": 0.9835082034769329, + "grad_norm": 0.0, + "learning_rate": 2.0991335205249318e-08, + "loss": 15.2303, + "step": 486870 + }, + { + "epoch": 0.9835284041096167, + "grad_norm": 430.9086608886719, + "learning_rate": 2.0959395114923954e-08, + "loss": 18.0465, + "step": 486880 + }, + { + "epoch": 0.9835486047423005, + "grad_norm": 1019.9697265625, + "learning_rate": 2.0927479291839024e-08, + "loss": 18.0296, + "step": 486890 + }, + { + "epoch": 0.9835688053749844, + "grad_norm": 237.91407775878906, + "learning_rate": 2.0895587736149414e-08, + "loss": 17.7302, + "step": 486900 + }, + { + "epoch": 0.9835890060076682, + "grad_norm": 375.41259765625, + "learning_rate": 2.0863720448011106e-08, + "loss": 11.8425, + "step": 486910 + }, + { + "epoch": 0.983609206640352, + "grad_norm": 411.8083190917969, + "learning_rate": 2.0831877427578974e-08, + "loss": 17.8789, + "step": 486920 + }, + { + "epoch": 0.9836294072730358, + "grad_norm": 517.0942993164062, + "learning_rate": 2.0800058675007894e-08, + "loss": 27.8522, + "step": 486930 + }, + { + "epoch": 0.9836496079057196, + "grad_norm": 201.32533264160156, + "learning_rate": 2.076826419045386e-08, + "loss": 12.8727, + "step": 486940 + }, + { + "epoch": 0.9836698085384035, + "grad_norm": 190.80589294433594, + "learning_rate": 2.0736493974071736e-08, + "loss": 18.4179, + "step": 486950 + }, + { + "epoch": 0.9836900091710873, + "grad_norm": 454.38873291015625, + "learning_rate": 2.0704748026015298e-08, + "loss": 12.353, + "step": 486960 + }, + { + "epoch": 0.9837102098037711, + "grad_norm": 261.8936767578125, + "learning_rate": 2.0673026346440526e-08, + "loss": 20.5521, + "step": 486970 + }, + { + "epoch": 0.9837304104364548, + "grad_norm": 331.35302734375, + "learning_rate": 2.0641328935501748e-08, + "loss": 34.3284, + "step": 486980 + }, + { + "epoch": 0.9837506110691386, + "grad_norm": 309.5794372558594, + "learning_rate": 2.0609655793352724e-08, + "loss": 17.0877, + "step": 486990 + }, + { + "epoch": 0.9837708117018225, + "grad_norm": 479.65631103515625, + "learning_rate": 2.057800692014833e-08, + "loss": 17.5659, + "step": 487000 + }, + { + "epoch": 0.9837910123345063, + "grad_norm": 251.328369140625, + "learning_rate": 2.054638231604289e-08, + "loss": 18.0924, + "step": 487010 + }, + { + "epoch": 0.9838112129671901, + "grad_norm": 547.3744506835938, + "learning_rate": 2.051478198119017e-08, + "loss": 17.2195, + "step": 487020 + }, + { + "epoch": 0.9838314135998739, + "grad_norm": 148.19195556640625, + "learning_rate": 2.0483205915745042e-08, + "loss": 20.8214, + "step": 487030 + }, + { + "epoch": 0.9838516142325577, + "grad_norm": 314.1408386230469, + "learning_rate": 2.0451654119860164e-08, + "loss": 19.398, + "step": 487040 + }, + { + "epoch": 0.9838718148652416, + "grad_norm": 207.7755584716797, + "learning_rate": 2.0420126593690416e-08, + "loss": 11.6213, + "step": 487050 + }, + { + "epoch": 0.9838920154979254, + "grad_norm": 164.70834350585938, + "learning_rate": 2.0388623337389003e-08, + "loss": 28.0964, + "step": 487060 + }, + { + "epoch": 0.9839122161306092, + "grad_norm": 182.27505493164062, + "learning_rate": 2.0357144351109693e-08, + "loss": 11.2916, + "step": 487070 + }, + { + "epoch": 0.983932416763293, + "grad_norm": 1601.4581298828125, + "learning_rate": 2.0325689635005142e-08, + "loss": 22.9281, + "step": 487080 + }, + { + "epoch": 0.9839526173959768, + "grad_norm": 120.6426773071289, + "learning_rate": 2.029425918922967e-08, + "loss": 13.2163, + "step": 487090 + }, + { + "epoch": 0.9839728180286607, + "grad_norm": 179.578369140625, + "learning_rate": 2.026285301393538e-08, + "loss": 12.8361, + "step": 487100 + }, + { + "epoch": 0.9839930186613445, + "grad_norm": 227.57310485839844, + "learning_rate": 2.023147110927659e-08, + "loss": 12.8364, + "step": 487110 + }, + { + "epoch": 0.9840132192940283, + "grad_norm": 216.75086975097656, + "learning_rate": 2.020011347540596e-08, + "loss": 17.3644, + "step": 487120 + }, + { + "epoch": 0.9840334199267121, + "grad_norm": 250.1388702392578, + "learning_rate": 2.016878011247503e-08, + "loss": 20.2271, + "step": 487130 + }, + { + "epoch": 0.9840536205593959, + "grad_norm": 75.4766616821289, + "learning_rate": 2.013747102063812e-08, + "loss": 10.7925, + "step": 487140 + }, + { + "epoch": 0.9840738211920798, + "grad_norm": 337.7696838378906, + "learning_rate": 2.010618620004734e-08, + "loss": 17.7548, + "step": 487150 + }, + { + "epoch": 0.9840940218247636, + "grad_norm": 175.10000610351562, + "learning_rate": 2.0074925650854226e-08, + "loss": 20.6679, + "step": 487160 + }, + { + "epoch": 0.9841142224574474, + "grad_norm": 113.11949920654297, + "learning_rate": 2.004368937321255e-08, + "loss": 10.9412, + "step": 487170 + }, + { + "epoch": 0.9841344230901312, + "grad_norm": 260.4883117675781, + "learning_rate": 2.0012477367273854e-08, + "loss": 11.9149, + "step": 487180 + }, + { + "epoch": 0.984154623722815, + "grad_norm": 276.24774169921875, + "learning_rate": 1.9981289633190237e-08, + "loss": 19.1066, + "step": 487190 + }, + { + "epoch": 0.9841748243554989, + "grad_norm": 267.4715881347656, + "learning_rate": 1.995012617111436e-08, + "loss": 13.6057, + "step": 487200 + }, + { + "epoch": 0.9841950249881827, + "grad_norm": 382.16961669921875, + "learning_rate": 1.9918986981196653e-08, + "loss": 21.8432, + "step": 487210 + }, + { + "epoch": 0.9842152256208665, + "grad_norm": 321.4910583496094, + "learning_rate": 1.988787206359033e-08, + "loss": 17.212, + "step": 487220 + }, + { + "epoch": 0.9842354262535503, + "grad_norm": 383.756591796875, + "learning_rate": 1.985678141844638e-08, + "loss": 13.3955, + "step": 487230 + }, + { + "epoch": 0.984255626886234, + "grad_norm": 275.9746398925781, + "learning_rate": 1.9825715045916905e-08, + "loss": 20.2973, + "step": 487240 + }, + { + "epoch": 0.9842758275189178, + "grad_norm": 169.0533447265625, + "learning_rate": 1.9794672946152337e-08, + "loss": 17.8191, + "step": 487250 + }, + { + "epoch": 0.9842960281516017, + "grad_norm": 301.2450866699219, + "learning_rate": 1.9763655119304227e-08, + "loss": 11.1227, + "step": 487260 + }, + { + "epoch": 0.9843162287842855, + "grad_norm": 179.3243865966797, + "learning_rate": 1.973266156552467e-08, + "loss": 20.714, + "step": 487270 + }, + { + "epoch": 0.9843364294169693, + "grad_norm": 629.1900024414062, + "learning_rate": 1.9701692284963547e-08, + "loss": 21.4526, + "step": 487280 + }, + { + "epoch": 0.9843566300496531, + "grad_norm": 495.31561279296875, + "learning_rate": 1.967074727777296e-08, + "loss": 26.6525, + "step": 487290 + }, + { + "epoch": 0.984376830682337, + "grad_norm": 18.64380645751953, + "learning_rate": 1.963982654410279e-08, + "loss": 11.236, + "step": 487300 + }, + { + "epoch": 0.9843970313150208, + "grad_norm": 253.85336303710938, + "learning_rate": 1.9608930084104027e-08, + "loss": 13.1734, + "step": 487310 + }, + { + "epoch": 0.9844172319477046, + "grad_norm": 320.97882080078125, + "learning_rate": 1.9578057897927104e-08, + "loss": 21.7728, + "step": 487320 + }, + { + "epoch": 0.9844374325803884, + "grad_norm": 320.5849914550781, + "learning_rate": 1.9547209985723015e-08, + "loss": 15.2387, + "step": 487330 + }, + { + "epoch": 0.9844576332130722, + "grad_norm": 160.2686767578125, + "learning_rate": 1.9516386347641636e-08, + "loss": 30.8996, + "step": 487340 + }, + { + "epoch": 0.984477833845756, + "grad_norm": 55.73615646362305, + "learning_rate": 1.9485586983833404e-08, + "loss": 16.7139, + "step": 487350 + }, + { + "epoch": 0.9844980344784399, + "grad_norm": 243.82052612304688, + "learning_rate": 1.94548118944482e-08, + "loss": 8.9158, + "step": 487360 + }, + { + "epoch": 0.9845182351111237, + "grad_norm": 200.86695861816406, + "learning_rate": 1.9424061079636458e-08, + "loss": 20.3272, + "step": 487370 + }, + { + "epoch": 0.9845384357438075, + "grad_norm": 431.8441162109375, + "learning_rate": 1.9393334539547505e-08, + "loss": 18.077, + "step": 487380 + }, + { + "epoch": 0.9845586363764913, + "grad_norm": 498.93487548828125, + "learning_rate": 1.9362632274331215e-08, + "loss": 24.6046, + "step": 487390 + }, + { + "epoch": 0.9845788370091751, + "grad_norm": 226.7593536376953, + "learning_rate": 1.9331954284137476e-08, + "loss": 11.3064, + "step": 487400 + }, + { + "epoch": 0.984599037641859, + "grad_norm": 512.7572021484375, + "learning_rate": 1.9301300569116165e-08, + "loss": 10.7101, + "step": 487410 + }, + { + "epoch": 0.9846192382745428, + "grad_norm": 370.7438049316406, + "learning_rate": 1.9270671129415496e-08, + "loss": 23.7001, + "step": 487420 + }, + { + "epoch": 0.9846394389072266, + "grad_norm": 154.5172119140625, + "learning_rate": 1.9240065965185907e-08, + "loss": 13.9947, + "step": 487430 + }, + { + "epoch": 0.9846596395399104, + "grad_norm": 373.3928527832031, + "learning_rate": 1.9209485076576718e-08, + "loss": 19.3025, + "step": 487440 + }, + { + "epoch": 0.9846798401725942, + "grad_norm": 524.5455322265625, + "learning_rate": 1.9178928463735593e-08, + "loss": 17.3592, + "step": 487450 + }, + { + "epoch": 0.9847000408052781, + "grad_norm": 0.0, + "learning_rate": 1.9148396126812407e-08, + "loss": 21.9097, + "step": 487460 + }, + { + "epoch": 0.9847202414379619, + "grad_norm": 153.7161102294922, + "learning_rate": 1.9117888065955938e-08, + "loss": 7.4523, + "step": 487470 + }, + { + "epoch": 0.9847404420706457, + "grad_norm": 413.46221923828125, + "learning_rate": 1.908740428131495e-08, + "loss": 15.7865, + "step": 487480 + }, + { + "epoch": 0.9847606427033294, + "grad_norm": 6.001185894012451, + "learning_rate": 1.9056944773037656e-08, + "loss": 14.429, + "step": 487490 + }, + { + "epoch": 0.9847808433360132, + "grad_norm": 147.7904510498047, + "learning_rate": 1.9026509541272276e-08, + "loss": 12.247, + "step": 487500 + }, + { + "epoch": 0.984801043968697, + "grad_norm": 375.27838134765625, + "learning_rate": 1.8996098586168132e-08, + "loss": 8.6651, + "step": 487510 + }, + { + "epoch": 0.9848212446013809, + "grad_norm": 250.44415283203125, + "learning_rate": 1.8965711907872885e-08, + "loss": 23.1334, + "step": 487520 + }, + { + "epoch": 0.9848414452340647, + "grad_norm": 329.9169006347656, + "learning_rate": 1.8935349506534195e-08, + "loss": 24.2585, + "step": 487530 + }, + { + "epoch": 0.9848616458667485, + "grad_norm": 485.7986755371094, + "learning_rate": 1.890501138230083e-08, + "loss": 28.2031, + "step": 487540 + }, + { + "epoch": 0.9848818464994323, + "grad_norm": 291.9529724121094, + "learning_rate": 1.8874697535319897e-08, + "loss": 14.7063, + "step": 487550 + }, + { + "epoch": 0.9849020471321162, + "grad_norm": 431.1380615234375, + "learning_rate": 1.8844407965740165e-08, + "loss": 19.2102, + "step": 487560 + }, + { + "epoch": 0.9849222477648, + "grad_norm": 294.2987976074219, + "learning_rate": 1.881414267370818e-08, + "loss": 26.2314, + "step": 487570 + }, + { + "epoch": 0.9849424483974838, + "grad_norm": 241.67819213867188, + "learning_rate": 1.8783901659372162e-08, + "loss": 12.028, + "step": 487580 + }, + { + "epoch": 0.9849626490301676, + "grad_norm": 168.99851989746094, + "learning_rate": 1.875368492287921e-08, + "loss": 13.3972, + "step": 487590 + }, + { + "epoch": 0.9849828496628514, + "grad_norm": 395.69635009765625, + "learning_rate": 1.8723492464376992e-08, + "loss": 16.6246, + "step": 487600 + }, + { + "epoch": 0.9850030502955353, + "grad_norm": 239.0929412841797, + "learning_rate": 1.8693324284011495e-08, + "loss": 30.1394, + "step": 487610 + }, + { + "epoch": 0.9850232509282191, + "grad_norm": 283.23016357421875, + "learning_rate": 1.8663180381931488e-08, + "loss": 18.1366, + "step": 487620 + }, + { + "epoch": 0.9850434515609029, + "grad_norm": 0.0, + "learning_rate": 1.8633060758282418e-08, + "loss": 13.3518, + "step": 487630 + }, + { + "epoch": 0.9850636521935867, + "grad_norm": 390.0324401855469, + "learning_rate": 1.860296541321138e-08, + "loss": 14.8397, + "step": 487640 + }, + { + "epoch": 0.9850838528262705, + "grad_norm": 193.6684112548828, + "learning_rate": 1.8572894346866043e-08, + "loss": 6.3827, + "step": 487650 + }, + { + "epoch": 0.9851040534589544, + "grad_norm": 249.24124145507812, + "learning_rate": 1.854284755939184e-08, + "loss": 32.1922, + "step": 487660 + }, + { + "epoch": 0.9851242540916382, + "grad_norm": 5.052976131439209, + "learning_rate": 1.8512825050935323e-08, + "loss": 14.5054, + "step": 487670 + }, + { + "epoch": 0.985144454724322, + "grad_norm": 251.10342407226562, + "learning_rate": 1.8482826821643596e-08, + "loss": 36.6727, + "step": 487680 + }, + { + "epoch": 0.9851646553570058, + "grad_norm": 87.64988708496094, + "learning_rate": 1.8452852871662653e-08, + "loss": 8.2042, + "step": 487690 + }, + { + "epoch": 0.9851848559896896, + "grad_norm": 18.71599006652832, + "learning_rate": 1.842290320113793e-08, + "loss": 21.0584, + "step": 487700 + }, + { + "epoch": 0.9852050566223735, + "grad_norm": 276.3898620605469, + "learning_rate": 1.839297781021543e-08, + "loss": 24.6668, + "step": 487710 + }, + { + "epoch": 0.9852252572550573, + "grad_norm": 216.9844512939453, + "learning_rate": 1.8363076699041695e-08, + "loss": 16.0389, + "step": 487720 + }, + { + "epoch": 0.9852454578877411, + "grad_norm": 240.8494415283203, + "learning_rate": 1.8333199867762163e-08, + "loss": 15.7165, + "step": 487730 + }, + { + "epoch": 0.9852656585204249, + "grad_norm": 479.3291931152344, + "learning_rate": 1.830334731652228e-08, + "loss": 24.8963, + "step": 487740 + }, + { + "epoch": 0.9852858591531086, + "grad_norm": 6.0796098709106445, + "learning_rate": 1.8273519045468035e-08, + "loss": 11.4292, + "step": 487750 + }, + { + "epoch": 0.9853060597857924, + "grad_norm": 55.927310943603516, + "learning_rate": 1.8243715054744315e-08, + "loss": 19.4176, + "step": 487760 + }, + { + "epoch": 0.9853262604184763, + "grad_norm": 225.20668029785156, + "learning_rate": 1.8213935344496002e-08, + "loss": 12.6681, + "step": 487770 + }, + { + "epoch": 0.9853464610511601, + "grad_norm": 70.58676147460938, + "learning_rate": 1.8184179914869093e-08, + "loss": 10.7999, + "step": 487780 + }, + { + "epoch": 0.9853666616838439, + "grad_norm": 230.5763702392578, + "learning_rate": 1.815444876600847e-08, + "loss": 12.6158, + "step": 487790 + }, + { + "epoch": 0.9853868623165277, + "grad_norm": 47.597259521484375, + "learning_rate": 1.8124741898058462e-08, + "loss": 15.8652, + "step": 487800 + }, + { + "epoch": 0.9854070629492115, + "grad_norm": 160.1806182861328, + "learning_rate": 1.8095059311164508e-08, + "loss": 15.9682, + "step": 487810 + }, + { + "epoch": 0.9854272635818954, + "grad_norm": 55.63274002075195, + "learning_rate": 1.8065401005470938e-08, + "loss": 17.0484, + "step": 487820 + }, + { + "epoch": 0.9854474642145792, + "grad_norm": 624.5726928710938, + "learning_rate": 1.803576698112264e-08, + "loss": 20.1198, + "step": 487830 + }, + { + "epoch": 0.985467664847263, + "grad_norm": 268.1905517578125, + "learning_rate": 1.8006157238263376e-08, + "loss": 22.8521, + "step": 487840 + }, + { + "epoch": 0.9854878654799468, + "grad_norm": 285.8241271972656, + "learning_rate": 1.7976571777038044e-08, + "loss": 14.8861, + "step": 487850 + }, + { + "epoch": 0.9855080661126306, + "grad_norm": 328.4594421386719, + "learning_rate": 1.7947010597590408e-08, + "loss": 6.3186, + "step": 487860 + }, + { + "epoch": 0.9855282667453145, + "grad_norm": 341.7926940917969, + "learning_rate": 1.791747370006536e-08, + "loss": 21.5587, + "step": 487870 + }, + { + "epoch": 0.9855484673779983, + "grad_norm": 234.67457580566406, + "learning_rate": 1.7887961084605554e-08, + "loss": 18.5226, + "step": 487880 + }, + { + "epoch": 0.9855686680106821, + "grad_norm": 202.00758361816406, + "learning_rate": 1.7858472751355883e-08, + "loss": 19.0618, + "step": 487890 + }, + { + "epoch": 0.9855888686433659, + "grad_norm": 240.0037384033203, + "learning_rate": 1.7829008700460116e-08, + "loss": 26.7407, + "step": 487900 + }, + { + "epoch": 0.9856090692760497, + "grad_norm": 428.3014831542969, + "learning_rate": 1.779956893206092e-08, + "loss": 24.3507, + "step": 487910 + }, + { + "epoch": 0.9856292699087336, + "grad_norm": 277.4865417480469, + "learning_rate": 1.7770153446302618e-08, + "loss": 16.4346, + "step": 487920 + }, + { + "epoch": 0.9856494705414174, + "grad_norm": 143.16390991210938, + "learning_rate": 1.7740762243328435e-08, + "loss": 15.1754, + "step": 487930 + }, + { + "epoch": 0.9856696711741012, + "grad_norm": 261.26666259765625, + "learning_rate": 1.7711395323281588e-08, + "loss": 17.8769, + "step": 487940 + }, + { + "epoch": 0.985689871806785, + "grad_norm": 332.67401123046875, + "learning_rate": 1.768205268630474e-08, + "loss": 11.723, + "step": 487950 + }, + { + "epoch": 0.9857100724394688, + "grad_norm": 798.5401000976562, + "learning_rate": 1.765273433254111e-08, + "loss": 12.4922, + "step": 487960 + }, + { + "epoch": 0.9857302730721527, + "grad_norm": 474.3741149902344, + "learning_rate": 1.7623440262134472e-08, + "loss": 28.1277, + "step": 487970 + }, + { + "epoch": 0.9857504737048365, + "grad_norm": 431.0119934082031, + "learning_rate": 1.759417047522638e-08, + "loss": 21.4646, + "step": 487980 + }, + { + "epoch": 0.9857706743375203, + "grad_norm": 162.62039184570312, + "learning_rate": 1.756492497196005e-08, + "loss": 26.5215, + "step": 487990 + }, + { + "epoch": 0.985790874970204, + "grad_norm": 321.650634765625, + "learning_rate": 1.753570375247815e-08, + "loss": 21.9883, + "step": 488000 + }, + { + "epoch": 0.9858110756028878, + "grad_norm": 0.0, + "learning_rate": 1.7506506816923342e-08, + "loss": 25.8632, + "step": 488010 + }, + { + "epoch": 0.9858312762355717, + "grad_norm": 307.39825439453125, + "learning_rate": 1.747733416543662e-08, + "loss": 9.4321, + "step": 488020 + }, + { + "epoch": 0.9858514768682555, + "grad_norm": 279.2649841308594, + "learning_rate": 1.7448185798161765e-08, + "loss": 11.7379, + "step": 488030 + }, + { + "epoch": 0.9858716775009393, + "grad_norm": 608.5477294921875, + "learning_rate": 1.741906171523977e-08, + "loss": 22.1897, + "step": 488040 + }, + { + "epoch": 0.9858918781336231, + "grad_norm": 325.70849609375, + "learning_rate": 1.73899619168133e-08, + "loss": 11.8657, + "step": 488050 + }, + { + "epoch": 0.9859120787663069, + "grad_norm": 14.203507423400879, + "learning_rate": 1.7360886403023358e-08, + "loss": 8.2065, + "step": 488060 + }, + { + "epoch": 0.9859322793989908, + "grad_norm": 117.48062133789062, + "learning_rate": 1.7331835174012602e-08, + "loss": 14.0592, + "step": 488070 + }, + { + "epoch": 0.9859524800316746, + "grad_norm": 118.30371856689453, + "learning_rate": 1.7302808229921476e-08, + "loss": 9.5321, + "step": 488080 + }, + { + "epoch": 0.9859726806643584, + "grad_norm": 0.0, + "learning_rate": 1.7273805570892643e-08, + "loss": 17.0119, + "step": 488090 + }, + { + "epoch": 0.9859928812970422, + "grad_norm": 230.1846466064453, + "learning_rate": 1.7244827197067103e-08, + "loss": 11.6632, + "step": 488100 + }, + { + "epoch": 0.986013081929726, + "grad_norm": 950.8536987304688, + "learning_rate": 1.7215873108585858e-08, + "loss": 39.8514, + "step": 488110 + }, + { + "epoch": 0.9860332825624099, + "grad_norm": 357.2371520996094, + "learning_rate": 1.71869433055899e-08, + "loss": 15.9341, + "step": 488120 + }, + { + "epoch": 0.9860534831950937, + "grad_norm": 358.3096618652344, + "learning_rate": 1.7158037788220782e-08, + "loss": 13.2133, + "step": 488130 + }, + { + "epoch": 0.9860736838277775, + "grad_norm": 325.31451416015625, + "learning_rate": 1.7129156556618398e-08, + "loss": 22.9959, + "step": 488140 + }, + { + "epoch": 0.9860938844604613, + "grad_norm": 290.09014892578125, + "learning_rate": 1.7100299610924297e-08, + "loss": 15.1126, + "step": 488150 + }, + { + "epoch": 0.9861140850931451, + "grad_norm": 303.61358642578125, + "learning_rate": 1.707146695127948e-08, + "loss": 9.4876, + "step": 488160 + }, + { + "epoch": 0.986134285725829, + "grad_norm": 228.52444458007812, + "learning_rate": 1.7042658577823833e-08, + "loss": 31.0255, + "step": 488170 + }, + { + "epoch": 0.9861544863585128, + "grad_norm": 215.50685119628906, + "learning_rate": 1.7013874490697802e-08, + "loss": 10.9896, + "step": 488180 + }, + { + "epoch": 0.9861746869911966, + "grad_norm": 194.21315002441406, + "learning_rate": 1.6985114690041825e-08, + "loss": 18.2948, + "step": 488190 + }, + { + "epoch": 0.9861948876238804, + "grad_norm": 199.46400451660156, + "learning_rate": 1.6956379175995796e-08, + "loss": 6.3744, + "step": 488200 + }, + { + "epoch": 0.9862150882565642, + "grad_norm": 154.3696746826172, + "learning_rate": 1.6927667948700155e-08, + "loss": 12.2599, + "step": 488210 + }, + { + "epoch": 0.986235288889248, + "grad_norm": 0.0, + "learning_rate": 1.689898100829479e-08, + "loss": 11.7021, + "step": 488220 + }, + { + "epoch": 0.9862554895219319, + "grad_norm": 373.691162109375, + "learning_rate": 1.687031835491959e-08, + "loss": 24.5346, + "step": 488230 + }, + { + "epoch": 0.9862756901546157, + "grad_norm": 129.77389526367188, + "learning_rate": 1.6841679988713332e-08, + "loss": 13.8806, + "step": 488240 + }, + { + "epoch": 0.9862958907872995, + "grad_norm": 200.0734100341797, + "learning_rate": 1.681306590981702e-08, + "loss": 20.9077, + "step": 488250 + }, + { + "epoch": 0.9863160914199832, + "grad_norm": 319.417236328125, + "learning_rate": 1.678447611836942e-08, + "loss": 20.4569, + "step": 488260 + }, + { + "epoch": 0.986336292052667, + "grad_norm": 203.24192810058594, + "learning_rate": 1.6755910614509872e-08, + "loss": 13.9135, + "step": 488270 + }, + { + "epoch": 0.9863564926853509, + "grad_norm": 249.82212829589844, + "learning_rate": 1.6727369398377158e-08, + "loss": 19.4762, + "step": 488280 + }, + { + "epoch": 0.9863766933180347, + "grad_norm": 372.3239440917969, + "learning_rate": 1.669885247011116e-08, + "loss": 11.3219, + "step": 488290 + }, + { + "epoch": 0.9863968939507185, + "grad_norm": 346.02032470703125, + "learning_rate": 1.6670359829850657e-08, + "loss": 23.3714, + "step": 488300 + }, + { + "epoch": 0.9864170945834023, + "grad_norm": 183.74671936035156, + "learning_rate": 1.664189147773443e-08, + "loss": 11.1664, + "step": 488310 + }, + { + "epoch": 0.9864372952160861, + "grad_norm": 141.95510864257812, + "learning_rate": 1.6613447413900696e-08, + "loss": 17.8322, + "step": 488320 + }, + { + "epoch": 0.98645749584877, + "grad_norm": 208.08604431152344, + "learning_rate": 1.6585027638489347e-08, + "loss": 17.2616, + "step": 488330 + }, + { + "epoch": 0.9864776964814538, + "grad_norm": 189.37168884277344, + "learning_rate": 1.655663215163805e-08, + "loss": 8.0663, + "step": 488340 + }, + { + "epoch": 0.9864978971141376, + "grad_norm": 154.3152313232422, + "learning_rate": 1.6528260953484476e-08, + "loss": 16.4016, + "step": 488350 + }, + { + "epoch": 0.9865180977468214, + "grad_norm": 193.22824096679688, + "learning_rate": 1.6499914044168508e-08, + "loss": 13.7439, + "step": 488360 + }, + { + "epoch": 0.9865382983795052, + "grad_norm": 146.596435546875, + "learning_rate": 1.6471591423827817e-08, + "loss": 14.3777, + "step": 488370 + }, + { + "epoch": 0.9865584990121891, + "grad_norm": 277.32025146484375, + "learning_rate": 1.644329309259951e-08, + "loss": 23.4403, + "step": 488380 + }, + { + "epoch": 0.9865786996448729, + "grad_norm": 207.35980224609375, + "learning_rate": 1.6415019050622373e-08, + "loss": 20.5025, + "step": 488390 + }, + { + "epoch": 0.9865989002775567, + "grad_norm": 347.7386779785156, + "learning_rate": 1.6386769298034067e-08, + "loss": 16.721, + "step": 488400 + }, + { + "epoch": 0.9866191009102405, + "grad_norm": 334.52081298828125, + "learning_rate": 1.635854383497226e-08, + "loss": 10.623, + "step": 488410 + }, + { + "epoch": 0.9866393015429243, + "grad_norm": 184.0161590576172, + "learning_rate": 1.6330342661574072e-08, + "loss": 8.1553, + "step": 488420 + }, + { + "epoch": 0.9866595021756082, + "grad_norm": 477.0707092285156, + "learning_rate": 1.6302165777977718e-08, + "loss": 15.1807, + "step": 488430 + }, + { + "epoch": 0.986679702808292, + "grad_norm": 603.2002563476562, + "learning_rate": 1.6274013184319757e-08, + "loss": 23.8915, + "step": 488440 + }, + { + "epoch": 0.9866999034409758, + "grad_norm": 259.2967224121094, + "learning_rate": 1.6245884880738415e-08, + "loss": 9.3005, + "step": 488450 + }, + { + "epoch": 0.9867201040736596, + "grad_norm": 37.519474029541016, + "learning_rate": 1.621778086736969e-08, + "loss": 17.446, + "step": 488460 + }, + { + "epoch": 0.9867403047063434, + "grad_norm": 290.7611999511719, + "learning_rate": 1.6189701144351254e-08, + "loss": 18.5315, + "step": 488470 + }, + { + "epoch": 0.9867605053390273, + "grad_norm": 635.7722778320312, + "learning_rate": 1.6161645711819664e-08, + "loss": 28.1317, + "step": 488480 + }, + { + "epoch": 0.9867807059717111, + "grad_norm": 97.23357391357422, + "learning_rate": 1.6133614569912027e-08, + "loss": 13.3002, + "step": 488490 + }, + { + "epoch": 0.9868009066043949, + "grad_norm": 935.350830078125, + "learning_rate": 1.610560771876435e-08, + "loss": 19.8366, + "step": 488500 + }, + { + "epoch": 0.9868211072370787, + "grad_norm": 208.09727478027344, + "learning_rate": 1.607762515851319e-08, + "loss": 15.4066, + "step": 488510 + }, + { + "epoch": 0.9868413078697624, + "grad_norm": 336.455078125, + "learning_rate": 1.6049666889295657e-08, + "loss": 22.3513, + "step": 488520 + }, + { + "epoch": 0.9868615085024462, + "grad_norm": 0.0, + "learning_rate": 1.6021732911247756e-08, + "loss": 16.3491, + "step": 488530 + }, + { + "epoch": 0.9868817091351301, + "grad_norm": 478.55029296875, + "learning_rate": 1.5993823224504935e-08, + "loss": 22.3994, + "step": 488540 + }, + { + "epoch": 0.9869019097678139, + "grad_norm": 412.5328674316406, + "learning_rate": 1.5965937829204302e-08, + "loss": 18.8308, + "step": 488550 + }, + { + "epoch": 0.9869221104004977, + "grad_norm": 117.3507080078125, + "learning_rate": 1.5938076725480756e-08, + "loss": 15.6856, + "step": 488560 + }, + { + "epoch": 0.9869423110331815, + "grad_norm": 202.3659210205078, + "learning_rate": 1.5910239913470292e-08, + "loss": 9.3767, + "step": 488570 + }, + { + "epoch": 0.9869625116658653, + "grad_norm": 101.88932800292969, + "learning_rate": 1.5882427393309475e-08, + "loss": 12.814, + "step": 488580 + }, + { + "epoch": 0.9869827122985492, + "grad_norm": 293.5104064941406, + "learning_rate": 1.585463916513319e-08, + "loss": 13.3986, + "step": 488590 + }, + { + "epoch": 0.987002912931233, + "grad_norm": 396.2603759765625, + "learning_rate": 1.582687522907633e-08, + "loss": 14.7102, + "step": 488600 + }, + { + "epoch": 0.9870231135639168, + "grad_norm": 360.0671691894531, + "learning_rate": 1.5799135585274906e-08, + "loss": 23.0482, + "step": 488610 + }, + { + "epoch": 0.9870433141966006, + "grad_norm": 468.65740966796875, + "learning_rate": 1.5771420233864355e-08, + "loss": 17.836, + "step": 488620 + }, + { + "epoch": 0.9870635148292844, + "grad_norm": 145.20582580566406, + "learning_rate": 1.5743729174979016e-08, + "loss": 9.367, + "step": 488630 + }, + { + "epoch": 0.9870837154619683, + "grad_norm": 340.53564453125, + "learning_rate": 1.571606240875434e-08, + "loss": 26.3723, + "step": 488640 + }, + { + "epoch": 0.9871039160946521, + "grad_norm": 8.510629653930664, + "learning_rate": 1.5688419935325216e-08, + "loss": 10.1287, + "step": 488650 + }, + { + "epoch": 0.9871241167273359, + "grad_norm": 99.44361877441406, + "learning_rate": 1.5660801754825983e-08, + "loss": 11.9617, + "step": 488660 + }, + { + "epoch": 0.9871443173600197, + "grad_norm": 163.0398406982422, + "learning_rate": 1.563320786739153e-08, + "loss": 13.9467, + "step": 488670 + }, + { + "epoch": 0.9871645179927035, + "grad_norm": 365.76470947265625, + "learning_rate": 1.56056382731562e-08, + "loss": 20.5186, + "step": 488680 + }, + { + "epoch": 0.9871847186253874, + "grad_norm": 83.98016357421875, + "learning_rate": 1.5578092972254875e-08, + "loss": 16.4393, + "step": 488690 + }, + { + "epoch": 0.9872049192580712, + "grad_norm": 113.0829849243164, + "learning_rate": 1.5550571964820793e-08, + "loss": 24.4147, + "step": 488700 + }, + { + "epoch": 0.987225119890755, + "grad_norm": 82.530029296875, + "learning_rate": 1.5523075250989395e-08, + "loss": 6.054, + "step": 488710 + }, + { + "epoch": 0.9872453205234388, + "grad_norm": 102.75135040283203, + "learning_rate": 1.5495602830893354e-08, + "loss": 22.1198, + "step": 488720 + }, + { + "epoch": 0.9872655211561226, + "grad_norm": 521.2152709960938, + "learning_rate": 1.546815470466756e-08, + "loss": 35.0327, + "step": 488730 + }, + { + "epoch": 0.9872857217888065, + "grad_norm": 69.88471984863281, + "learning_rate": 1.5440730872445242e-08, + "loss": 26.4921, + "step": 488740 + }, + { + "epoch": 0.9873059224214903, + "grad_norm": 210.11477661132812, + "learning_rate": 1.541333133436018e-08, + "loss": 20.2044, + "step": 488750 + }, + { + "epoch": 0.9873261230541741, + "grad_norm": 0.0, + "learning_rate": 1.538595609054616e-08, + "loss": 10.2439, + "step": 488760 + }, + { + "epoch": 0.9873463236868578, + "grad_norm": 185.92269897460938, + "learning_rate": 1.5358605141136407e-08, + "loss": 16.2549, + "step": 488770 + }, + { + "epoch": 0.9873665243195416, + "grad_norm": 130.92626953125, + "learning_rate": 1.5331278486264144e-08, + "loss": 11.7812, + "step": 488780 + }, + { + "epoch": 0.9873867249522255, + "grad_norm": 868.0314331054688, + "learning_rate": 1.53039761260626e-08, + "loss": 12.8459, + "step": 488790 + }, + { + "epoch": 0.9874069255849093, + "grad_norm": 553.2174682617188, + "learning_rate": 1.5276698060665007e-08, + "loss": 20.79, + "step": 488800 + }, + { + "epoch": 0.9874271262175931, + "grad_norm": 190.422119140625, + "learning_rate": 1.5249444290204584e-08, + "loss": 22.2626, + "step": 488810 + }, + { + "epoch": 0.9874473268502769, + "grad_norm": 213.2537841796875, + "learning_rate": 1.5222214814812897e-08, + "loss": 14.2655, + "step": 488820 + }, + { + "epoch": 0.9874675274829607, + "grad_norm": 177.89871215820312, + "learning_rate": 1.519500963462428e-08, + "loss": 13.1173, + "step": 488830 + }, + { + "epoch": 0.9874877281156446, + "grad_norm": 340.16180419921875, + "learning_rate": 1.5167828749770853e-08, + "loss": 18.5178, + "step": 488840 + }, + { + "epoch": 0.9875079287483284, + "grad_norm": 0.0, + "learning_rate": 1.5140672160384174e-08, + "loss": 21.7909, + "step": 488850 + }, + { + "epoch": 0.9875281293810122, + "grad_norm": 72.12262725830078, + "learning_rate": 1.511353986659747e-08, + "loss": 12.1323, + "step": 488860 + }, + { + "epoch": 0.987548330013696, + "grad_norm": 119.32369995117188, + "learning_rate": 1.508643186854286e-08, + "loss": 11.6525, + "step": 488870 + }, + { + "epoch": 0.9875685306463798, + "grad_norm": 15.226715087890625, + "learning_rate": 1.505934816635246e-08, + "loss": 17.8393, + "step": 488880 + }, + { + "epoch": 0.9875887312790637, + "grad_norm": 317.7912292480469, + "learning_rate": 1.503228876015783e-08, + "loss": 8.1661, + "step": 488890 + }, + { + "epoch": 0.9876089319117475, + "grad_norm": 242.69522094726562, + "learning_rate": 1.500525365009109e-08, + "loss": 23.9129, + "step": 488900 + }, + { + "epoch": 0.9876291325444313, + "grad_norm": 283.3116760253906, + "learning_rate": 1.4978242836284908e-08, + "loss": 19.5075, + "step": 488910 + }, + { + "epoch": 0.9876493331771151, + "grad_norm": 206.3544464111328, + "learning_rate": 1.4951256318869733e-08, + "loss": 17.1439, + "step": 488920 + }, + { + "epoch": 0.9876695338097989, + "grad_norm": 216.89434814453125, + "learning_rate": 1.4924294097977687e-08, + "loss": 23.3198, + "step": 488930 + }, + { + "epoch": 0.9876897344424828, + "grad_norm": 30.293317794799805, + "learning_rate": 1.4897356173739774e-08, + "loss": 8.9313, + "step": 488940 + }, + { + "epoch": 0.9877099350751666, + "grad_norm": 446.0645751953125, + "learning_rate": 1.4870442546287555e-08, + "loss": 12.2558, + "step": 488950 + }, + { + "epoch": 0.9877301357078504, + "grad_norm": 429.6090087890625, + "learning_rate": 1.4843553215752037e-08, + "loss": 6.9012, + "step": 488960 + }, + { + "epoch": 0.9877503363405342, + "grad_norm": 241.11767578125, + "learning_rate": 1.4816688182264782e-08, + "loss": 28.3325, + "step": 488970 + }, + { + "epoch": 0.987770536973218, + "grad_norm": 146.9576873779297, + "learning_rate": 1.478984744595624e-08, + "loss": 11.2696, + "step": 488980 + }, + { + "epoch": 0.9877907376059019, + "grad_norm": 264.9900207519531, + "learning_rate": 1.4763031006957417e-08, + "loss": 18.4016, + "step": 488990 + }, + { + "epoch": 0.9878109382385857, + "grad_norm": 46.155147552490234, + "learning_rate": 1.4736238865398766e-08, + "loss": 21.9187, + "step": 489000 + }, + { + "epoch": 0.9878311388712695, + "grad_norm": 442.6540222167969, + "learning_rate": 1.4709471021411293e-08, + "loss": 23.4123, + "step": 489010 + }, + { + "epoch": 0.9878513395039533, + "grad_norm": 304.92022705078125, + "learning_rate": 1.4682727475124891e-08, + "loss": 8.7293, + "step": 489020 + }, + { + "epoch": 0.987871540136637, + "grad_norm": 366.9974365234375, + "learning_rate": 1.4656008226670571e-08, + "loss": 16.9375, + "step": 489030 + }, + { + "epoch": 0.9878917407693208, + "grad_norm": 585.1427612304688, + "learning_rate": 1.462931327617767e-08, + "loss": 17.0381, + "step": 489040 + }, + { + "epoch": 0.9879119414020047, + "grad_norm": 248.7589874267578, + "learning_rate": 1.4602642623777752e-08, + "loss": 16.0915, + "step": 489050 + }, + { + "epoch": 0.9879321420346885, + "grad_norm": 436.0631103515625, + "learning_rate": 1.4575996269599046e-08, + "loss": 23.4857, + "step": 489060 + }, + { + "epoch": 0.9879523426673723, + "grad_norm": 229.00271606445312, + "learning_rate": 1.454937421377256e-08, + "loss": 22.8875, + "step": 489070 + }, + { + "epoch": 0.9879725433000561, + "grad_norm": 487.84930419921875, + "learning_rate": 1.4522776456427635e-08, + "loss": 12.5085, + "step": 489080 + }, + { + "epoch": 0.98799274393274, + "grad_norm": 265.67041015625, + "learning_rate": 1.4496202997694164e-08, + "loss": 12.242, + "step": 489090 + }, + { + "epoch": 0.9880129445654238, + "grad_norm": 365.039306640625, + "learning_rate": 1.4469653837701491e-08, + "loss": 20.5881, + "step": 489100 + }, + { + "epoch": 0.9880331451981076, + "grad_norm": 148.48818969726562, + "learning_rate": 1.4443128976579513e-08, + "loss": 9.5963, + "step": 489110 + }, + { + "epoch": 0.9880533458307914, + "grad_norm": 112.43424987792969, + "learning_rate": 1.4416628414456457e-08, + "loss": 15.4086, + "step": 489120 + }, + { + "epoch": 0.9880735464634752, + "grad_norm": 338.1566162109375, + "learning_rate": 1.4390152151462222e-08, + "loss": 14.5803, + "step": 489130 + }, + { + "epoch": 0.988093747096159, + "grad_norm": 154.32980346679688, + "learning_rate": 1.4363700187725593e-08, + "loss": 4.736, + "step": 489140 + }, + { + "epoch": 0.9881139477288429, + "grad_norm": 88.50868225097656, + "learning_rate": 1.4337272523375911e-08, + "loss": 11.0857, + "step": 489150 + }, + { + "epoch": 0.9881341483615267, + "grad_norm": 201.3150634765625, + "learning_rate": 1.4310869158541408e-08, + "loss": 12.3076, + "step": 489160 + }, + { + "epoch": 0.9881543489942105, + "grad_norm": 176.51992797851562, + "learning_rate": 1.4284490093351421e-08, + "loss": 12.9454, + "step": 489170 + }, + { + "epoch": 0.9881745496268943, + "grad_norm": 498.5993957519531, + "learning_rate": 1.425813532793363e-08, + "loss": 9.4879, + "step": 489180 + }, + { + "epoch": 0.9881947502595781, + "grad_norm": 326.467529296875, + "learning_rate": 1.4231804862417375e-08, + "loss": 14.8705, + "step": 489190 + }, + { + "epoch": 0.988214950892262, + "grad_norm": 252.15155029296875, + "learning_rate": 1.4205498696930332e-08, + "loss": 10.7357, + "step": 489200 + }, + { + "epoch": 0.9882351515249458, + "grad_norm": 242.57083129882812, + "learning_rate": 1.4179216831601284e-08, + "loss": 17.2078, + "step": 489210 + }, + { + "epoch": 0.9882553521576296, + "grad_norm": 63.63706588745117, + "learning_rate": 1.4152959266557354e-08, + "loss": 8.6665, + "step": 489220 + }, + { + "epoch": 0.9882755527903134, + "grad_norm": 281.6227111816406, + "learning_rate": 1.4126726001927882e-08, + "loss": 14.7733, + "step": 489230 + }, + { + "epoch": 0.9882957534229972, + "grad_norm": 221.778076171875, + "learning_rate": 1.4100517037839989e-08, + "loss": 17.674, + "step": 489240 + }, + { + "epoch": 0.9883159540556811, + "grad_norm": 52.52895736694336, + "learning_rate": 1.4074332374421351e-08, + "loss": 12.3258, + "step": 489250 + }, + { + "epoch": 0.9883361546883649, + "grad_norm": 403.40509033203125, + "learning_rate": 1.4048172011799643e-08, + "loss": 24.0928, + "step": 489260 + }, + { + "epoch": 0.9883563553210487, + "grad_norm": 154.4250030517578, + "learning_rate": 1.4022035950102541e-08, + "loss": 16.2239, + "step": 489270 + }, + { + "epoch": 0.9883765559537324, + "grad_norm": 10.991928100585938, + "learning_rate": 1.3995924189457167e-08, + "loss": 13.4733, + "step": 489280 + }, + { + "epoch": 0.9883967565864162, + "grad_norm": 225.697021484375, + "learning_rate": 1.3969836729990637e-08, + "loss": 12.9439, + "step": 489290 + }, + { + "epoch": 0.9884169572191001, + "grad_norm": 50.451290130615234, + "learning_rate": 1.3943773571831188e-08, + "loss": 6.6792, + "step": 489300 + }, + { + "epoch": 0.9884371578517839, + "grad_norm": 180.2344512939453, + "learning_rate": 1.3917734715104269e-08, + "loss": 28.7392, + "step": 489310 + }, + { + "epoch": 0.9884573584844677, + "grad_norm": 481.76861572265625, + "learning_rate": 1.3891720159938116e-08, + "loss": 20.9584, + "step": 489320 + }, + { + "epoch": 0.9884775591171515, + "grad_norm": 226.91014099121094, + "learning_rate": 1.3865729906458735e-08, + "loss": 15.3054, + "step": 489330 + }, + { + "epoch": 0.9884977597498353, + "grad_norm": 404.64447021484375, + "learning_rate": 1.3839763954792695e-08, + "loss": 25.2543, + "step": 489340 + }, + { + "epoch": 0.9885179603825192, + "grad_norm": 204.29371643066406, + "learning_rate": 1.3813822305067115e-08, + "loss": 16.9764, + "step": 489350 + }, + { + "epoch": 0.988538161015203, + "grad_norm": 371.6661071777344, + "learning_rate": 1.378790495740856e-08, + "loss": 19.321, + "step": 489360 + }, + { + "epoch": 0.9885583616478868, + "grad_norm": 472.3009033203125, + "learning_rate": 1.376201191194304e-08, + "loss": 16.0028, + "step": 489370 + }, + { + "epoch": 0.9885785622805706, + "grad_norm": 213.62432861328125, + "learning_rate": 1.3736143168796012e-08, + "loss": 10.0731, + "step": 489380 + }, + { + "epoch": 0.9885987629132544, + "grad_norm": 146.18003845214844, + "learning_rate": 1.371029872809515e-08, + "loss": 19.0526, + "step": 489390 + }, + { + "epoch": 0.9886189635459383, + "grad_norm": 227.52987670898438, + "learning_rate": 1.3684478589964801e-08, + "loss": 16.331, + "step": 489400 + }, + { + "epoch": 0.9886391641786221, + "grad_norm": 1322.4578857421875, + "learning_rate": 1.3658682754532082e-08, + "loss": 29.2223, + "step": 489410 + }, + { + "epoch": 0.9886593648113059, + "grad_norm": 0.20874613523483276, + "learning_rate": 1.3632911221921896e-08, + "loss": 14.9838, + "step": 489420 + }, + { + "epoch": 0.9886795654439897, + "grad_norm": 350.9067687988281, + "learning_rate": 1.3607163992259697e-08, + "loss": 11.9115, + "step": 489430 + }, + { + "epoch": 0.9886997660766735, + "grad_norm": 310.6279602050781, + "learning_rate": 1.3581441065672052e-08, + "loss": 18.1364, + "step": 489440 + }, + { + "epoch": 0.9887199667093574, + "grad_norm": 525.9887084960938, + "learning_rate": 1.355574244228386e-08, + "loss": 21.2615, + "step": 489450 + }, + { + "epoch": 0.9887401673420412, + "grad_norm": 343.25640869140625, + "learning_rate": 1.3530068122219464e-08, + "loss": 29.8688, + "step": 489460 + }, + { + "epoch": 0.988760367974725, + "grad_norm": 427.1761779785156, + "learning_rate": 1.3504418105604877e-08, + "loss": 16.783, + "step": 489470 + }, + { + "epoch": 0.9887805686074088, + "grad_norm": 297.1617431640625, + "learning_rate": 1.3478792392565553e-08, + "loss": 17.5976, + "step": 489480 + }, + { + "epoch": 0.9888007692400926, + "grad_norm": 110.7373046875, + "learning_rate": 1.3453190983225285e-08, + "loss": 13.126, + "step": 489490 + }, + { + "epoch": 0.9888209698727765, + "grad_norm": 257.8731689453125, + "learning_rate": 1.3427613877709523e-08, + "loss": 21.7787, + "step": 489500 + }, + { + "epoch": 0.9888411705054603, + "grad_norm": 227.19361877441406, + "learning_rate": 1.3402061076142613e-08, + "loss": 18.5559, + "step": 489510 + }, + { + "epoch": 0.9888613711381441, + "grad_norm": 138.41561889648438, + "learning_rate": 1.3376532578649459e-08, + "loss": 13.6527, + "step": 489520 + }, + { + "epoch": 0.9888815717708279, + "grad_norm": 222.69546508789062, + "learning_rate": 1.3351028385354402e-08, + "loss": 14.4402, + "step": 489530 + }, + { + "epoch": 0.9889017724035116, + "grad_norm": 176.04368591308594, + "learning_rate": 1.3325548496381235e-08, + "loss": 17.616, + "step": 489540 + }, + { + "epoch": 0.9889219730361954, + "grad_norm": 423.5249328613281, + "learning_rate": 1.3300092911854856e-08, + "loss": 10.7274, + "step": 489550 + }, + { + "epoch": 0.9889421736688793, + "grad_norm": 90.23912811279297, + "learning_rate": 1.3274661631899055e-08, + "loss": 25.2611, + "step": 489560 + }, + { + "epoch": 0.9889623743015631, + "grad_norm": 152.38360595703125, + "learning_rate": 1.3249254656637622e-08, + "loss": 10.2711, + "step": 489570 + }, + { + "epoch": 0.9889825749342469, + "grad_norm": 206.1272430419922, + "learning_rate": 1.3223871986194348e-08, + "loss": 14.608, + "step": 489580 + }, + { + "epoch": 0.9890027755669307, + "grad_norm": 253.0663604736328, + "learning_rate": 1.3198513620693022e-08, + "loss": 35.4088, + "step": 489590 + }, + { + "epoch": 0.9890229761996145, + "grad_norm": 327.2695617675781, + "learning_rate": 1.3173179560257432e-08, + "loss": 11.8573, + "step": 489600 + }, + { + "epoch": 0.9890431768322984, + "grad_norm": 242.9253387451172, + "learning_rate": 1.314786980501137e-08, + "loss": 6.221, + "step": 489610 + }, + { + "epoch": 0.9890633774649822, + "grad_norm": 259.5450744628906, + "learning_rate": 1.3122584355076962e-08, + "loss": 11.6083, + "step": 489620 + }, + { + "epoch": 0.989083578097666, + "grad_norm": 216.1023406982422, + "learning_rate": 1.3097323210579104e-08, + "loss": 18.5336, + "step": 489630 + }, + { + "epoch": 0.9891037787303498, + "grad_norm": 232.20651245117188, + "learning_rate": 1.307208637163937e-08, + "loss": 21.1228, + "step": 489640 + }, + { + "epoch": 0.9891239793630336, + "grad_norm": 3.4372758865356445, + "learning_rate": 1.3046873838381546e-08, + "loss": 11.8663, + "step": 489650 + }, + { + "epoch": 0.9891441799957175, + "grad_norm": 899.5872192382812, + "learning_rate": 1.3021685610928869e-08, + "loss": 24.7065, + "step": 489660 + }, + { + "epoch": 0.9891643806284013, + "grad_norm": 386.62091064453125, + "learning_rate": 1.2996521689403463e-08, + "loss": 17.0633, + "step": 489670 + }, + { + "epoch": 0.9891845812610851, + "grad_norm": 213.47979736328125, + "learning_rate": 1.2971382073928007e-08, + "loss": 15.0888, + "step": 489680 + }, + { + "epoch": 0.9892047818937689, + "grad_norm": 380.30621337890625, + "learning_rate": 1.2946266764625182e-08, + "loss": 14.2182, + "step": 489690 + }, + { + "epoch": 0.9892249825264527, + "grad_norm": 230.2733917236328, + "learning_rate": 1.292117576161711e-08, + "loss": 33.2241, + "step": 489700 + }, + { + "epoch": 0.9892451831591366, + "grad_norm": 164.525390625, + "learning_rate": 1.2896109065027029e-08, + "loss": 5.3284, + "step": 489710 + }, + { + "epoch": 0.9892653837918204, + "grad_norm": 113.11135864257812, + "learning_rate": 1.2871066674975951e-08, + "loss": 13.0744, + "step": 489720 + }, + { + "epoch": 0.9892855844245042, + "grad_norm": 158.61990356445312, + "learning_rate": 1.2846048591586558e-08, + "loss": 27.8961, + "step": 489730 + }, + { + "epoch": 0.989305785057188, + "grad_norm": 58.93808364868164, + "learning_rate": 1.2821054814980971e-08, + "loss": 9.826, + "step": 489740 + }, + { + "epoch": 0.9893259856898718, + "grad_norm": 145.59530639648438, + "learning_rate": 1.2796085345280207e-08, + "loss": 9.6687, + "step": 489750 + }, + { + "epoch": 0.9893461863225557, + "grad_norm": 336.0180969238281, + "learning_rate": 1.277114018260639e-08, + "loss": 30.2326, + "step": 489760 + }, + { + "epoch": 0.9893663869552395, + "grad_norm": 306.5389099121094, + "learning_rate": 1.2746219327081644e-08, + "loss": 6.7078, + "step": 489770 + }, + { + "epoch": 0.9893865875879233, + "grad_norm": 277.2000427246094, + "learning_rate": 1.2721322778826983e-08, + "loss": 7.0898, + "step": 489780 + }, + { + "epoch": 0.989406788220607, + "grad_norm": 85.2691421508789, + "learning_rate": 1.2696450537963422e-08, + "loss": 18.8508, + "step": 489790 + }, + { + "epoch": 0.9894269888532908, + "grad_norm": 226.0152130126953, + "learning_rate": 1.2671602604612531e-08, + "loss": 22.8411, + "step": 489800 + }, + { + "epoch": 0.9894471894859747, + "grad_norm": 248.8648223876953, + "learning_rate": 1.2646778978895325e-08, + "loss": 19.694, + "step": 489810 + }, + { + "epoch": 0.9894673901186585, + "grad_norm": 239.36099243164062, + "learning_rate": 1.2621979660932814e-08, + "loss": 22.7057, + "step": 489820 + }, + { + "epoch": 0.9894875907513423, + "grad_norm": 91.97760009765625, + "learning_rate": 1.2597204650845463e-08, + "loss": 8.3683, + "step": 489830 + }, + { + "epoch": 0.9895077913840261, + "grad_norm": 277.6579895019531, + "learning_rate": 1.2572453948755393e-08, + "loss": 18.2378, + "step": 489840 + }, + { + "epoch": 0.9895279920167099, + "grad_norm": 24.776805877685547, + "learning_rate": 1.2547727554781398e-08, + "loss": 17.9956, + "step": 489850 + }, + { + "epoch": 0.9895481926493938, + "grad_norm": 221.5035858154297, + "learning_rate": 1.2523025469045047e-08, + "loss": 12.6799, + "step": 489860 + }, + { + "epoch": 0.9895683932820776, + "grad_norm": 355.8934020996094, + "learning_rate": 1.2498347691666801e-08, + "loss": 11.9491, + "step": 489870 + }, + { + "epoch": 0.9895885939147614, + "grad_norm": 164.27005004882812, + "learning_rate": 1.2473694222766563e-08, + "loss": 15.1054, + "step": 489880 + }, + { + "epoch": 0.9896087945474452, + "grad_norm": 332.149169921875, + "learning_rate": 1.2449065062464794e-08, + "loss": 14.2905, + "step": 489890 + }, + { + "epoch": 0.989628995180129, + "grad_norm": 554.3561401367188, + "learning_rate": 1.2424460210881394e-08, + "loss": 24.9038, + "step": 489900 + }, + { + "epoch": 0.9896491958128129, + "grad_norm": 0.0, + "learning_rate": 1.2399879668136271e-08, + "loss": 13.2437, + "step": 489910 + }, + { + "epoch": 0.9896693964454967, + "grad_norm": 185.58526611328125, + "learning_rate": 1.2375323434348773e-08, + "loss": 22.9642, + "step": 489920 + }, + { + "epoch": 0.9896895970781805, + "grad_norm": 291.0820007324219, + "learning_rate": 1.235079150963936e-08, + "loss": 15.7851, + "step": 489930 + }, + { + "epoch": 0.9897097977108643, + "grad_norm": 212.75608825683594, + "learning_rate": 1.2326283894127378e-08, + "loss": 35.3077, + "step": 489940 + }, + { + "epoch": 0.9897299983435481, + "grad_norm": 297.8985900878906, + "learning_rate": 1.2301800587932179e-08, + "loss": 29.191, + "step": 489950 + }, + { + "epoch": 0.989750198976232, + "grad_norm": 196.13829040527344, + "learning_rate": 1.2277341591172553e-08, + "loss": 17.0079, + "step": 489960 + }, + { + "epoch": 0.9897703996089158, + "grad_norm": 356.3586120605469, + "learning_rate": 1.225290690396841e-08, + "loss": 16.3947, + "step": 489970 + }, + { + "epoch": 0.9897906002415996, + "grad_norm": 0.7744454145431519, + "learning_rate": 1.2228496526439093e-08, + "loss": 14.7819, + "step": 489980 + }, + { + "epoch": 0.9898108008742834, + "grad_norm": 5.643133640289307, + "learning_rate": 1.2204110458702844e-08, + "loss": 10.4795, + "step": 489990 + }, + { + "epoch": 0.9898310015069672, + "grad_norm": 254.61962890625, + "learning_rate": 1.2179748700879013e-08, + "loss": 17.88, + "step": 490000 + }, + { + "epoch": 0.989851202139651, + "grad_norm": 185.8321990966797, + "learning_rate": 1.2155411253085835e-08, + "loss": 12.9206, + "step": 490010 + }, + { + "epoch": 0.9898714027723349, + "grad_norm": 260.7972106933594, + "learning_rate": 1.2131098115442108e-08, + "loss": 23.3891, + "step": 490020 + }, + { + "epoch": 0.9898916034050187, + "grad_norm": 313.2037658691406, + "learning_rate": 1.2106809288067178e-08, + "loss": 20.5773, + "step": 490030 + }, + { + "epoch": 0.9899118040377025, + "grad_norm": 73.95179748535156, + "learning_rate": 1.208254477107762e-08, + "loss": 12.1286, + "step": 490040 + }, + { + "epoch": 0.9899320046703862, + "grad_norm": 21.208145141601562, + "learning_rate": 1.2058304564593893e-08, + "loss": 22.3142, + "step": 490050 + }, + { + "epoch": 0.98995220530307, + "grad_norm": 206.740478515625, + "learning_rate": 1.2034088668732568e-08, + "loss": 17.1283, + "step": 490060 + }, + { + "epoch": 0.9899724059357539, + "grad_norm": 181.3231964111328, + "learning_rate": 1.2009897083611888e-08, + "loss": 14.6414, + "step": 490070 + }, + { + "epoch": 0.9899926065684377, + "grad_norm": 52.20246505737305, + "learning_rate": 1.1985729809350088e-08, + "loss": 9.1634, + "step": 490080 + }, + { + "epoch": 0.9900128072011215, + "grad_norm": 257.18743896484375, + "learning_rate": 1.1961586846064855e-08, + "loss": 14.8102, + "step": 490090 + }, + { + "epoch": 0.9900330078338053, + "grad_norm": 281.6833801269531, + "learning_rate": 1.1937468193873869e-08, + "loss": 14.4875, + "step": 490100 + }, + { + "epoch": 0.9900532084664891, + "grad_norm": 182.42430114746094, + "learning_rate": 1.1913373852894816e-08, + "loss": 21.3874, + "step": 490110 + }, + { + "epoch": 0.990073409099173, + "grad_norm": 2.4899678230285645, + "learning_rate": 1.1889303823244825e-08, + "loss": 6.1339, + "step": 490120 + }, + { + "epoch": 0.9900936097318568, + "grad_norm": 116.8193588256836, + "learning_rate": 1.1865258105041577e-08, + "loss": 16.2697, + "step": 490130 + }, + { + "epoch": 0.9901138103645406, + "grad_norm": 341.3250427246094, + "learning_rate": 1.1841236698402202e-08, + "loss": 26.4881, + "step": 490140 + }, + { + "epoch": 0.9901340109972244, + "grad_norm": 106.97996520996094, + "learning_rate": 1.1817239603443276e-08, + "loss": 21.3249, + "step": 490150 + }, + { + "epoch": 0.9901542116299082, + "grad_norm": 208.8172607421875, + "learning_rate": 1.1793266820282478e-08, + "loss": 11.2017, + "step": 490160 + }, + { + "epoch": 0.9901744122625921, + "grad_norm": 224.62625122070312, + "learning_rate": 1.1769318349036385e-08, + "loss": 20.2307, + "step": 490170 + }, + { + "epoch": 0.9901946128952759, + "grad_norm": 150.63478088378906, + "learning_rate": 1.1745394189821013e-08, + "loss": 9.0618, + "step": 490180 + }, + { + "epoch": 0.9902148135279597, + "grad_norm": 304.30377197265625, + "learning_rate": 1.1721494342754048e-08, + "loss": 8.9648, + "step": 490190 + }, + { + "epoch": 0.9902350141606435, + "grad_norm": 171.0780029296875, + "learning_rate": 1.1697618807951504e-08, + "loss": 10.7617, + "step": 490200 + }, + { + "epoch": 0.9902552147933273, + "grad_norm": 37.07375717163086, + "learning_rate": 1.1673767585529404e-08, + "loss": 10.6067, + "step": 490210 + }, + { + "epoch": 0.9902754154260112, + "grad_norm": 769.3895263671875, + "learning_rate": 1.1649940675604876e-08, + "loss": 23.3863, + "step": 490220 + }, + { + "epoch": 0.990295616058695, + "grad_norm": 363.86407470703125, + "learning_rate": 1.1626138078293381e-08, + "loss": 14.0682, + "step": 490230 + }, + { + "epoch": 0.9903158166913788, + "grad_norm": 389.760986328125, + "learning_rate": 1.1602359793710938e-08, + "loss": 22.7912, + "step": 490240 + }, + { + "epoch": 0.9903360173240626, + "grad_norm": 193.2105255126953, + "learning_rate": 1.1578605821973566e-08, + "loss": 16.888, + "step": 490250 + }, + { + "epoch": 0.9903562179567464, + "grad_norm": 214.14462280273438, + "learning_rate": 1.1554876163197282e-08, + "loss": 13.0855, + "step": 490260 + }, + { + "epoch": 0.9903764185894303, + "grad_norm": 446.6460876464844, + "learning_rate": 1.1531170817496995e-08, + "loss": 12.9666, + "step": 490270 + }, + { + "epoch": 0.9903966192221141, + "grad_norm": 491.61016845703125, + "learning_rate": 1.1507489784989278e-08, + "loss": 12.7472, + "step": 490280 + }, + { + "epoch": 0.9904168198547979, + "grad_norm": 66.50450897216797, + "learning_rate": 1.1483833065789041e-08, + "loss": 14.7546, + "step": 490290 + }, + { + "epoch": 0.9904370204874817, + "grad_norm": 416.74530029296875, + "learning_rate": 1.146020066001119e-08, + "loss": 15.681, + "step": 490300 + }, + { + "epoch": 0.9904572211201654, + "grad_norm": 256.7451477050781, + "learning_rate": 1.1436592567771188e-08, + "loss": 21.3193, + "step": 490310 + }, + { + "epoch": 0.9904774217528493, + "grad_norm": 308.0254211425781, + "learning_rate": 1.1413008789184498e-08, + "loss": 17.4518, + "step": 490320 + }, + { + "epoch": 0.9904976223855331, + "grad_norm": 82.44973754882812, + "learning_rate": 1.1389449324365476e-08, + "loss": 15.4772, + "step": 490330 + }, + { + "epoch": 0.9905178230182169, + "grad_norm": 99.13777923583984, + "learning_rate": 1.1365914173429582e-08, + "loss": 6.1406, + "step": 490340 + }, + { + "epoch": 0.9905380236509007, + "grad_norm": 195.63157653808594, + "learning_rate": 1.134240333649117e-08, + "loss": 17.8515, + "step": 490350 + }, + { + "epoch": 0.9905582242835845, + "grad_norm": 270.7568359375, + "learning_rate": 1.1318916813664594e-08, + "loss": 21.5513, + "step": 490360 + }, + { + "epoch": 0.9905784249162684, + "grad_norm": 0.0, + "learning_rate": 1.129545460506476e-08, + "loss": 22.7828, + "step": 490370 + }, + { + "epoch": 0.9905986255489522, + "grad_norm": 376.3780822753906, + "learning_rate": 1.1272016710806021e-08, + "loss": 21.6448, + "step": 490380 + }, + { + "epoch": 0.990618826181636, + "grad_norm": 322.4913635253906, + "learning_rate": 1.1248603131002178e-08, + "loss": 38.267, + "step": 490390 + }, + { + "epoch": 0.9906390268143198, + "grad_norm": 1179.2825927734375, + "learning_rate": 1.1225213865767026e-08, + "loss": 42.169, + "step": 490400 + }, + { + "epoch": 0.9906592274470036, + "grad_norm": 152.80825805664062, + "learning_rate": 1.1201848915216029e-08, + "loss": 14.4051, + "step": 490410 + }, + { + "epoch": 0.9906794280796875, + "grad_norm": 187.526123046875, + "learning_rate": 1.1178508279461875e-08, + "loss": 27.7123, + "step": 490420 + }, + { + "epoch": 0.9906996287123713, + "grad_norm": 0.0, + "learning_rate": 1.115519195861836e-08, + "loss": 17.9001, + "step": 490430 + }, + { + "epoch": 0.9907198293450551, + "grad_norm": 503.9375305175781, + "learning_rate": 1.1131899952799285e-08, + "loss": 19.0012, + "step": 490440 + }, + { + "epoch": 0.9907400299777389, + "grad_norm": 437.102783203125, + "learning_rate": 1.1108632262118446e-08, + "loss": 25.6348, + "step": 490450 + }, + { + "epoch": 0.9907602306104227, + "grad_norm": 0.0, + "learning_rate": 1.1085388886689085e-08, + "loss": 15.7962, + "step": 490460 + }, + { + "epoch": 0.9907804312431066, + "grad_norm": 325.310546875, + "learning_rate": 1.1062169826624447e-08, + "loss": 24.1655, + "step": 490470 + }, + { + "epoch": 0.9908006318757904, + "grad_norm": 68.74517059326172, + "learning_rate": 1.1038975082037772e-08, + "loss": 7.7549, + "step": 490480 + }, + { + "epoch": 0.9908208325084742, + "grad_norm": 337.9579162597656, + "learning_rate": 1.101580465304175e-08, + "loss": 15.9894, + "step": 490490 + }, + { + "epoch": 0.990841033141158, + "grad_norm": 636.4308471679688, + "learning_rate": 1.0992658539750179e-08, + "loss": 29.1541, + "step": 490500 + }, + { + "epoch": 0.9908612337738418, + "grad_norm": 385.97589111328125, + "learning_rate": 1.0969536742274633e-08, + "loss": 23.8906, + "step": 490510 + }, + { + "epoch": 0.9908814344065257, + "grad_norm": 6.2642412185668945, + "learning_rate": 1.0946439260728914e-08, + "loss": 8.0843, + "step": 490520 + }, + { + "epoch": 0.9909016350392095, + "grad_norm": 15.482325553894043, + "learning_rate": 1.0923366095225152e-08, + "loss": 19.7717, + "step": 490530 + }, + { + "epoch": 0.9909218356718933, + "grad_norm": 18.279489517211914, + "learning_rate": 1.090031724587548e-08, + "loss": 7.8719, + "step": 490540 + }, + { + "epoch": 0.9909420363045771, + "grad_norm": 332.5791931152344, + "learning_rate": 1.0877292712792586e-08, + "loss": 17.9668, + "step": 490550 + }, + { + "epoch": 0.9909622369372608, + "grad_norm": 477.405517578125, + "learning_rate": 1.0854292496089158e-08, + "loss": 17.2365, + "step": 490560 + }, + { + "epoch": 0.9909824375699446, + "grad_norm": 2.2261149883270264, + "learning_rate": 1.0831316595876218e-08, + "loss": 14.3657, + "step": 490570 + }, + { + "epoch": 0.9910026382026285, + "grad_norm": 244.87901306152344, + "learning_rate": 1.0808365012266454e-08, + "loss": 15.724, + "step": 490580 + }, + { + "epoch": 0.9910228388353123, + "grad_norm": 147.3052215576172, + "learning_rate": 1.0785437745371996e-08, + "loss": 13.9779, + "step": 490590 + }, + { + "epoch": 0.9910430394679961, + "grad_norm": 95.43013000488281, + "learning_rate": 1.076253479530387e-08, + "loss": 11.3153, + "step": 490600 + }, + { + "epoch": 0.9910632401006799, + "grad_norm": 43.185733795166016, + "learning_rate": 1.0739656162174205e-08, + "loss": 15.0828, + "step": 490610 + }, + { + "epoch": 0.9910834407333637, + "grad_norm": 595.6671142578125, + "learning_rate": 1.0716801846094026e-08, + "loss": 26.5095, + "step": 490620 + }, + { + "epoch": 0.9911036413660476, + "grad_norm": 185.7697296142578, + "learning_rate": 1.0693971847175466e-08, + "loss": 15.6193, + "step": 490630 + }, + { + "epoch": 0.9911238419987314, + "grad_norm": 520.8781127929688, + "learning_rate": 1.067116616552899e-08, + "loss": 21.0543, + "step": 490640 + }, + { + "epoch": 0.9911440426314152, + "grad_norm": 152.95602416992188, + "learning_rate": 1.0648384801266176e-08, + "loss": 8.851, + "step": 490650 + }, + { + "epoch": 0.991164243264099, + "grad_norm": 0.0, + "learning_rate": 1.0625627754498048e-08, + "loss": 8.961, + "step": 490660 + }, + { + "epoch": 0.9911844438967828, + "grad_norm": 825.7657470703125, + "learning_rate": 1.0602895025335624e-08, + "loss": 14.3532, + "step": 490670 + }, + { + "epoch": 0.9912046445294667, + "grad_norm": 333.7046813964844, + "learning_rate": 1.0580186613888822e-08, + "loss": 14.0829, + "step": 490680 + }, + { + "epoch": 0.9912248451621505, + "grad_norm": 524.2979125976562, + "learning_rate": 1.055750252026977e-08, + "loss": 31.2251, + "step": 490690 + }, + { + "epoch": 0.9912450457948343, + "grad_norm": 249.78887939453125, + "learning_rate": 1.0534842744588381e-08, + "loss": 27.6849, + "step": 490700 + }, + { + "epoch": 0.9912652464275181, + "grad_norm": 287.83197021484375, + "learning_rate": 1.0512207286954568e-08, + "loss": 22.8785, + "step": 490710 + }, + { + "epoch": 0.9912854470602019, + "grad_norm": 296.106689453125, + "learning_rate": 1.0489596147479353e-08, + "loss": 25.3628, + "step": 490720 + }, + { + "epoch": 0.9913056476928858, + "grad_norm": 396.5429992675781, + "learning_rate": 1.0467009326272648e-08, + "loss": 12.7299, + "step": 490730 + }, + { + "epoch": 0.9913258483255696, + "grad_norm": 408.876708984375, + "learning_rate": 1.044444682344492e-08, + "loss": 17.2851, + "step": 490740 + }, + { + "epoch": 0.9913460489582534, + "grad_norm": 4.908615589141846, + "learning_rate": 1.0421908639104971e-08, + "loss": 26.2915, + "step": 490750 + }, + { + "epoch": 0.9913662495909372, + "grad_norm": 38.23313903808594, + "learning_rate": 1.039939477336438e-08, + "loss": 9.5834, + "step": 490760 + }, + { + "epoch": 0.991386450223621, + "grad_norm": 0.0, + "learning_rate": 1.0376905226331391e-08, + "loss": 19.0989, + "step": 490770 + }, + { + "epoch": 0.9914066508563049, + "grad_norm": 260.5321350097656, + "learning_rate": 1.0354439998116473e-08, + "loss": 10.56, + "step": 490780 + }, + { + "epoch": 0.9914268514889887, + "grad_norm": 462.5550231933594, + "learning_rate": 1.0331999088828425e-08, + "loss": 27.8829, + "step": 490790 + }, + { + "epoch": 0.9914470521216725, + "grad_norm": 125.8621826171875, + "learning_rate": 1.030958249857772e-08, + "loss": 15.1149, + "step": 490800 + }, + { + "epoch": 0.9914672527543563, + "grad_norm": 78.16096496582031, + "learning_rate": 1.02871902274726e-08, + "loss": 18.5501, + "step": 490810 + }, + { + "epoch": 0.99148745338704, + "grad_norm": 183.17477416992188, + "learning_rate": 1.026482227562242e-08, + "loss": 16.0614, + "step": 490820 + }, + { + "epoch": 0.9915076540197238, + "grad_norm": 194.7899169921875, + "learning_rate": 1.0242478643136545e-08, + "loss": 20.4395, + "step": 490830 + }, + { + "epoch": 0.9915278546524077, + "grad_norm": 300.5813903808594, + "learning_rate": 1.0220159330123214e-08, + "loss": 13.6394, + "step": 490840 + }, + { + "epoch": 0.9915480552850915, + "grad_norm": 528.572265625, + "learning_rate": 1.0197864336691788e-08, + "loss": 21.4617, + "step": 490850 + }, + { + "epoch": 0.9915682559177753, + "grad_norm": 227.086669921875, + "learning_rate": 1.0175593662951066e-08, + "loss": 16.7332, + "step": 490860 + }, + { + "epoch": 0.9915884565504591, + "grad_norm": 117.60448455810547, + "learning_rate": 1.0153347309009299e-08, + "loss": 13.3176, + "step": 490870 + }, + { + "epoch": 0.991608657183143, + "grad_norm": 477.0719909667969, + "learning_rate": 1.013112527497473e-08, + "loss": 21.0266, + "step": 490880 + }, + { + "epoch": 0.9916288578158268, + "grad_norm": 103.98390197753906, + "learning_rate": 1.0108927560955606e-08, + "loss": 13.0454, + "step": 490890 + }, + { + "epoch": 0.9916490584485106, + "grad_norm": 402.55023193359375, + "learning_rate": 1.008675416706073e-08, + "loss": 18.175, + "step": 490900 + }, + { + "epoch": 0.9916692590811944, + "grad_norm": 206.17750549316406, + "learning_rate": 1.0064605093397794e-08, + "loss": 21.9164, + "step": 490910 + }, + { + "epoch": 0.9916894597138782, + "grad_norm": 459.20501708984375, + "learning_rate": 1.0042480340075045e-08, + "loss": 21.358, + "step": 490920 + }, + { + "epoch": 0.991709660346562, + "grad_norm": 855.034912109375, + "learning_rate": 1.0020379907199618e-08, + "loss": 24.7808, + "step": 490930 + }, + { + "epoch": 0.9917298609792459, + "grad_norm": 170.81808471679688, + "learning_rate": 9.99830379487976e-09, + "loss": 19.9852, + "step": 490940 + }, + { + "epoch": 0.9917500616119297, + "grad_norm": 448.11920166015625, + "learning_rate": 9.976252003223164e-09, + "loss": 25.9088, + "step": 490950 + }, + { + "epoch": 0.9917702622446135, + "grad_norm": 86.4518814086914, + "learning_rate": 9.954224532336965e-09, + "loss": 7.129, + "step": 490960 + }, + { + "epoch": 0.9917904628772973, + "grad_norm": 391.43621826171875, + "learning_rate": 9.932221382328299e-09, + "loss": 11.2156, + "step": 490970 + }, + { + "epoch": 0.9918106635099811, + "grad_norm": 115.07550048828125, + "learning_rate": 9.91024255330486e-09, + "loss": 12.0163, + "step": 490980 + }, + { + "epoch": 0.991830864142665, + "grad_norm": 386.3694763183594, + "learning_rate": 9.888288045374339e-09, + "loss": 14.1917, + "step": 490990 + }, + { + "epoch": 0.9918510647753488, + "grad_norm": 109.41740417480469, + "learning_rate": 9.866357858642206e-09, + "loss": 11.2098, + "step": 491000 + }, + { + "epoch": 0.9918712654080326, + "grad_norm": 367.789306640625, + "learning_rate": 9.844451993216708e-09, + "loss": 19.6657, + "step": 491010 + }, + { + "epoch": 0.9918914660407164, + "grad_norm": 444.3191833496094, + "learning_rate": 9.822570449203873e-09, + "loss": 14.5652, + "step": 491020 + }, + { + "epoch": 0.9919116666734002, + "grad_norm": 186.89617919921875, + "learning_rate": 9.800713226710834e-09, + "loss": 12.7048, + "step": 491030 + }, + { + "epoch": 0.9919318673060841, + "grad_norm": 108.09678649902344, + "learning_rate": 9.77888032584362e-09, + "loss": 17.9147, + "step": 491040 + }, + { + "epoch": 0.9919520679387679, + "grad_norm": 289.07257080078125, + "learning_rate": 9.757071746708812e-09, + "loss": 12.7529, + "step": 491050 + }, + { + "epoch": 0.9919722685714517, + "grad_norm": 173.3177490234375, + "learning_rate": 9.735287489413547e-09, + "loss": 10.0509, + "step": 491060 + }, + { + "epoch": 0.9919924692041354, + "grad_norm": 126.21602630615234, + "learning_rate": 9.71352755406274e-09, + "loss": 9.7193, + "step": 491070 + }, + { + "epoch": 0.9920126698368192, + "grad_norm": 212.0915985107422, + "learning_rate": 9.691791940762418e-09, + "loss": 23.766, + "step": 491080 + }, + { + "epoch": 0.9920328704695031, + "grad_norm": 344.5935974121094, + "learning_rate": 9.670080649619717e-09, + "loss": 30.6248, + "step": 491090 + }, + { + "epoch": 0.9920530711021869, + "grad_norm": 753.1189575195312, + "learning_rate": 9.64839368074011e-09, + "loss": 22.1914, + "step": 491100 + }, + { + "epoch": 0.9920732717348707, + "grad_norm": 419.9007263183594, + "learning_rate": 9.626731034227954e-09, + "loss": 16.5807, + "step": 491110 + }, + { + "epoch": 0.9920934723675545, + "grad_norm": 190.58302307128906, + "learning_rate": 9.605092710190943e-09, + "loss": 14.3772, + "step": 491120 + }, + { + "epoch": 0.9921136730002383, + "grad_norm": 386.9684753417969, + "learning_rate": 9.583478708732886e-09, + "loss": 15.3493, + "step": 491130 + }, + { + "epoch": 0.9921338736329222, + "grad_norm": 115.29700469970703, + "learning_rate": 9.561889029959249e-09, + "loss": 10.7981, + "step": 491140 + }, + { + "epoch": 0.992154074265606, + "grad_norm": 253.6248016357422, + "learning_rate": 9.540323673976615e-09, + "loss": 19.4967, + "step": 491150 + }, + { + "epoch": 0.9921742748982898, + "grad_norm": 187.130859375, + "learning_rate": 9.518782640888235e-09, + "loss": 20.4386, + "step": 491160 + }, + { + "epoch": 0.9921944755309736, + "grad_norm": 317.4109802246094, + "learning_rate": 9.497265930800691e-09, + "loss": 22.2754, + "step": 491170 + }, + { + "epoch": 0.9922146761636574, + "grad_norm": 536.2616577148438, + "learning_rate": 9.475773543818345e-09, + "loss": 20.5593, + "step": 491180 + }, + { + "epoch": 0.9922348767963413, + "grad_norm": 276.8728942871094, + "learning_rate": 9.454305480045556e-09, + "loss": 21.9956, + "step": 491190 + }, + { + "epoch": 0.9922550774290251, + "grad_norm": 622.9331665039062, + "learning_rate": 9.432861739586685e-09, + "loss": 21.2642, + "step": 491200 + }, + { + "epoch": 0.9922752780617089, + "grad_norm": 399.0710754394531, + "learning_rate": 9.411442322547204e-09, + "loss": 26.3859, + "step": 491210 + }, + { + "epoch": 0.9922954786943927, + "grad_norm": 368.78289794921875, + "learning_rate": 9.390047229031474e-09, + "loss": 13.4541, + "step": 491220 + }, + { + "epoch": 0.9923156793270765, + "grad_norm": 276.2664794921875, + "learning_rate": 9.368676459142744e-09, + "loss": 21.949, + "step": 491230 + }, + { + "epoch": 0.9923358799597604, + "grad_norm": 226.25160217285156, + "learning_rate": 9.347330012985933e-09, + "loss": 22.1823, + "step": 491240 + }, + { + "epoch": 0.9923560805924442, + "grad_norm": 408.3966064453125, + "learning_rate": 9.3260078906654e-09, + "loss": 20.6702, + "step": 491250 + }, + { + "epoch": 0.992376281225128, + "grad_norm": 66.66519927978516, + "learning_rate": 9.304710092283842e-09, + "loss": 15.5139, + "step": 491260 + }, + { + "epoch": 0.9923964818578118, + "grad_norm": 385.6389465332031, + "learning_rate": 9.283436617946173e-09, + "loss": 9.6264, + "step": 491270 + }, + { + "epoch": 0.9924166824904956, + "grad_norm": 161.71920776367188, + "learning_rate": 9.262187467756201e-09, + "loss": 8.8075, + "step": 491280 + }, + { + "epoch": 0.9924368831231795, + "grad_norm": 184.41116333007812, + "learning_rate": 9.24096264181662e-09, + "loss": 22.24, + "step": 491290 + }, + { + "epoch": 0.9924570837558633, + "grad_norm": 294.45697021484375, + "learning_rate": 9.219762140231237e-09, + "loss": 16.0654, + "step": 491300 + }, + { + "epoch": 0.9924772843885471, + "grad_norm": 58.14417266845703, + "learning_rate": 9.198585963103302e-09, + "loss": 13.2526, + "step": 491310 + }, + { + "epoch": 0.9924974850212309, + "grad_norm": 283.6220397949219, + "learning_rate": 9.177434110536065e-09, + "loss": 13.8107, + "step": 491320 + }, + { + "epoch": 0.9925176856539146, + "grad_norm": 20.302827835083008, + "learning_rate": 9.156306582633334e-09, + "loss": 10.6104, + "step": 491330 + }, + { + "epoch": 0.9925378862865984, + "grad_norm": 73.37297058105469, + "learning_rate": 9.135203379496693e-09, + "loss": 13.4014, + "step": 491340 + }, + { + "epoch": 0.9925580869192823, + "grad_norm": 363.70428466796875, + "learning_rate": 9.114124501230504e-09, + "loss": 19.8887, + "step": 491350 + }, + { + "epoch": 0.9925782875519661, + "grad_norm": 310.9961853027344, + "learning_rate": 9.09306994793635e-09, + "loss": 18.7803, + "step": 491360 + }, + { + "epoch": 0.9925984881846499, + "grad_norm": 492.8285827636719, + "learning_rate": 9.07203971971693e-09, + "loss": 29.9061, + "step": 491370 + }, + { + "epoch": 0.9926186888173337, + "grad_norm": 380.22637939453125, + "learning_rate": 9.051033816675492e-09, + "loss": 14.0132, + "step": 491380 + }, + { + "epoch": 0.9926388894500175, + "grad_norm": 393.7242126464844, + "learning_rate": 9.030052238913622e-09, + "loss": 19.3284, + "step": 491390 + }, + { + "epoch": 0.9926590900827014, + "grad_norm": 172.16143798828125, + "learning_rate": 9.009094986534572e-09, + "loss": 18.2413, + "step": 491400 + }, + { + "epoch": 0.9926792907153852, + "grad_norm": 3.753679037094116, + "learning_rate": 8.988162059639371e-09, + "loss": 17.3487, + "step": 491410 + }, + { + "epoch": 0.992699491348069, + "grad_norm": 142.9956817626953, + "learning_rate": 8.967253458330715e-09, + "loss": 18.9747, + "step": 491420 + }, + { + "epoch": 0.9927196919807528, + "grad_norm": 202.57798767089844, + "learning_rate": 8.946369182710191e-09, + "loss": 15.247, + "step": 491430 + }, + { + "epoch": 0.9927398926134366, + "grad_norm": 321.2886047363281, + "learning_rate": 8.925509232879937e-09, + "loss": 18.6229, + "step": 491440 + }, + { + "epoch": 0.9927600932461205, + "grad_norm": 253.57022094726562, + "learning_rate": 8.904673608940983e-09, + "loss": 19.6635, + "step": 491450 + }, + { + "epoch": 0.9927802938788043, + "grad_norm": 48.63462829589844, + "learning_rate": 8.883862310995473e-09, + "loss": 14.1624, + "step": 491460 + }, + { + "epoch": 0.9928004945114881, + "grad_norm": 95.82794952392578, + "learning_rate": 8.863075339144988e-09, + "loss": 16.4728, + "step": 491470 + }, + { + "epoch": 0.9928206951441719, + "grad_norm": 254.70733642578125, + "learning_rate": 8.842312693490563e-09, + "loss": 18.6319, + "step": 491480 + }, + { + "epoch": 0.9928408957768557, + "grad_norm": 528.6598510742188, + "learning_rate": 8.821574374132669e-09, + "loss": 15.9206, + "step": 491490 + }, + { + "epoch": 0.9928610964095396, + "grad_norm": 308.7865295410156, + "learning_rate": 8.800860381173448e-09, + "loss": 24.6954, + "step": 491500 + }, + { + "epoch": 0.9928812970422234, + "grad_norm": 224.09146118164062, + "learning_rate": 8.780170714713931e-09, + "loss": 13.3492, + "step": 491510 + }, + { + "epoch": 0.9929014976749072, + "grad_norm": 118.38770294189453, + "learning_rate": 8.759505374854038e-09, + "loss": 19.3737, + "step": 491520 + }, + { + "epoch": 0.992921698307591, + "grad_norm": 232.26646423339844, + "learning_rate": 8.738864361694799e-09, + "loss": 32.7526, + "step": 491530 + }, + { + "epoch": 0.9929418989402748, + "grad_norm": 322.0654602050781, + "learning_rate": 8.718247675337243e-09, + "loss": 27.4142, + "step": 491540 + }, + { + "epoch": 0.9929620995729587, + "grad_norm": 478.8042907714844, + "learning_rate": 8.697655315881293e-09, + "loss": 20.8666, + "step": 491550 + }, + { + "epoch": 0.9929823002056425, + "grad_norm": 300.4228515625, + "learning_rate": 8.677087283427976e-09, + "loss": 20.7805, + "step": 491560 + }, + { + "epoch": 0.9930025008383263, + "grad_norm": 0.0, + "learning_rate": 8.656543578077215e-09, + "loss": 13.4335, + "step": 491570 + }, + { + "epoch": 0.9930227014710101, + "grad_norm": 56.2254753112793, + "learning_rate": 8.636024199928927e-09, + "loss": 26.2247, + "step": 491580 + }, + { + "epoch": 0.9930429021036938, + "grad_norm": 525.561279296875, + "learning_rate": 8.615529149083034e-09, + "loss": 25.3917, + "step": 491590 + }, + { + "epoch": 0.9930631027363777, + "grad_norm": 163.4384002685547, + "learning_rate": 8.595058425640012e-09, + "loss": 16.977, + "step": 491600 + }, + { + "epoch": 0.9930833033690615, + "grad_norm": 320.7842102050781, + "learning_rate": 8.574612029699224e-09, + "loss": 19.788, + "step": 491610 + }, + { + "epoch": 0.9931035040017453, + "grad_norm": 131.5560760498047, + "learning_rate": 8.554189961360037e-09, + "loss": 21.5623, + "step": 491620 + }, + { + "epoch": 0.9931237046344291, + "grad_norm": 181.0648651123047, + "learning_rate": 8.53379222072237e-09, + "loss": 15.097, + "step": 491630 + }, + { + "epoch": 0.9931439052671129, + "grad_norm": 417.7359619140625, + "learning_rate": 8.513418807886142e-09, + "loss": 20.7334, + "step": 491640 + }, + { + "epoch": 0.9931641058997968, + "grad_norm": 107.27851867675781, + "learning_rate": 8.49306972294961e-09, + "loss": 15.8216, + "step": 491650 + }, + { + "epoch": 0.9931843065324806, + "grad_norm": 442.1458435058594, + "learning_rate": 8.472744966012691e-09, + "loss": 18.0458, + "step": 491660 + }, + { + "epoch": 0.9932045071651644, + "grad_norm": 65.64525604248047, + "learning_rate": 8.452444537174198e-09, + "loss": 19.203, + "step": 491670 + }, + { + "epoch": 0.9932247077978482, + "grad_norm": 255.77297973632812, + "learning_rate": 8.43216843653294e-09, + "loss": 9.3265, + "step": 491680 + }, + { + "epoch": 0.993244908430532, + "grad_norm": 11.050529479980469, + "learning_rate": 8.41191666418828e-09, + "loss": 19.0387, + "step": 491690 + }, + { + "epoch": 0.9932651090632159, + "grad_norm": 362.9149475097656, + "learning_rate": 8.391689220238474e-09, + "loss": 15.137, + "step": 491700 + }, + { + "epoch": 0.9932853096958997, + "grad_norm": 353.6307067871094, + "learning_rate": 8.37148610478178e-09, + "loss": 18.2115, + "step": 491710 + }, + { + "epoch": 0.9933055103285835, + "grad_norm": 572.0382690429688, + "learning_rate": 8.351307317917002e-09, + "loss": 18.9829, + "step": 491720 + }, + { + "epoch": 0.9933257109612673, + "grad_norm": 92.88359069824219, + "learning_rate": 8.331152859742952e-09, + "loss": 29.1805, + "step": 491730 + }, + { + "epoch": 0.9933459115939511, + "grad_norm": 386.05322265625, + "learning_rate": 8.311022730357331e-09, + "loss": 17.9108, + "step": 491740 + }, + { + "epoch": 0.993366112226635, + "grad_norm": 29.423995971679688, + "learning_rate": 8.290916929858394e-09, + "loss": 30.5861, + "step": 491750 + }, + { + "epoch": 0.9933863128593188, + "grad_norm": 316.3428039550781, + "learning_rate": 8.27083545834384e-09, + "loss": 18.7068, + "step": 491760 + }, + { + "epoch": 0.9934065134920026, + "grad_norm": 361.76995849609375, + "learning_rate": 8.250778315911922e-09, + "loss": 19.9417, + "step": 491770 + }, + { + "epoch": 0.9934267141246864, + "grad_norm": 153.23692321777344, + "learning_rate": 8.230745502660343e-09, + "loss": 19.0708, + "step": 491780 + }, + { + "epoch": 0.9934469147573702, + "grad_norm": 307.64471435546875, + "learning_rate": 8.210737018686798e-09, + "loss": 17.6583, + "step": 491790 + }, + { + "epoch": 0.9934671153900541, + "grad_norm": 210.4161834716797, + "learning_rate": 8.190752864088436e-09, + "loss": 26.3436, + "step": 491800 + }, + { + "epoch": 0.9934873160227379, + "grad_norm": 0.0, + "learning_rate": 8.17079303896351e-09, + "loss": 28.192, + "step": 491810 + }, + { + "epoch": 0.9935075166554217, + "grad_norm": 238.99990844726562, + "learning_rate": 8.150857543408054e-09, + "loss": 21.8865, + "step": 491820 + }, + { + "epoch": 0.9935277172881055, + "grad_norm": 11.144876480102539, + "learning_rate": 8.130946377519767e-09, + "loss": 14.0856, + "step": 491830 + }, + { + "epoch": 0.9935479179207892, + "grad_norm": 158.1072235107422, + "learning_rate": 8.11105954139635e-09, + "loss": 12.2065, + "step": 491840 + }, + { + "epoch": 0.993568118553473, + "grad_norm": 173.94834899902344, + "learning_rate": 8.091197035133836e-09, + "loss": 18.2429, + "step": 491850 + }, + { + "epoch": 0.9935883191861569, + "grad_norm": 237.55453491210938, + "learning_rate": 8.07135885882937e-09, + "loss": 15.4611, + "step": 491860 + }, + { + "epoch": 0.9936085198188407, + "grad_norm": 348.1846618652344, + "learning_rate": 8.051545012580097e-09, + "loss": 13.8497, + "step": 491870 + }, + { + "epoch": 0.9936287204515245, + "grad_norm": 171.78451538085938, + "learning_rate": 8.031755496481496e-09, + "loss": 14.8311, + "step": 491880 + }, + { + "epoch": 0.9936489210842083, + "grad_norm": 336.60260009765625, + "learning_rate": 8.011990310631269e-09, + "loss": 14.865, + "step": 491890 + }, + { + "epoch": 0.9936691217168921, + "grad_norm": 114.01586151123047, + "learning_rate": 7.992249455124889e-09, + "loss": 10.4964, + "step": 491900 + }, + { + "epoch": 0.993689322349576, + "grad_norm": 160.74249267578125, + "learning_rate": 7.972532930058396e-09, + "loss": 17.2137, + "step": 491910 + }, + { + "epoch": 0.9937095229822598, + "grad_norm": 9.658797264099121, + "learning_rate": 7.952840735528933e-09, + "loss": 13.6641, + "step": 491920 + }, + { + "epoch": 0.9937297236149436, + "grad_norm": 325.94775390625, + "learning_rate": 7.933172871631978e-09, + "loss": 13.8571, + "step": 491930 + }, + { + "epoch": 0.9937499242476274, + "grad_norm": 20.64225959777832, + "learning_rate": 7.913529338463011e-09, + "loss": 15.0212, + "step": 491940 + }, + { + "epoch": 0.9937701248803112, + "grad_norm": 7.2898640632629395, + "learning_rate": 7.89391013611751e-09, + "loss": 20.1271, + "step": 491950 + }, + { + "epoch": 0.9937903255129951, + "grad_norm": 323.5410461425781, + "learning_rate": 7.874315264692622e-09, + "loss": 15.424, + "step": 491960 + }, + { + "epoch": 0.9938105261456789, + "grad_norm": 409.5080871582031, + "learning_rate": 7.85474472428216e-09, + "loss": 15.4924, + "step": 491970 + }, + { + "epoch": 0.9938307267783627, + "grad_norm": 364.72625732421875, + "learning_rate": 7.835198514982156e-09, + "loss": 19.0092, + "step": 491980 + }, + { + "epoch": 0.9938509274110465, + "grad_norm": 66.80720520019531, + "learning_rate": 7.815676636888093e-09, + "loss": 15.9973, + "step": 491990 + }, + { + "epoch": 0.9938711280437303, + "grad_norm": 456.00714111328125, + "learning_rate": 7.796179090094891e-09, + "loss": 17.795, + "step": 492000 + }, + { + "epoch": 0.9938913286764142, + "grad_norm": 362.4913024902344, + "learning_rate": 7.776705874698032e-09, + "loss": 13.1142, + "step": 492010 + }, + { + "epoch": 0.993911529309098, + "grad_norm": 240.89476013183594, + "learning_rate": 7.757256990791328e-09, + "loss": 11.7076, + "step": 492020 + }, + { + "epoch": 0.9939317299417818, + "grad_norm": 233.6432647705078, + "learning_rate": 7.737832438470816e-09, + "loss": 16.5738, + "step": 492030 + }, + { + "epoch": 0.9939519305744656, + "grad_norm": 179.1953125, + "learning_rate": 7.718432217830307e-09, + "loss": 18.0129, + "step": 492040 + }, + { + "epoch": 0.9939721312071494, + "grad_norm": 157.79263305664062, + "learning_rate": 7.699056328964726e-09, + "loss": 17.3427, + "step": 492050 + }, + { + "epoch": 0.9939923318398333, + "grad_norm": 226.1960906982422, + "learning_rate": 7.679704771968998e-09, + "loss": 12.2909, + "step": 492060 + }, + { + "epoch": 0.9940125324725171, + "grad_norm": 145.84329223632812, + "learning_rate": 7.660377546936382e-09, + "loss": 9.1649, + "step": 492070 + }, + { + "epoch": 0.9940327331052009, + "grad_norm": 238.32235717773438, + "learning_rate": 7.641074653961244e-09, + "loss": 16.3975, + "step": 492080 + }, + { + "epoch": 0.9940529337378847, + "grad_norm": 176.8657989501953, + "learning_rate": 7.621796093138512e-09, + "loss": 10.0998, + "step": 492090 + }, + { + "epoch": 0.9940731343705684, + "grad_norm": 200.0746307373047, + "learning_rate": 7.602541864561442e-09, + "loss": 15.2308, + "step": 492100 + }, + { + "epoch": 0.9940933350032523, + "grad_norm": 132.1905517578125, + "learning_rate": 7.583311968324403e-09, + "loss": 20.6681, + "step": 492110 + }, + { + "epoch": 0.9941135356359361, + "grad_norm": 218.55520629882812, + "learning_rate": 7.564106404520654e-09, + "loss": 20.1178, + "step": 492120 + }, + { + "epoch": 0.9941337362686199, + "grad_norm": 162.3923797607422, + "learning_rate": 7.544925173243455e-09, + "loss": 13.2029, + "step": 492130 + }, + { + "epoch": 0.9941539369013037, + "grad_norm": 54.22587203979492, + "learning_rate": 7.525768274587175e-09, + "loss": 24.1705, + "step": 492140 + }, + { + "epoch": 0.9941741375339875, + "grad_norm": 0.0, + "learning_rate": 7.506635708645072e-09, + "loss": 17.7284, + "step": 492150 + }, + { + "epoch": 0.9941943381666714, + "grad_norm": 246.87582397460938, + "learning_rate": 7.487527475509848e-09, + "loss": 13.0934, + "step": 492160 + }, + { + "epoch": 0.9942145387993552, + "grad_norm": 526.3923950195312, + "learning_rate": 7.468443575274764e-09, + "loss": 23.2213, + "step": 492170 + }, + { + "epoch": 0.994234739432039, + "grad_norm": 42.60770034790039, + "learning_rate": 7.449384008033078e-09, + "loss": 24.0846, + "step": 492180 + }, + { + "epoch": 0.9942549400647228, + "grad_norm": 466.7528991699219, + "learning_rate": 7.430348773877494e-09, + "loss": 22.2213, + "step": 492190 + }, + { + "epoch": 0.9942751406974066, + "grad_norm": 131.48727416992188, + "learning_rate": 7.411337872900715e-09, + "loss": 22.6214, + "step": 492200 + }, + { + "epoch": 0.9942953413300905, + "grad_norm": 169.9774627685547, + "learning_rate": 7.392351305195999e-09, + "loss": 17.74, + "step": 492210 + }, + { + "epoch": 0.9943155419627743, + "grad_norm": 0.9770389199256897, + "learning_rate": 7.373389070854941e-09, + "loss": 15.1575, + "step": 492220 + }, + { + "epoch": 0.9943357425954581, + "grad_norm": 490.18450927734375, + "learning_rate": 7.3544511699708e-09, + "loss": 17.1803, + "step": 492230 + }, + { + "epoch": 0.9943559432281419, + "grad_norm": 353.4809875488281, + "learning_rate": 7.335537602635723e-09, + "loss": 14.9076, + "step": 492240 + }, + { + "epoch": 0.9943761438608257, + "grad_norm": 140.79559326171875, + "learning_rate": 7.3166483689413035e-09, + "loss": 18.4327, + "step": 492250 + }, + { + "epoch": 0.9943963444935096, + "grad_norm": 316.9680480957031, + "learning_rate": 7.297783468980246e-09, + "loss": 17.3414, + "step": 492260 + }, + { + "epoch": 0.9944165451261934, + "grad_norm": 220.2744903564453, + "learning_rate": 7.278942902843589e-09, + "loss": 12.5453, + "step": 492270 + }, + { + "epoch": 0.9944367457588772, + "grad_norm": 127.3188705444336, + "learning_rate": 7.26012667062459e-09, + "loss": 11.6946, + "step": 492280 + }, + { + "epoch": 0.994456946391561, + "grad_norm": 204.44300842285156, + "learning_rate": 7.241334772414288e-09, + "loss": 21.4793, + "step": 492290 + }, + { + "epoch": 0.9944771470242448, + "grad_norm": 155.9869384765625, + "learning_rate": 7.222567208303721e-09, + "loss": 22.7941, + "step": 492300 + }, + { + "epoch": 0.9944973476569287, + "grad_norm": 533.785888671875, + "learning_rate": 7.203823978384483e-09, + "loss": 32.2214, + "step": 492310 + }, + { + "epoch": 0.9945175482896125, + "grad_norm": 109.73191833496094, + "learning_rate": 7.185105082748722e-09, + "loss": 13.7931, + "step": 492320 + }, + { + "epoch": 0.9945377489222963, + "grad_norm": 230.13491821289062, + "learning_rate": 7.166410521487477e-09, + "loss": 21.6221, + "step": 492330 + }, + { + "epoch": 0.9945579495549801, + "grad_norm": 90.71044158935547, + "learning_rate": 7.14774029469123e-09, + "loss": 21.1023, + "step": 492340 + }, + { + "epoch": 0.9945781501876638, + "grad_norm": 353.1844787597656, + "learning_rate": 7.129094402451575e-09, + "loss": 19.1026, + "step": 492350 + }, + { + "epoch": 0.9945983508203476, + "grad_norm": 282.05908203125, + "learning_rate": 7.11047284485844e-09, + "loss": 15.9006, + "step": 492360 + }, + { + "epoch": 0.9946185514530315, + "grad_norm": 469.705810546875, + "learning_rate": 7.0918756220039745e-09, + "loss": 22.5142, + "step": 492370 + }, + { + "epoch": 0.9946387520857153, + "grad_norm": 214.02598571777344, + "learning_rate": 7.073302733978104e-09, + "loss": 17.0717, + "step": 492380 + }, + { + "epoch": 0.9946589527183991, + "grad_norm": 288.0621337890625, + "learning_rate": 7.054754180871315e-09, + "loss": 7.8111, + "step": 492390 + }, + { + "epoch": 0.9946791533510829, + "grad_norm": 222.0833282470703, + "learning_rate": 7.036229962774088e-09, + "loss": 19.9986, + "step": 492400 + }, + { + "epoch": 0.9946993539837667, + "grad_norm": 232.6152801513672, + "learning_rate": 7.0177300797763526e-09, + "loss": 33.8875, + "step": 492410 + }, + { + "epoch": 0.9947195546164506, + "grad_norm": 500.5577392578125, + "learning_rate": 6.999254531969146e-09, + "loss": 18.8919, + "step": 492420 + }, + { + "epoch": 0.9947397552491344, + "grad_norm": 446.86334228515625, + "learning_rate": 6.980803319441842e-09, + "loss": 33.4219, + "step": 492430 + }, + { + "epoch": 0.9947599558818182, + "grad_norm": 150.64266967773438, + "learning_rate": 6.962376442284368e-09, + "loss": 25.4434, + "step": 492440 + }, + { + "epoch": 0.994780156514502, + "grad_norm": 437.434326171875, + "learning_rate": 6.943973900586654e-09, + "loss": 36.1648, + "step": 492450 + }, + { + "epoch": 0.9948003571471858, + "grad_norm": 332.0025329589844, + "learning_rate": 6.925595694438625e-09, + "loss": 15.5303, + "step": 492460 + }, + { + "epoch": 0.9948205577798697, + "grad_norm": 365.9385070800781, + "learning_rate": 6.9072418239296556e-09, + "loss": 38.4911, + "step": 492470 + }, + { + "epoch": 0.9948407584125535, + "grad_norm": 492.3500671386719, + "learning_rate": 6.888912289149119e-09, + "loss": 19.1304, + "step": 492480 + }, + { + "epoch": 0.9948609590452373, + "grad_norm": 135.6409454345703, + "learning_rate": 6.8706070901863876e-09, + "loss": 34.5697, + "step": 492490 + }, + { + "epoch": 0.9948811596779211, + "grad_norm": 89.8146743774414, + "learning_rate": 6.852326227130835e-09, + "loss": 12.0278, + "step": 492500 + }, + { + "epoch": 0.9949013603106049, + "grad_norm": 399.4657897949219, + "learning_rate": 6.834069700071277e-09, + "loss": 18.217, + "step": 492510 + }, + { + "epoch": 0.9949215609432888, + "grad_norm": 17.456974029541016, + "learning_rate": 6.81583750909709e-09, + "loss": 8.5865, + "step": 492520 + }, + { + "epoch": 0.9949417615759726, + "grad_norm": 166.68055725097656, + "learning_rate": 6.797629654296533e-09, + "loss": 15.6642, + "step": 492530 + }, + { + "epoch": 0.9949619622086564, + "grad_norm": 450.2313537597656, + "learning_rate": 6.779446135758982e-09, + "loss": 22.3462, + "step": 492540 + }, + { + "epoch": 0.9949821628413402, + "grad_norm": 264.4354553222656, + "learning_rate": 6.761286953572699e-09, + "loss": 15.3926, + "step": 492550 + }, + { + "epoch": 0.995002363474024, + "grad_norm": 639.0560913085938, + "learning_rate": 6.7431521078265e-09, + "loss": 24.994, + "step": 492560 + }, + { + "epoch": 0.9950225641067079, + "grad_norm": 459.2167053222656, + "learning_rate": 6.725041598608651e-09, + "loss": 19.193, + "step": 492570 + }, + { + "epoch": 0.9950427647393917, + "grad_norm": 2859.9921875, + "learning_rate": 6.706955426006856e-09, + "loss": 20.0577, + "step": 492580 + }, + { + "epoch": 0.9950629653720755, + "grad_norm": 73.68702697753906, + "learning_rate": 6.688893590109935e-09, + "loss": 14.732, + "step": 492590 + }, + { + "epoch": 0.9950831660047593, + "grad_norm": 207.21372985839844, + "learning_rate": 6.670856091006151e-09, + "loss": 14.8194, + "step": 492600 + }, + { + "epoch": 0.995103366637443, + "grad_norm": 72.70738983154297, + "learning_rate": 6.652842928782655e-09, + "loss": 21.3107, + "step": 492610 + }, + { + "epoch": 0.9951235672701269, + "grad_norm": 179.05630493164062, + "learning_rate": 6.63485410352771e-09, + "loss": 23.2535, + "step": 492620 + }, + { + "epoch": 0.9951437679028107, + "grad_norm": 175.2220458984375, + "learning_rate": 6.61688961532847e-09, + "loss": 7.077, + "step": 492630 + }, + { + "epoch": 0.9951639685354945, + "grad_norm": 367.4979248046875, + "learning_rate": 6.598949464273196e-09, + "loss": 17.3254, + "step": 492640 + }, + { + "epoch": 0.9951841691681783, + "grad_norm": 331.68743896484375, + "learning_rate": 6.581033650449043e-09, + "loss": 15.688, + "step": 492650 + }, + { + "epoch": 0.9952043698008621, + "grad_norm": 376.92083740234375, + "learning_rate": 6.563142173943715e-09, + "loss": 17.2048, + "step": 492660 + }, + { + "epoch": 0.995224570433546, + "grad_norm": 170.47386169433594, + "learning_rate": 6.545275034843257e-09, + "loss": 12.9788, + "step": 492670 + }, + { + "epoch": 0.9952447710662298, + "grad_norm": 152.96368408203125, + "learning_rate": 6.527432233235931e-09, + "loss": 13.7648, + "step": 492680 + }, + { + "epoch": 0.9952649716989136, + "grad_norm": 268.8824462890625, + "learning_rate": 6.509613769207778e-09, + "loss": 27.7853, + "step": 492690 + }, + { + "epoch": 0.9952851723315974, + "grad_norm": 60.152130126953125, + "learning_rate": 6.491819642846509e-09, + "loss": 18.511, + "step": 492700 + }, + { + "epoch": 0.9953053729642812, + "grad_norm": 62.471656799316406, + "learning_rate": 6.4740498542387174e-09, + "loss": 6.4525, + "step": 492710 + }, + { + "epoch": 0.995325573596965, + "grad_norm": 22.64701271057129, + "learning_rate": 6.456304403470448e-09, + "loss": 15.0782, + "step": 492720 + }, + { + "epoch": 0.9953457742296489, + "grad_norm": 509.4891052246094, + "learning_rate": 6.438583290628298e-09, + "loss": 20.1131, + "step": 492730 + }, + { + "epoch": 0.9953659748623327, + "grad_norm": 164.00579833984375, + "learning_rate": 6.420886515799418e-09, + "loss": 23.3696, + "step": 492740 + }, + { + "epoch": 0.9953861754950165, + "grad_norm": 296.6884765625, + "learning_rate": 6.403214079069298e-09, + "loss": 21.8988, + "step": 492750 + }, + { + "epoch": 0.9954063761277003, + "grad_norm": 104.28278350830078, + "learning_rate": 6.385565980523978e-09, + "loss": 8.4838, + "step": 492760 + }, + { + "epoch": 0.9954265767603842, + "grad_norm": 223.9442901611328, + "learning_rate": 6.3679422202495015e-09, + "loss": 13.4782, + "step": 492770 + }, + { + "epoch": 0.995446777393068, + "grad_norm": 286.3375549316406, + "learning_rate": 6.350342798332465e-09, + "loss": 20.3432, + "step": 492780 + }, + { + "epoch": 0.9954669780257518, + "grad_norm": 296.218505859375, + "learning_rate": 6.332767714858357e-09, + "loss": 21.4292, + "step": 492790 + }, + { + "epoch": 0.9954871786584356, + "grad_norm": 316.03076171875, + "learning_rate": 6.315216969912663e-09, + "loss": 9.5164, + "step": 492800 + }, + { + "epoch": 0.9955073792911194, + "grad_norm": 441.8782958984375, + "learning_rate": 6.2976905635803165e-09, + "loss": 22.2427, + "step": 492810 + }, + { + "epoch": 0.9955275799238033, + "grad_norm": 453.11181640625, + "learning_rate": 6.280188495947914e-09, + "loss": 18.2984, + "step": 492820 + }, + { + "epoch": 0.9955477805564871, + "grad_norm": 0.0, + "learning_rate": 6.262710767100388e-09, + "loss": 18.1285, + "step": 492830 + }, + { + "epoch": 0.9955679811891709, + "grad_norm": 307.5326843261719, + "learning_rate": 6.245257377122116e-09, + "loss": 23.1288, + "step": 492840 + }, + { + "epoch": 0.9955881818218547, + "grad_norm": 116.1676254272461, + "learning_rate": 6.227828326099139e-09, + "loss": 16.4471, + "step": 492850 + }, + { + "epoch": 0.9956083824545384, + "grad_norm": 195.77703857421875, + "learning_rate": 6.21042361411639e-09, + "loss": 12.6839, + "step": 492860 + }, + { + "epoch": 0.9956285830872222, + "grad_norm": 276.8194885253906, + "learning_rate": 6.19304324125769e-09, + "loss": 15.2534, + "step": 492870 + }, + { + "epoch": 0.9956487837199061, + "grad_norm": 75.78239440917969, + "learning_rate": 6.175687207609082e-09, + "loss": 13.7528, + "step": 492880 + }, + { + "epoch": 0.9956689843525899, + "grad_norm": 437.89654541015625, + "learning_rate": 6.1583555132543886e-09, + "loss": 25.4995, + "step": 492890 + }, + { + "epoch": 0.9956891849852737, + "grad_norm": 154.37973022460938, + "learning_rate": 6.141048158277429e-09, + "loss": 11.9446, + "step": 492900 + }, + { + "epoch": 0.9957093856179575, + "grad_norm": 180.64698791503906, + "learning_rate": 6.123765142764249e-09, + "loss": 11.4718, + "step": 492910 + }, + { + "epoch": 0.9957295862506413, + "grad_norm": 163.42579650878906, + "learning_rate": 6.106506466797557e-09, + "loss": 11.0536, + "step": 492920 + }, + { + "epoch": 0.9957497868833252, + "grad_norm": 281.0152282714844, + "learning_rate": 6.0892721304622874e-09, + "loss": 21.0349, + "step": 492930 + }, + { + "epoch": 0.995769987516009, + "grad_norm": 193.30465698242188, + "learning_rate": 6.0720621338422606e-09, + "loss": 14.8078, + "step": 492940 + }, + { + "epoch": 0.9957901881486928, + "grad_norm": 271.8005065917969, + "learning_rate": 6.054876477021299e-09, + "loss": 10.3514, + "step": 492950 + }, + { + "epoch": 0.9958103887813766, + "grad_norm": 154.5194549560547, + "learning_rate": 6.037715160083224e-09, + "loss": 12.1968, + "step": 492960 + }, + { + "epoch": 0.9958305894140604, + "grad_norm": 431.3895263671875, + "learning_rate": 6.020578183111303e-09, + "loss": 14.8034, + "step": 492970 + }, + { + "epoch": 0.9958507900467443, + "grad_norm": 1497.735595703125, + "learning_rate": 6.003465546189358e-09, + "loss": 38.3837, + "step": 492980 + }, + { + "epoch": 0.9958709906794281, + "grad_norm": 583.8172607421875, + "learning_rate": 5.98637724940121e-09, + "loss": 20.6984, + "step": 492990 + }, + { + "epoch": 0.9958911913121119, + "grad_norm": 143.6290740966797, + "learning_rate": 5.969313292830126e-09, + "loss": 9.2027, + "step": 493000 + }, + { + "epoch": 0.9959113919447957, + "grad_norm": 93.39070892333984, + "learning_rate": 5.952273676558262e-09, + "loss": 9.86, + "step": 493010 + }, + { + "epoch": 0.9959315925774795, + "grad_norm": 406.0321960449219, + "learning_rate": 5.935258400669442e-09, + "loss": 12.2395, + "step": 493020 + }, + { + "epoch": 0.9959517932101634, + "grad_norm": 223.07144165039062, + "learning_rate": 5.918267465246374e-09, + "loss": 13.2936, + "step": 493030 + }, + { + "epoch": 0.9959719938428472, + "grad_norm": 258.6623229980469, + "learning_rate": 5.901300870372329e-09, + "loss": 20.8765, + "step": 493040 + }, + { + "epoch": 0.995992194475531, + "grad_norm": 185.70497131347656, + "learning_rate": 5.8843586161289045e-09, + "loss": 9.3024, + "step": 493050 + }, + { + "epoch": 0.9960123951082148, + "grad_norm": 667.3585205078125, + "learning_rate": 5.867440702599925e-09, + "loss": 13.7881, + "step": 493060 + }, + { + "epoch": 0.9960325957408986, + "grad_norm": 717.0975952148438, + "learning_rate": 5.850547129867546e-09, + "loss": 30.2538, + "step": 493070 + }, + { + "epoch": 0.9960527963735825, + "grad_norm": 185.55636596679688, + "learning_rate": 5.833677898013368e-09, + "loss": 21.8775, + "step": 493080 + }, + { + "epoch": 0.9960729970062663, + "grad_norm": 380.11956787109375, + "learning_rate": 5.816833007120659e-09, + "loss": 19.064, + "step": 493090 + }, + { + "epoch": 0.9960931976389501, + "grad_norm": 218.5950164794922, + "learning_rate": 5.800012457270466e-09, + "loss": 22.8577, + "step": 493100 + }, + { + "epoch": 0.9961133982716339, + "grad_norm": 224.53257751464844, + "learning_rate": 5.783216248545498e-09, + "loss": 12.1389, + "step": 493110 + }, + { + "epoch": 0.9961335989043176, + "grad_norm": 290.1208801269531, + "learning_rate": 5.766444381027358e-09, + "loss": 18.0064, + "step": 493120 + }, + { + "epoch": 0.9961537995370014, + "grad_norm": 335.3583984375, + "learning_rate": 5.749696854798204e-09, + "loss": 15.1208, + "step": 493130 + }, + { + "epoch": 0.9961740001696853, + "grad_norm": 4.631486892700195, + "learning_rate": 5.732973669939079e-09, + "loss": 16.424, + "step": 493140 + }, + { + "epoch": 0.9961942008023691, + "grad_norm": 425.11138916015625, + "learning_rate": 5.716274826531587e-09, + "loss": 19.8227, + "step": 493150 + }, + { + "epoch": 0.9962144014350529, + "grad_norm": 175.2985382080078, + "learning_rate": 5.699600324657328e-09, + "loss": 19.6608, + "step": 493160 + }, + { + "epoch": 0.9962346020677367, + "grad_norm": 163.50164794921875, + "learning_rate": 5.682950164397349e-09, + "loss": 11.5084, + "step": 493170 + }, + { + "epoch": 0.9962548027004205, + "grad_norm": 240.91807556152344, + "learning_rate": 5.6663243458332514e-09, + "loss": 19.7767, + "step": 493180 + }, + { + "epoch": 0.9962750033331044, + "grad_norm": 85.84590911865234, + "learning_rate": 5.649722869044971e-09, + "loss": 31.0176, + "step": 493190 + }, + { + "epoch": 0.9962952039657882, + "grad_norm": 253.2265167236328, + "learning_rate": 5.633145734114665e-09, + "loss": 19.1726, + "step": 493200 + }, + { + "epoch": 0.996315404598472, + "grad_norm": 226.51393127441406, + "learning_rate": 5.616592941123378e-09, + "loss": 24.0137, + "step": 493210 + }, + { + "epoch": 0.9963356052311558, + "grad_norm": 312.4957275390625, + "learning_rate": 5.600064490149937e-09, + "loss": 16.8343, + "step": 493220 + }, + { + "epoch": 0.9963558058638396, + "grad_norm": 67.92218017578125, + "learning_rate": 5.583560381276498e-09, + "loss": 21.5998, + "step": 493230 + }, + { + "epoch": 0.9963760064965235, + "grad_norm": 342.03173828125, + "learning_rate": 5.5670806145835536e-09, + "loss": 32.474, + "step": 493240 + }, + { + "epoch": 0.9963962071292073, + "grad_norm": 272.71844482421875, + "learning_rate": 5.5506251901504825e-09, + "loss": 19.1489, + "step": 493250 + }, + { + "epoch": 0.9964164077618911, + "grad_norm": 283.71484375, + "learning_rate": 5.534194108057778e-09, + "loss": 6.9455, + "step": 493260 + }, + { + "epoch": 0.9964366083945749, + "grad_norm": 244.4901123046875, + "learning_rate": 5.517787368385375e-09, + "loss": 12.2465, + "step": 493270 + }, + { + "epoch": 0.9964568090272587, + "grad_norm": 431.8292541503906, + "learning_rate": 5.501404971214319e-09, + "loss": 14.8472, + "step": 493280 + }, + { + "epoch": 0.9964770096599426, + "grad_norm": 369.4844665527344, + "learning_rate": 5.485046916622883e-09, + "loss": 9.1092, + "step": 493290 + }, + { + "epoch": 0.9964972102926264, + "grad_norm": 232.2456817626953, + "learning_rate": 5.468713204692111e-09, + "loss": 8.0756, + "step": 493300 + }, + { + "epoch": 0.9965174109253102, + "grad_norm": 252.66551208496094, + "learning_rate": 5.45240383550083e-09, + "loss": 15.236, + "step": 493310 + }, + { + "epoch": 0.996537611557994, + "grad_norm": 471.36676025390625, + "learning_rate": 5.436118809128421e-09, + "loss": 21.6559, + "step": 493320 + }, + { + "epoch": 0.9965578121906778, + "grad_norm": 165.3039093017578, + "learning_rate": 5.419858125655375e-09, + "loss": 11.0622, + "step": 493330 + }, + { + "epoch": 0.9965780128233617, + "grad_norm": 53.91355895996094, + "learning_rate": 5.403621785159407e-09, + "loss": 17.1724, + "step": 493340 + }, + { + "epoch": 0.9965982134560455, + "grad_norm": 80.09151458740234, + "learning_rate": 5.38740978772101e-09, + "loss": 11.7053, + "step": 493350 + }, + { + "epoch": 0.9966184140887293, + "grad_norm": 442.5940856933594, + "learning_rate": 5.371222133418452e-09, + "loss": 15.5027, + "step": 493360 + }, + { + "epoch": 0.9966386147214131, + "grad_norm": 111.42676544189453, + "learning_rate": 5.355058822330561e-09, + "loss": 16.7894, + "step": 493370 + }, + { + "epoch": 0.9966588153540968, + "grad_norm": 412.7138366699219, + "learning_rate": 5.338919854536162e-09, + "loss": 11.4344, + "step": 493380 + }, + { + "epoch": 0.9966790159867807, + "grad_norm": 261.85955810546875, + "learning_rate": 5.322805230114636e-09, + "loss": 17.3089, + "step": 493390 + }, + { + "epoch": 0.9966992166194645, + "grad_norm": 374.5972900390625, + "learning_rate": 5.306714949143699e-09, + "loss": 11.36, + "step": 493400 + }, + { + "epoch": 0.9967194172521483, + "grad_norm": 392.8015441894531, + "learning_rate": 5.290649011702176e-09, + "loss": 27.3323, + "step": 493410 + }, + { + "epoch": 0.9967396178848321, + "grad_norm": 380.3060607910156, + "learning_rate": 5.2746074178683385e-09, + "loss": 16.8733, + "step": 493420 + }, + { + "epoch": 0.9967598185175159, + "grad_norm": 91.70571899414062, + "learning_rate": 5.258590167719901e-09, + "loss": 20.3022, + "step": 493430 + }, + { + "epoch": 0.9967800191501998, + "grad_norm": 245.89756774902344, + "learning_rate": 5.242597261335691e-09, + "loss": 20.0283, + "step": 493440 + }, + { + "epoch": 0.9968002197828836, + "grad_norm": 119.63677978515625, + "learning_rate": 5.226628698792868e-09, + "loss": 16.8436, + "step": 493450 + }, + { + "epoch": 0.9968204204155674, + "grad_norm": 456.9029541015625, + "learning_rate": 5.210684480169703e-09, + "loss": 29.493, + "step": 493460 + }, + { + "epoch": 0.9968406210482512, + "grad_norm": 32.520992279052734, + "learning_rate": 5.1947646055444665e-09, + "loss": 19.6443, + "step": 493470 + }, + { + "epoch": 0.996860821680935, + "grad_norm": 333.980224609375, + "learning_rate": 5.178869074993209e-09, + "loss": 19.9943, + "step": 493480 + }, + { + "epoch": 0.9968810223136189, + "grad_norm": 452.90362548828125, + "learning_rate": 5.162997888595312e-09, + "loss": 17.3427, + "step": 493490 + }, + { + "epoch": 0.9969012229463027, + "grad_norm": 424.89251708984375, + "learning_rate": 5.147151046426824e-09, + "loss": 9.6077, + "step": 493500 + }, + { + "epoch": 0.9969214235789865, + "grad_norm": 38.030208587646484, + "learning_rate": 5.1313285485649064e-09, + "loss": 10.3104, + "step": 493510 + }, + { + "epoch": 0.9969416242116703, + "grad_norm": 1094.377685546875, + "learning_rate": 5.115530395087276e-09, + "loss": 17.6154, + "step": 493520 + }, + { + "epoch": 0.9969618248443541, + "grad_norm": 9.166926383972168, + "learning_rate": 5.099756586071092e-09, + "loss": 11.0072, + "step": 493530 + }, + { + "epoch": 0.996982025477038, + "grad_norm": 175.388671875, + "learning_rate": 5.084007121592405e-09, + "loss": 26.7231, + "step": 493540 + }, + { + "epoch": 0.9970022261097218, + "grad_norm": 235.22972106933594, + "learning_rate": 5.06828200172893e-09, + "loss": 13.2122, + "step": 493550 + }, + { + "epoch": 0.9970224267424056, + "grad_norm": 297.00189208984375, + "learning_rate": 5.052581226556719e-09, + "loss": 18.2604, + "step": 493560 + }, + { + "epoch": 0.9970426273750894, + "grad_norm": 536.833740234375, + "learning_rate": 5.036904796152375e-09, + "loss": 28.2438, + "step": 493570 + }, + { + "epoch": 0.9970628280077732, + "grad_norm": 659.442626953125, + "learning_rate": 5.02125271059195e-09, + "loss": 26.9861, + "step": 493580 + }, + { + "epoch": 0.9970830286404571, + "grad_norm": 181.55291748046875, + "learning_rate": 5.0056249699526046e-09, + "loss": 24.303, + "step": 493590 + }, + { + "epoch": 0.9971032292731409, + "grad_norm": 251.4856719970703, + "learning_rate": 4.990021574309834e-09, + "loss": 13.7942, + "step": 493600 + }, + { + "epoch": 0.9971234299058247, + "grad_norm": 285.0186462402344, + "learning_rate": 4.9744425237396865e-09, + "loss": 11.717, + "step": 493610 + }, + { + "epoch": 0.9971436305385085, + "grad_norm": 446.0801086425781, + "learning_rate": 4.95888781831877e-09, + "loss": 16.6561, + "step": 493620 + }, + { + "epoch": 0.9971638311711922, + "grad_norm": 256.6576232910156, + "learning_rate": 4.9433574581220225e-09, + "loss": 10.5709, + "step": 493630 + }, + { + "epoch": 0.997184031803876, + "grad_norm": 123.8934326171875, + "learning_rate": 4.927851443225495e-09, + "loss": 18.5653, + "step": 493640 + }, + { + "epoch": 0.9972042324365599, + "grad_norm": 574.6906127929688, + "learning_rate": 4.9123697737052386e-09, + "loss": 14.9605, + "step": 493650 + }, + { + "epoch": 0.9972244330692437, + "grad_norm": 628.70849609375, + "learning_rate": 4.896912449635638e-09, + "loss": 20.5991, + "step": 493660 + }, + { + "epoch": 0.9972446337019275, + "grad_norm": 113.13594818115234, + "learning_rate": 4.881479471093298e-09, + "loss": 28.9503, + "step": 493670 + }, + { + "epoch": 0.9972648343346113, + "grad_norm": 186.0068817138672, + "learning_rate": 4.866070838152049e-09, + "loss": 13.5168, + "step": 493680 + }, + { + "epoch": 0.9972850349672951, + "grad_norm": 272.0819091796875, + "learning_rate": 4.850686550888495e-09, + "loss": 22.2566, + "step": 493690 + }, + { + "epoch": 0.997305235599979, + "grad_norm": 607.2677612304688, + "learning_rate": 4.835326609376468e-09, + "loss": 16.489, + "step": 493700 + }, + { + "epoch": 0.9973254362326628, + "grad_norm": 303.22735595703125, + "learning_rate": 4.81999101369146e-09, + "loss": 22.1093, + "step": 493710 + }, + { + "epoch": 0.9973456368653466, + "grad_norm": 252.09075927734375, + "learning_rate": 4.804679763907305e-09, + "loss": 17.7456, + "step": 493720 + }, + { + "epoch": 0.9973658374980304, + "grad_norm": 309.9205017089844, + "learning_rate": 4.789392860100051e-09, + "loss": 27.9882, + "step": 493730 + }, + { + "epoch": 0.9973860381307142, + "grad_norm": 188.27894592285156, + "learning_rate": 4.774130302342972e-09, + "loss": 10.7294, + "step": 493740 + }, + { + "epoch": 0.9974062387633981, + "grad_norm": 186.87384033203125, + "learning_rate": 4.758892090711009e-09, + "loss": 16.9977, + "step": 493750 + }, + { + "epoch": 0.9974264393960819, + "grad_norm": 1355.3223876953125, + "learning_rate": 4.743678225278547e-09, + "loss": 22.0995, + "step": 493760 + }, + { + "epoch": 0.9974466400287657, + "grad_norm": 146.46865844726562, + "learning_rate": 4.7284887061194165e-09, + "loss": 10.3052, + "step": 493770 + }, + { + "epoch": 0.9974668406614495, + "grad_norm": 202.4141387939453, + "learning_rate": 4.713323533308001e-09, + "loss": 14.6178, + "step": 493780 + }, + { + "epoch": 0.9974870412941333, + "grad_norm": 233.77516174316406, + "learning_rate": 4.6981827069181305e-09, + "loss": 26.2586, + "step": 493790 + }, + { + "epoch": 0.9975072419268172, + "grad_norm": 485.7827453613281, + "learning_rate": 4.683066227023081e-09, + "loss": 12.0104, + "step": 493800 + }, + { + "epoch": 0.997527442559501, + "grad_norm": 111.74791717529297, + "learning_rate": 4.667974093696681e-09, + "loss": 11.123, + "step": 493810 + }, + { + "epoch": 0.9975476431921848, + "grad_norm": 210.8526153564453, + "learning_rate": 4.6529063070133165e-09, + "loss": 14.3363, + "step": 493820 + }, + { + "epoch": 0.9975678438248686, + "grad_norm": 333.481689453125, + "learning_rate": 4.637862867045151e-09, + "loss": 16.3601, + "step": 493830 + }, + { + "epoch": 0.9975880444575524, + "grad_norm": 334.3294677734375, + "learning_rate": 4.6228437738665695e-09, + "loss": 11.7012, + "step": 493840 + }, + { + "epoch": 0.9976082450902363, + "grad_norm": 431.4281311035156, + "learning_rate": 4.607849027550293e-09, + "loss": 14.8651, + "step": 493850 + }, + { + "epoch": 0.9976284457229201, + "grad_norm": 367.46844482421875, + "learning_rate": 4.592878628169595e-09, + "loss": 27.9158, + "step": 493860 + }, + { + "epoch": 0.9976486463556039, + "grad_norm": 366.1986083984375, + "learning_rate": 4.577932575797195e-09, + "loss": 20.3304, + "step": 493870 + }, + { + "epoch": 0.9976688469882877, + "grad_norm": 816.3501586914062, + "learning_rate": 4.5630108705063684e-09, + "loss": 18.9362, + "step": 493880 + }, + { + "epoch": 0.9976890476209714, + "grad_norm": 380.4483337402344, + "learning_rate": 4.5481135123692786e-09, + "loss": 17.446, + "step": 493890 + }, + { + "epoch": 0.9977092482536553, + "grad_norm": 488.8197326660156, + "learning_rate": 4.533240501459202e-09, + "loss": 17.5896, + "step": 493900 + }, + { + "epoch": 0.9977294488863391, + "grad_norm": 159.9177703857422, + "learning_rate": 4.518391837847747e-09, + "loss": 10.0191, + "step": 493910 + }, + { + "epoch": 0.9977496495190229, + "grad_norm": 377.70111083984375, + "learning_rate": 4.503567521608187e-09, + "loss": 18.2045, + "step": 493920 + }, + { + "epoch": 0.9977698501517067, + "grad_norm": 66.71868133544922, + "learning_rate": 4.4887675528121345e-09, + "loss": 16.9787, + "step": 493930 + }, + { + "epoch": 0.9977900507843905, + "grad_norm": 175.28021240234375, + "learning_rate": 4.473991931531752e-09, + "loss": 24.261, + "step": 493940 + }, + { + "epoch": 0.9978102514170744, + "grad_norm": 162.11776733398438, + "learning_rate": 4.459240657839203e-09, + "loss": 26.007, + "step": 493950 + }, + { + "epoch": 0.9978304520497582, + "grad_norm": 235.6828155517578, + "learning_rate": 4.4445137318072096e-09, + "loss": 19.5397, + "step": 493960 + }, + { + "epoch": 0.997850652682442, + "grad_norm": 12.251602172851562, + "learning_rate": 4.429811153505714e-09, + "loss": 6.0647, + "step": 493970 + }, + { + "epoch": 0.9978708533151258, + "grad_norm": 334.3155517578125, + "learning_rate": 4.415132923007992e-09, + "loss": 14.3883, + "step": 493980 + }, + { + "epoch": 0.9978910539478096, + "grad_norm": 18.55206298828125, + "learning_rate": 4.400479040385098e-09, + "loss": 12.8171, + "step": 493990 + }, + { + "epoch": 0.9979112545804935, + "grad_norm": 2870.7685546875, + "learning_rate": 4.385849505708084e-09, + "loss": 40.0994, + "step": 494000 + }, + { + "epoch": 0.9979314552131773, + "grad_norm": 237.31381225585938, + "learning_rate": 4.3712443190491175e-09, + "loss": 15.5543, + "step": 494010 + }, + { + "epoch": 0.9979516558458611, + "grad_norm": 169.72447204589844, + "learning_rate": 4.3566634804781405e-09, + "loss": 15.9001, + "step": 494020 + }, + { + "epoch": 0.9979718564785449, + "grad_norm": 489.4140319824219, + "learning_rate": 4.342106990067319e-09, + "loss": 13.7482, + "step": 494030 + }, + { + "epoch": 0.9979920571112287, + "grad_norm": 262.1561279296875, + "learning_rate": 4.327574847886595e-09, + "loss": 16.6041, + "step": 494040 + }, + { + "epoch": 0.9980122577439126, + "grad_norm": 244.12388610839844, + "learning_rate": 4.313067054008135e-09, + "loss": 14.7642, + "step": 494050 + }, + { + "epoch": 0.9980324583765964, + "grad_norm": 620.3541259765625, + "learning_rate": 4.298583608501328e-09, + "loss": 25.66, + "step": 494060 + }, + { + "epoch": 0.9980526590092802, + "grad_norm": 1296.5672607421875, + "learning_rate": 4.284124511437782e-09, + "loss": 34.4936, + "step": 494070 + }, + { + "epoch": 0.998072859641964, + "grad_norm": 182.0353240966797, + "learning_rate": 4.269689762886886e-09, + "loss": 22.3797, + "step": 494080 + }, + { + "epoch": 0.9980930602746478, + "grad_norm": 346.6893615722656, + "learning_rate": 4.2552793629202506e-09, + "loss": 19.1555, + "step": 494090 + }, + { + "epoch": 0.9981132609073317, + "grad_norm": 71.67774200439453, + "learning_rate": 4.2408933116072635e-09, + "loss": 14.7107, + "step": 494100 + }, + { + "epoch": 0.9981334615400155, + "grad_norm": 197.77346801757812, + "learning_rate": 4.22653160901787e-09, + "loss": 17.0342, + "step": 494110 + }, + { + "epoch": 0.9981536621726993, + "grad_norm": 79.39214324951172, + "learning_rate": 4.212194255222568e-09, + "loss": 5.2157, + "step": 494120 + }, + { + "epoch": 0.9981738628053831, + "grad_norm": 181.61233520507812, + "learning_rate": 4.197881250291302e-09, + "loss": 31.528, + "step": 494130 + }, + { + "epoch": 0.9981940634380668, + "grad_norm": 309.17108154296875, + "learning_rate": 4.183592594294017e-09, + "loss": 12.9723, + "step": 494140 + }, + { + "epoch": 0.9982142640707506, + "grad_norm": 405.5485534667969, + "learning_rate": 4.169328287299545e-09, + "loss": 22.8086, + "step": 494150 + }, + { + "epoch": 0.9982344647034345, + "grad_norm": 442.8080139160156, + "learning_rate": 4.155088329377832e-09, + "loss": 26.5508, + "step": 494160 + }, + { + "epoch": 0.9982546653361183, + "grad_norm": 87.66561889648438, + "learning_rate": 4.140872720598266e-09, + "loss": 16.6031, + "step": 494170 + }, + { + "epoch": 0.9982748659688021, + "grad_norm": 430.75762939453125, + "learning_rate": 4.126681461030236e-09, + "loss": 12.1058, + "step": 494180 + }, + { + "epoch": 0.9982950666014859, + "grad_norm": 376.21795654296875, + "learning_rate": 4.11251455074313e-09, + "loss": 17.4234, + "step": 494190 + }, + { + "epoch": 0.9983152672341697, + "grad_norm": 403.6354675292969, + "learning_rate": 4.098371989805227e-09, + "loss": 25.0719, + "step": 494200 + }, + { + "epoch": 0.9983354678668536, + "grad_norm": 51.054805755615234, + "learning_rate": 4.0842537782859185e-09, + "loss": 13.4751, + "step": 494210 + }, + { + "epoch": 0.9983556684995374, + "grad_norm": 509.38482666015625, + "learning_rate": 4.07015991625459e-09, + "loss": 9.6944, + "step": 494220 + }, + { + "epoch": 0.9983758691322212, + "grad_norm": 167.8269500732422, + "learning_rate": 4.056090403778967e-09, + "loss": 27.3016, + "step": 494230 + }, + { + "epoch": 0.998396069764905, + "grad_norm": 382.88677978515625, + "learning_rate": 4.042045240927883e-09, + "loss": 13.0717, + "step": 494240 + }, + { + "epoch": 0.9984162703975888, + "grad_norm": 26.21680450439453, + "learning_rate": 4.028024427770172e-09, + "loss": 8.1485, + "step": 494250 + }, + { + "epoch": 0.9984364710302727, + "grad_norm": 202.1404266357422, + "learning_rate": 4.014027964373557e-09, + "loss": 10.6287, + "step": 494260 + }, + { + "epoch": 0.9984566716629565, + "grad_norm": 150.67433166503906, + "learning_rate": 4.000055850807427e-09, + "loss": 12.2298, + "step": 494270 + }, + { + "epoch": 0.9984768722956403, + "grad_norm": 296.3301696777344, + "learning_rate": 3.986108087138396e-09, + "loss": 13.9021, + "step": 494280 + }, + { + "epoch": 0.9984970729283241, + "grad_norm": 176.92782592773438, + "learning_rate": 3.972184673435297e-09, + "loss": 16.7603, + "step": 494290 + }, + { + "epoch": 0.998517273561008, + "grad_norm": 236.4563751220703, + "learning_rate": 3.9582856097658554e-09, + "loss": 18.8897, + "step": 494300 + }, + { + "epoch": 0.9985374741936918, + "grad_norm": 274.7383728027344, + "learning_rate": 3.944410896197792e-09, + "loss": 19.7265, + "step": 494310 + }, + { + "epoch": 0.9985576748263756, + "grad_norm": 563.5479736328125, + "learning_rate": 3.930560532798832e-09, + "loss": 24.5245, + "step": 494320 + }, + { + "epoch": 0.9985778754590594, + "grad_norm": 124.28654479980469, + "learning_rate": 3.9167345196361454e-09, + "loss": 17.0896, + "step": 494330 + }, + { + "epoch": 0.9985980760917432, + "grad_norm": 58.03425216674805, + "learning_rate": 3.902932856777453e-09, + "loss": 10.4322, + "step": 494340 + }, + { + "epoch": 0.998618276724427, + "grad_norm": 260.0706481933594, + "learning_rate": 3.889155544289924e-09, + "loss": 19.126, + "step": 494350 + }, + { + "epoch": 0.9986384773571109, + "grad_norm": 160.23228454589844, + "learning_rate": 3.8754025822407285e-09, + "loss": 22.1344, + "step": 494360 + }, + { + "epoch": 0.9986586779897947, + "grad_norm": 319.59918212890625, + "learning_rate": 3.861673970697033e-09, + "loss": 13.2886, + "step": 494370 + }, + { + "epoch": 0.9986788786224785, + "grad_norm": 209.1680145263672, + "learning_rate": 3.847969709725452e-09, + "loss": 18.2661, + "step": 494380 + }, + { + "epoch": 0.9986990792551623, + "grad_norm": 114.0342788696289, + "learning_rate": 3.834289799392598e-09, + "loss": 24.9581, + "step": 494390 + }, + { + "epoch": 0.998719279887846, + "grad_norm": 160.35536193847656, + "learning_rate": 3.820634239765642e-09, + "loss": 15.435, + "step": 494400 + }, + { + "epoch": 0.9987394805205299, + "grad_norm": 237.72604370117188, + "learning_rate": 3.8070030309111935e-09, + "loss": 11.5302, + "step": 494410 + }, + { + "epoch": 0.9987596811532137, + "grad_norm": 486.2515869140625, + "learning_rate": 3.793396172895314e-09, + "loss": 21.8128, + "step": 494420 + }, + { + "epoch": 0.9987798817858975, + "grad_norm": 212.57371520996094, + "learning_rate": 3.77981366578406e-09, + "loss": 10.2289, + "step": 494430 + }, + { + "epoch": 0.9988000824185813, + "grad_norm": 40.69304656982422, + "learning_rate": 3.766255509644601e-09, + "loss": 18.4652, + "step": 494440 + }, + { + "epoch": 0.9988202830512651, + "grad_norm": 331.1392517089844, + "learning_rate": 3.752721704541884e-09, + "loss": 17.761, + "step": 494450 + }, + { + "epoch": 0.998840483683949, + "grad_norm": 666.123291015625, + "learning_rate": 3.739212250543078e-09, + "loss": 22.8242, + "step": 494460 + }, + { + "epoch": 0.9988606843166328, + "grad_norm": 310.6073303222656, + "learning_rate": 3.7257271477131314e-09, + "loss": 17.9392, + "step": 494470 + }, + { + "epoch": 0.9988808849493166, + "grad_norm": 0.0, + "learning_rate": 3.7122663961175477e-09, + "loss": 11.0337, + "step": 494480 + }, + { + "epoch": 0.9989010855820004, + "grad_norm": 243.14625549316406, + "learning_rate": 3.698829995822939e-09, + "loss": 15.4749, + "step": 494490 + }, + { + "epoch": 0.9989212862146842, + "grad_norm": 21.81050682067871, + "learning_rate": 3.685417946894254e-09, + "loss": 20.1226, + "step": 494500 + }, + { + "epoch": 0.998941486847368, + "grad_norm": 140.5139923095703, + "learning_rate": 3.672030249396441e-09, + "loss": 9.3858, + "step": 494510 + }, + { + "epoch": 0.9989616874800519, + "grad_norm": 192.8984832763672, + "learning_rate": 3.6586669033955578e-09, + "loss": 11.125, + "step": 494520 + }, + { + "epoch": 0.9989818881127357, + "grad_norm": 381.0818176269531, + "learning_rate": 3.645327908955998e-09, + "loss": 14.2186, + "step": 494530 + }, + { + "epoch": 0.9990020887454195, + "grad_norm": 44.641876220703125, + "learning_rate": 3.632013266143264e-09, + "loss": 18.9591, + "step": 494540 + }, + { + "epoch": 0.9990222893781033, + "grad_norm": 190.85699462890625, + "learning_rate": 3.618722975022304e-09, + "loss": 13.7379, + "step": 494550 + }, + { + "epoch": 0.9990424900107872, + "grad_norm": 343.0568542480469, + "learning_rate": 3.605457035657511e-09, + "loss": 17.9219, + "step": 494560 + }, + { + "epoch": 0.999062690643471, + "grad_norm": 164.60711669921875, + "learning_rate": 3.592215448113834e-09, + "loss": 19.8793, + "step": 494570 + }, + { + "epoch": 0.9990828912761548, + "grad_norm": 1118.8013916015625, + "learning_rate": 3.5789982124556646e-09, + "loss": 21.6261, + "step": 494580 + }, + { + "epoch": 0.9991030919088386, + "grad_norm": 352.3271789550781, + "learning_rate": 3.565805328747951e-09, + "loss": 10.4939, + "step": 494590 + }, + { + "epoch": 0.9991232925415224, + "grad_norm": 193.1532440185547, + "learning_rate": 3.5526367970539765e-09, + "loss": 18.272, + "step": 494600 + }, + { + "epoch": 0.9991434931742063, + "grad_norm": 212.69444274902344, + "learning_rate": 3.5394926174381338e-09, + "loss": 36.8045, + "step": 494610 + }, + { + "epoch": 0.9991636938068901, + "grad_norm": 646.9624633789062, + "learning_rate": 3.526372789965371e-09, + "loss": 25.4694, + "step": 494620 + }, + { + "epoch": 0.9991838944395739, + "grad_norm": 352.7398376464844, + "learning_rate": 3.5132773146989706e-09, + "loss": 13.6786, + "step": 494630 + }, + { + "epoch": 0.9992040950722577, + "grad_norm": 57.33049774169922, + "learning_rate": 3.5002061917027708e-09, + "loss": 18.9907, + "step": 494640 + }, + { + "epoch": 0.9992242957049415, + "grad_norm": 121.9939956665039, + "learning_rate": 3.487159421040609e-09, + "loss": 10.8586, + "step": 494650 + }, + { + "epoch": 0.9992444963376252, + "grad_norm": 319.4378967285156, + "learning_rate": 3.474137002775768e-09, + "loss": 7.7744, + "step": 494660 + }, + { + "epoch": 0.9992646969703091, + "grad_norm": 141.7076416015625, + "learning_rate": 3.461138936972086e-09, + "loss": 21.0372, + "step": 494670 + }, + { + "epoch": 0.9992848976029929, + "grad_norm": 276.5302429199219, + "learning_rate": 3.4481652236934006e-09, + "loss": 16.0682, + "step": 494680 + }, + { + "epoch": 0.9993050982356767, + "grad_norm": 313.83416748046875, + "learning_rate": 3.4352158630018837e-09, + "loss": 22.9036, + "step": 494690 + }, + { + "epoch": 0.9993252988683605, + "grad_norm": 321.11041259765625, + "learning_rate": 3.4222908549608193e-09, + "loss": 15.7496, + "step": 494700 + }, + { + "epoch": 0.9993454995010443, + "grad_norm": 379.5470886230469, + "learning_rate": 3.409390199634044e-09, + "loss": 23.053, + "step": 494710 + }, + { + "epoch": 0.9993657001337282, + "grad_norm": 467.63946533203125, + "learning_rate": 3.3965138970831758e-09, + "loss": 29.7448, + "step": 494720 + }, + { + "epoch": 0.999385900766412, + "grad_norm": 331.6331481933594, + "learning_rate": 3.3836619473720522e-09, + "loss": 13.9507, + "step": 494730 + }, + { + "epoch": 0.9994061013990958, + "grad_norm": 457.5184326171875, + "learning_rate": 3.370834350563401e-09, + "loss": 14.6985, + "step": 494740 + }, + { + "epoch": 0.9994263020317796, + "grad_norm": 335.27069091796875, + "learning_rate": 3.3580311067188396e-09, + "loss": 16.1703, + "step": 494750 + }, + { + "epoch": 0.9994465026644634, + "grad_norm": 167.1810760498047, + "learning_rate": 3.3452522159010957e-09, + "loss": 22.494, + "step": 494760 + }, + { + "epoch": 0.9994667032971473, + "grad_norm": 319.8489685058594, + "learning_rate": 3.332497678172897e-09, + "loss": 13.6591, + "step": 494770 + }, + { + "epoch": 0.9994869039298311, + "grad_norm": 339.9732666015625, + "learning_rate": 3.31976749359586e-09, + "loss": 23.3354, + "step": 494780 + }, + { + "epoch": 0.9995071045625149, + "grad_norm": 401.10968017578125, + "learning_rate": 3.3070616622321584e-09, + "loss": 10.5454, + "step": 494790 + }, + { + "epoch": 0.9995273051951987, + "grad_norm": 147.7809295654297, + "learning_rate": 3.294380184143964e-09, + "loss": 18.4338, + "step": 494800 + }, + { + "epoch": 0.9995475058278825, + "grad_norm": 276.3037109375, + "learning_rate": 3.2817230593928938e-09, + "loss": 16.4183, + "step": 494810 + }, + { + "epoch": 0.9995677064605664, + "grad_norm": 553.263427734375, + "learning_rate": 3.269090288041121e-09, + "loss": 22.1128, + "step": 494820 + }, + { + "epoch": 0.9995879070932502, + "grad_norm": 82.76365661621094, + "learning_rate": 3.256481870149153e-09, + "loss": 28.3858, + "step": 494830 + }, + { + "epoch": 0.999608107725934, + "grad_norm": 225.22779846191406, + "learning_rate": 3.2438978057791615e-09, + "loss": 22.2406, + "step": 494840 + }, + { + "epoch": 0.9996283083586178, + "grad_norm": 413.48529052734375, + "learning_rate": 3.2313380949927643e-09, + "loss": 14.8626, + "step": 494850 + }, + { + "epoch": 0.9996485089913016, + "grad_norm": 148.7971954345703, + "learning_rate": 3.218802737850468e-09, + "loss": 13.117, + "step": 494860 + }, + { + "epoch": 0.9996687096239855, + "grad_norm": 179.87042236328125, + "learning_rate": 3.206291734413891e-09, + "loss": 11.7422, + "step": 494870 + }, + { + "epoch": 0.9996889102566693, + "grad_norm": 187.84117126464844, + "learning_rate": 3.1938050847435398e-09, + "loss": 9.0214, + "step": 494880 + }, + { + "epoch": 0.9997091108893531, + "grad_norm": 165.037109375, + "learning_rate": 3.1813427889004767e-09, + "loss": 14.8521, + "step": 494890 + }, + { + "epoch": 0.9997293115220369, + "grad_norm": 287.6842041015625, + "learning_rate": 3.1689048469457638e-09, + "loss": 14.124, + "step": 494900 + }, + { + "epoch": 0.9997495121547206, + "grad_norm": 21.54877471923828, + "learning_rate": 3.156491258939909e-09, + "loss": 17.4252, + "step": 494910 + }, + { + "epoch": 0.9997697127874045, + "grad_norm": 107.47203826904297, + "learning_rate": 3.1441020249428635e-09, + "loss": 17.2954, + "step": 494920 + }, + { + "epoch": 0.9997899134200883, + "grad_norm": 16.81981086730957, + "learning_rate": 3.1317371450156897e-09, + "loss": 6.7111, + "step": 494930 + }, + { + "epoch": 0.9998101140527721, + "grad_norm": 147.71060180664062, + "learning_rate": 3.11939661921834e-09, + "loss": 14.4858, + "step": 494940 + }, + { + "epoch": 0.9998303146854559, + "grad_norm": 438.4915771484375, + "learning_rate": 3.1070804476113213e-09, + "loss": 9.8981, + "step": 494950 + }, + { + "epoch": 0.9998505153181397, + "grad_norm": 289.93597412109375, + "learning_rate": 3.094788630254031e-09, + "loss": 30.2553, + "step": 494960 + }, + { + "epoch": 0.9998707159508236, + "grad_norm": 144.87721252441406, + "learning_rate": 3.0825211672064203e-09, + "loss": 15.9481, + "step": 494970 + }, + { + "epoch": 0.9998909165835074, + "grad_norm": 497.0413513183594, + "learning_rate": 3.070278058528997e-09, + "loss": 22.4205, + "step": 494980 + }, + { + "epoch": 0.9999111172161912, + "grad_norm": 252.9429473876953, + "learning_rate": 3.058059304280603e-09, + "loss": 25.7312, + "step": 494990 + }, + { + "epoch": 0.999931317848875, + "grad_norm": 162.82374572753906, + "learning_rate": 3.0458649045211897e-09, + "loss": 17.4093, + "step": 495000 + }, + { + "epoch": 0.9999515184815588, + "grad_norm": 165.43084716796875, + "learning_rate": 3.03369485931071e-09, + "loss": 20.4357, + "step": 495010 + }, + { + "epoch": 0.9999717191142427, + "grad_norm": 328.46612548828125, + "learning_rate": 3.0215491687074492e-09, + "loss": 20.1648, + "step": 495020 + }, + { + "epoch": 0.9999919197469265, + "grad_norm": 359.5785827636719, + "learning_rate": 3.009427832771361e-09, + "loss": 26.0108, + "step": 495030 + }, + { + "epoch": 1.0, + "eval_loss": 18.470447540283203, + "eval_runtime": 409.5979, + "eval_samples_per_second": 24.417, + "eval_steps_per_second": 12.21, + "step": 495034 + }, + { + "epoch": 1.0000121203796102, + "grad_norm": 350.266845703125, + "learning_rate": 2.9973308515607313e-09, + "loss": 20.6201, + "step": 495040 + }, + { + "epoch": 1.0000323210122941, + "grad_norm": 297.6217041015625, + "learning_rate": 2.9852582251355124e-09, + "loss": 15.3668, + "step": 495050 + }, + { + "epoch": 1.0000525216449778, + "grad_norm": 232.36581420898438, + "learning_rate": 2.9732099535539905e-09, + "loss": 12.8444, + "step": 495060 + }, + { + "epoch": 1.0000727222776618, + "grad_norm": 0.0, + "learning_rate": 2.961186036875008e-09, + "loss": 22.4988, + "step": 495070 + }, + { + "epoch": 1.0000929229103455, + "grad_norm": 423.0118713378906, + "learning_rate": 2.949186475157406e-09, + "loss": 13.9478, + "step": 495080 + }, + { + "epoch": 1.0001131235430294, + "grad_norm": 322.5085754394531, + "learning_rate": 2.937211268458917e-09, + "loss": 22.6801, + "step": 495090 + }, + { + "epoch": 1.000133324175713, + "grad_norm": 256.5771179199219, + "learning_rate": 2.9252604168383826e-09, + "loss": 11.055, + "step": 495100 + }, + { + "epoch": 1.000153524808397, + "grad_norm": 171.06491088867188, + "learning_rate": 2.913333920354644e-09, + "loss": 24.1243, + "step": 495110 + }, + { + "epoch": 1.0001737254410807, + "grad_norm": 274.31451416015625, + "learning_rate": 2.901431779064323e-09, + "loss": 17.4806, + "step": 495120 + }, + { + "epoch": 1.0001939260737647, + "grad_norm": 281.48431396484375, + "learning_rate": 2.889553993027372e-09, + "loss": 14.8861, + "step": 495130 + }, + { + "epoch": 1.0002141267064484, + "grad_norm": 484.43438720703125, + "learning_rate": 2.8777005622998567e-09, + "loss": 15.3516, + "step": 495140 + }, + { + "epoch": 1.0002343273391323, + "grad_norm": 90.8409652709961, + "learning_rate": 2.865871486940619e-09, + "loss": 10.165, + "step": 495150 + }, + { + "epoch": 1.000254527971816, + "grad_norm": 220.88543701171875, + "learning_rate": 2.8540667670073905e-09, + "loss": 12.697, + "step": 495160 + }, + { + "epoch": 1.0002747286045, + "grad_norm": 303.0424499511719, + "learning_rate": 2.842286402556793e-09, + "loss": 14.4591, + "step": 495170 + }, + { + "epoch": 1.0002949292371837, + "grad_norm": 388.16571044921875, + "learning_rate": 2.830530393647113e-09, + "loss": 21.5584, + "step": 495180 + }, + { + "epoch": 1.0003151298698676, + "grad_norm": 246.49331665039062, + "learning_rate": 2.8187987403355268e-09, + "loss": 20.6374, + "step": 495190 + }, + { + "epoch": 1.0003353305025513, + "grad_norm": 42.6082649230957, + "learning_rate": 2.8070914426786555e-09, + "loss": 35.4803, + "step": 495200 + }, + { + "epoch": 1.0003555311352352, + "grad_norm": 238.4278106689453, + "learning_rate": 2.7954085007342315e-09, + "loss": 26.5214, + "step": 495210 + }, + { + "epoch": 1.000375731767919, + "grad_norm": 88.92025756835938, + "learning_rate": 2.78374991455832e-09, + "loss": 21.3553, + "step": 495220 + }, + { + "epoch": 1.0003959324006029, + "grad_norm": 52.954776763916016, + "learning_rate": 2.772115684209209e-09, + "loss": 10.0283, + "step": 495230 + }, + { + "epoch": 1.0004161330332866, + "grad_norm": 138.259033203125, + "learning_rate": 2.7605058097418536e-09, + "loss": 13.1708, + "step": 495240 + }, + { + "epoch": 1.0004363336659705, + "grad_norm": 290.2542724609375, + "learning_rate": 2.748920291214541e-09, + "loss": 10.6109, + "step": 495250 + }, + { + "epoch": 1.0004565342986542, + "grad_norm": 316.5008239746094, + "learning_rate": 2.7373591286822266e-09, + "loss": 17.5169, + "step": 495260 + }, + { + "epoch": 1.0004767349313382, + "grad_norm": 939.4614868164062, + "learning_rate": 2.7258223222020876e-09, + "loss": 24.7996, + "step": 495270 + }, + { + "epoch": 1.0004969355640219, + "grad_norm": 424.4223327636719, + "learning_rate": 2.7143098718301896e-09, + "loss": 22.1629, + "step": 495280 + }, + { + "epoch": 1.0005171361967056, + "grad_norm": 193.19212341308594, + "learning_rate": 2.7028217776225994e-09, + "loss": 8.4584, + "step": 495290 + }, + { + "epoch": 1.0005373368293895, + "grad_norm": 124.94254302978516, + "learning_rate": 2.6913580396359384e-09, + "loss": 17.981, + "step": 495300 + }, + { + "epoch": 1.0005575374620732, + "grad_norm": 449.8768310546875, + "learning_rate": 2.6799186579246074e-09, + "loss": 15.3962, + "step": 495310 + }, + { + "epoch": 1.0005777380947571, + "grad_norm": 323.71661376953125, + "learning_rate": 2.6685036325457826e-09, + "loss": 28.1691, + "step": 495320 + }, + { + "epoch": 1.0005979387274408, + "grad_norm": 200.54359436035156, + "learning_rate": 2.65711296355442e-09, + "loss": 23.6755, + "step": 495330 + }, + { + "epoch": 1.0006181393601248, + "grad_norm": 337.1823425292969, + "learning_rate": 2.6457466510065866e-09, + "loss": 13.0376, + "step": 495340 + }, + { + "epoch": 1.0006383399928085, + "grad_norm": 163.7471160888672, + "learning_rate": 2.6344046949566825e-09, + "loss": 19.5054, + "step": 495350 + }, + { + "epoch": 1.0006585406254924, + "grad_norm": 493.8697814941406, + "learning_rate": 2.6230870954607746e-09, + "loss": 21.381, + "step": 495360 + }, + { + "epoch": 1.0006787412581761, + "grad_norm": 209.40423583984375, + "learning_rate": 2.6117938525738185e-09, + "loss": 13.005, + "step": 495370 + }, + { + "epoch": 1.00069894189086, + "grad_norm": 169.35916137695312, + "learning_rate": 2.6005249663513254e-09, + "loss": 19.9928, + "step": 495380 + }, + { + "epoch": 1.0007191425235438, + "grad_norm": 274.072509765625, + "learning_rate": 2.5892804368471414e-09, + "loss": 17.1564, + "step": 495390 + }, + { + "epoch": 1.0007393431562277, + "grad_norm": 419.8789367675781, + "learning_rate": 2.5780602641167774e-09, + "loss": 19.1674, + "step": 495400 + }, + { + "epoch": 1.0007595437889114, + "grad_norm": 148.21514892578125, + "learning_rate": 2.5668644482151892e-09, + "loss": 19.6976, + "step": 495410 + }, + { + "epoch": 1.0007797444215953, + "grad_norm": 413.3901062011719, + "learning_rate": 2.5556929891962234e-09, + "loss": 10.4757, + "step": 495420 + }, + { + "epoch": 1.000799945054279, + "grad_norm": 224.62789916992188, + "learning_rate": 2.5445458871148353e-09, + "loss": 17.2608, + "step": 495430 + }, + { + "epoch": 1.000820145686963, + "grad_norm": 216.81382751464844, + "learning_rate": 2.5334231420254262e-09, + "loss": 13.2153, + "step": 495440 + }, + { + "epoch": 1.0008403463196467, + "grad_norm": 61.02236557006836, + "learning_rate": 2.522324753981842e-09, + "loss": 8.6135, + "step": 495450 + }, + { + "epoch": 1.0008605469523306, + "grad_norm": 216.50863647460938, + "learning_rate": 2.511250723037928e-09, + "loss": 29.0171, + "step": 495460 + }, + { + "epoch": 1.0008807475850143, + "grad_norm": 221.52297973632812, + "learning_rate": 2.5002010492486405e-09, + "loss": 14.8874, + "step": 495470 + }, + { + "epoch": 1.0009009482176983, + "grad_norm": 66.00392150878906, + "learning_rate": 2.4891757326667154e-09, + "loss": 25.6189, + "step": 495480 + }, + { + "epoch": 1.000921148850382, + "grad_norm": 944.4640502929688, + "learning_rate": 2.4781747733471085e-09, + "loss": 30.2171, + "step": 495490 + }, + { + "epoch": 1.000941349483066, + "grad_norm": 489.37469482421875, + "learning_rate": 2.4671981713420003e-09, + "loss": 26.4947, + "step": 495500 + }, + { + "epoch": 1.0009615501157496, + "grad_norm": 198.69480895996094, + "learning_rate": 2.4562459267063466e-09, + "loss": 12.7678, + "step": 495510 + }, + { + "epoch": 1.0009817507484335, + "grad_norm": 161.24114990234375, + "learning_rate": 2.445318039492328e-09, + "loss": 9.1362, + "step": 495520 + }, + { + "epoch": 1.0010019513811172, + "grad_norm": 115.72456359863281, + "learning_rate": 2.4344145097537906e-09, + "loss": 14.2754, + "step": 495530 + }, + { + "epoch": 1.0010221520138012, + "grad_norm": 402.95782470703125, + "learning_rate": 2.423535337544025e-09, + "loss": 14.8401, + "step": 495540 + }, + { + "epoch": 1.001042352646485, + "grad_norm": 275.5498046875, + "learning_rate": 2.412680522915767e-09, + "loss": 25.1825, + "step": 495550 + }, + { + "epoch": 1.0010625532791686, + "grad_norm": 491.6571960449219, + "learning_rate": 2.4018500659217515e-09, + "loss": 23.5318, + "step": 495560 + }, + { + "epoch": 1.0010827539118525, + "grad_norm": 133.8235626220703, + "learning_rate": 2.3910439666147147e-09, + "loss": 14.7339, + "step": 495570 + }, + { + "epoch": 1.0011029545445362, + "grad_norm": 147.26856994628906, + "learning_rate": 2.380262225047947e-09, + "loss": 11.1961, + "step": 495580 + }, + { + "epoch": 1.0011231551772202, + "grad_norm": 497.2104187011719, + "learning_rate": 2.369504841273629e-09, + "loss": 16.4336, + "step": 495590 + }, + { + "epoch": 1.0011433558099039, + "grad_norm": 263.5239562988281, + "learning_rate": 2.358771815344496e-09, + "loss": 11.2798, + "step": 495600 + }, + { + "epoch": 1.0011635564425878, + "grad_norm": 377.579345703125, + "learning_rate": 2.348063147312174e-09, + "loss": 12.8141, + "step": 495610 + }, + { + "epoch": 1.0011837570752715, + "grad_norm": 310.23370361328125, + "learning_rate": 2.337378837229398e-09, + "loss": 17.9364, + "step": 495620 + }, + { + "epoch": 1.0012039577079554, + "grad_norm": 367.58270263671875, + "learning_rate": 2.326718885147794e-09, + "loss": 10.5867, + "step": 495630 + }, + { + "epoch": 1.0012241583406392, + "grad_norm": 4.401870250701904, + "learning_rate": 2.316083291120097e-09, + "loss": 11.452, + "step": 495640 + }, + { + "epoch": 1.001244358973323, + "grad_norm": 110.84075927734375, + "learning_rate": 2.3054720551973773e-09, + "loss": 18.5355, + "step": 495650 + }, + { + "epoch": 1.0012645596060068, + "grad_norm": 146.7382354736328, + "learning_rate": 2.294885177431816e-09, + "loss": 14.5424, + "step": 495660 + }, + { + "epoch": 1.0012847602386907, + "grad_norm": 157.50096130371094, + "learning_rate": 2.2843226578744826e-09, + "loss": 12.3647, + "step": 495670 + }, + { + "epoch": 1.0013049608713744, + "grad_norm": 780.4160766601562, + "learning_rate": 2.2737844965775578e-09, + "loss": 25.2432, + "step": 495680 + }, + { + "epoch": 1.0013251615040584, + "grad_norm": 70.00536346435547, + "learning_rate": 2.263270693592112e-09, + "loss": 14.2905, + "step": 495690 + }, + { + "epoch": 1.001345362136742, + "grad_norm": 546.5574340820312, + "learning_rate": 2.2527812489692156e-09, + "loss": 25.6824, + "step": 495700 + }, + { + "epoch": 1.001365562769426, + "grad_norm": 408.5946960449219, + "learning_rate": 2.2423161627599386e-09, + "loss": 16.6912, + "step": 495710 + }, + { + "epoch": 1.0013857634021097, + "grad_norm": 34.1672248840332, + "learning_rate": 2.2318754350159067e-09, + "loss": 20.0957, + "step": 495720 + }, + { + "epoch": 1.0014059640347936, + "grad_norm": 202.27102661132812, + "learning_rate": 2.2214590657870795e-09, + "loss": 16.8399, + "step": 495730 + }, + { + "epoch": 1.0014261646674774, + "grad_norm": 67.70073699951172, + "learning_rate": 2.211067055124527e-09, + "loss": 12.9959, + "step": 495740 + }, + { + "epoch": 1.0014463653001613, + "grad_norm": 1.5654852390289307, + "learning_rate": 2.2006994030798758e-09, + "loss": 7.921, + "step": 495750 + }, + { + "epoch": 1.001466565932845, + "grad_norm": 185.27125549316406, + "learning_rate": 2.1903561097019744e-09, + "loss": 14.1706, + "step": 495760 + }, + { + "epoch": 1.001486766565529, + "grad_norm": 451.46075439453125, + "learning_rate": 2.1800371750430037e-09, + "loss": 33.0078, + "step": 495770 + }, + { + "epoch": 1.0015069671982126, + "grad_norm": 169.50633239746094, + "learning_rate": 2.169742599151814e-09, + "loss": 21.5873, + "step": 495780 + }, + { + "epoch": 1.0015271678308966, + "grad_norm": 335.2918395996094, + "learning_rate": 2.15947238207892e-09, + "loss": 15.987, + "step": 495790 + }, + { + "epoch": 1.0015473684635803, + "grad_norm": 206.71524047851562, + "learning_rate": 2.149226523874837e-09, + "loss": 13.8174, + "step": 495800 + }, + { + "epoch": 1.001567569096264, + "grad_norm": 262.7821960449219, + "learning_rate": 2.1390050245895246e-09, + "loss": 18.3342, + "step": 495810 + }, + { + "epoch": 1.001587769728948, + "grad_norm": 379.2049560546875, + "learning_rate": 2.128807884272388e-09, + "loss": 22.2588, + "step": 495820 + }, + { + "epoch": 1.0016079703616316, + "grad_norm": 381.626220703125, + "learning_rate": 2.1186351029733877e-09, + "loss": 16.0041, + "step": 495830 + }, + { + "epoch": 1.0016281709943156, + "grad_norm": 402.493896484375, + "learning_rate": 2.1084866807413727e-09, + "loss": 30.4098, + "step": 495840 + }, + { + "epoch": 1.0016483716269993, + "grad_norm": 133.56912231445312, + "learning_rate": 2.098362617626859e-09, + "loss": 12.5756, + "step": 495850 + }, + { + "epoch": 1.0016685722596832, + "grad_norm": 159.41049194335938, + "learning_rate": 2.088262913679251e-09, + "loss": 5.5835, + "step": 495860 + }, + { + "epoch": 1.001688772892367, + "grad_norm": 89.73715209960938, + "learning_rate": 2.078187568946288e-09, + "loss": 13.8562, + "step": 495870 + }, + { + "epoch": 1.0017089735250508, + "grad_norm": 184.55747985839844, + "learning_rate": 2.0681365834790413e-09, + "loss": 16.0826, + "step": 495880 + }, + { + "epoch": 1.0017291741577345, + "grad_norm": 251.4383087158203, + "learning_rate": 2.0581099573246943e-09, + "loss": 37.0574, + "step": 495890 + }, + { + "epoch": 1.0017493747904185, + "grad_norm": 207.5780792236328, + "learning_rate": 2.0481076905332074e-09, + "loss": 7.3997, + "step": 495900 + }, + { + "epoch": 1.0017695754231022, + "grad_norm": 169.85557556152344, + "learning_rate": 2.038129783153431e-09, + "loss": 10.5287, + "step": 495910 + }, + { + "epoch": 1.0017897760557861, + "grad_norm": 225.95216369628906, + "learning_rate": 2.0281762352331034e-09, + "loss": 15.2157, + "step": 495920 + }, + { + "epoch": 1.0018099766884698, + "grad_norm": 215.59495544433594, + "learning_rate": 2.018247046821631e-09, + "loss": 12.579, + "step": 495930 + }, + { + "epoch": 1.0018301773211538, + "grad_norm": 155.67726135253906, + "learning_rate": 2.008342217966752e-09, + "loss": 21.3434, + "step": 495940 + }, + { + "epoch": 1.0018503779538375, + "grad_norm": 330.3739318847656, + "learning_rate": 1.9984617487173174e-09, + "loss": 17.323, + "step": 495950 + }, + { + "epoch": 1.0018705785865214, + "grad_norm": 139.68040466308594, + "learning_rate": 1.9886056391210663e-09, + "loss": 22.3888, + "step": 495960 + }, + { + "epoch": 1.001890779219205, + "grad_norm": 50.877384185791016, + "learning_rate": 1.9787738892262932e-09, + "loss": 5.3651, + "step": 495970 + }, + { + "epoch": 1.001910979851889, + "grad_norm": 193.98800659179688, + "learning_rate": 1.968966499080738e-09, + "loss": 13.2665, + "step": 495980 + }, + { + "epoch": 1.0019311804845727, + "grad_norm": 261.4365539550781, + "learning_rate": 1.95918346873214e-09, + "loss": 9.9879, + "step": 495990 + }, + { + "epoch": 1.0019513811172567, + "grad_norm": 251.7092742919922, + "learning_rate": 1.9494247982282386e-09, + "loss": 31.1196, + "step": 496000 + }, + { + "epoch": 1.0019715817499404, + "grad_norm": 10.172210693359375, + "learning_rate": 1.9396904876167742e-09, + "loss": 17.0401, + "step": 496010 + }, + { + "epoch": 1.0019917823826243, + "grad_norm": 259.8248596191406, + "learning_rate": 1.9299805369449307e-09, + "loss": 14.1641, + "step": 496020 + }, + { + "epoch": 1.002011983015308, + "grad_norm": 241.33895874023438, + "learning_rate": 1.920294946260448e-09, + "loss": 13.5077, + "step": 496030 + }, + { + "epoch": 1.002032183647992, + "grad_norm": 220.9041748046875, + "learning_rate": 1.9106337156099553e-09, + "loss": 12.3981, + "step": 496040 + }, + { + "epoch": 1.0020523842806757, + "grad_norm": 201.5591278076172, + "learning_rate": 1.9009968450406368e-09, + "loss": 25.9868, + "step": 496050 + }, + { + "epoch": 1.0020725849133594, + "grad_norm": 331.458740234375, + "learning_rate": 1.8913843346002324e-09, + "loss": 13.4085, + "step": 496060 + }, + { + "epoch": 1.0020927855460433, + "grad_norm": 259.1542053222656, + "learning_rate": 1.8817961843348166e-09, + "loss": 8.3266, + "step": 496070 + }, + { + "epoch": 1.002112986178727, + "grad_norm": 225.1783447265625, + "learning_rate": 1.872232394291018e-09, + "loss": 13.4523, + "step": 496080 + }, + { + "epoch": 1.002133186811411, + "grad_norm": 101.02706146240234, + "learning_rate": 1.8626929645160218e-09, + "loss": 23.3397, + "step": 496090 + }, + { + "epoch": 1.0021533874440947, + "grad_norm": 511.61883544921875, + "learning_rate": 1.8531778950564572e-09, + "loss": 19.1652, + "step": 496100 + }, + { + "epoch": 1.0021735880767786, + "grad_norm": 241.39195251464844, + "learning_rate": 1.8436871859578431e-09, + "loss": 18.7063, + "step": 496110 + }, + { + "epoch": 1.0021937887094623, + "grad_norm": 341.495361328125, + "learning_rate": 1.834220837266809e-09, + "loss": 17.6599, + "step": 496120 + }, + { + "epoch": 1.0022139893421462, + "grad_norm": 148.2803192138672, + "learning_rate": 1.8247788490299846e-09, + "loss": 20.9109, + "step": 496130 + }, + { + "epoch": 1.00223418997483, + "grad_norm": 135.738037109375, + "learning_rate": 1.8153612212923333e-09, + "loss": 13.6855, + "step": 496140 + }, + { + "epoch": 1.0022543906075139, + "grad_norm": 65.21240234375, + "learning_rate": 1.80596795410104e-09, + "loss": 28.8057, + "step": 496150 + }, + { + "epoch": 1.0022745912401976, + "grad_norm": 4.850376605987549, + "learning_rate": 1.7965990475010686e-09, + "loss": 17.6667, + "step": 496160 + }, + { + "epoch": 1.0022947918728815, + "grad_norm": 270.9783630371094, + "learning_rate": 1.7872545015379382e-09, + "loss": 16.2643, + "step": 496170 + }, + { + "epoch": 1.0023149925055652, + "grad_norm": 150.8543701171875, + "learning_rate": 1.7779343162577233e-09, + "loss": 11.1799, + "step": 496180 + }, + { + "epoch": 1.0023351931382491, + "grad_norm": 347.93060302734375, + "learning_rate": 1.7686384917059429e-09, + "loss": 28.7505, + "step": 496190 + }, + { + "epoch": 1.0023553937709329, + "grad_norm": 10.917989730834961, + "learning_rate": 1.759367027927561e-09, + "loss": 7.8245, + "step": 496200 + }, + { + "epoch": 1.0023755944036168, + "grad_norm": 167.07542419433594, + "learning_rate": 1.7501199249675416e-09, + "loss": 12.516, + "step": 496210 + }, + { + "epoch": 1.0023957950363005, + "grad_norm": 105.32134246826172, + "learning_rate": 1.740897182871404e-09, + "loss": 11.817, + "step": 496220 + }, + { + "epoch": 1.0024159956689844, + "grad_norm": 181.1662139892578, + "learning_rate": 1.7316988016835567e-09, + "loss": 9.7884, + "step": 496230 + }, + { + "epoch": 1.0024361963016681, + "grad_norm": 390.3924560546875, + "learning_rate": 1.7225247814495194e-09, + "loss": 10.4173, + "step": 496240 + }, + { + "epoch": 1.002456396934352, + "grad_norm": 19.388301849365234, + "learning_rate": 1.7133751222137007e-09, + "loss": 18.2841, + "step": 496250 + }, + { + "epoch": 1.0024765975670358, + "grad_norm": 193.1466522216797, + "learning_rate": 1.7042498240205097e-09, + "loss": 18.2591, + "step": 496260 + }, + { + "epoch": 1.0024967981997197, + "grad_norm": 263.32916259765625, + "learning_rate": 1.6951488869149103e-09, + "loss": 5.0993, + "step": 496270 + }, + { + "epoch": 1.0025169988324034, + "grad_norm": 148.9720001220703, + "learning_rate": 1.686072310940201e-09, + "loss": 10.3391, + "step": 496280 + }, + { + "epoch": 1.0025371994650873, + "grad_norm": 343.7486267089844, + "learning_rate": 1.6770200961419015e-09, + "loss": 17.7179, + "step": 496290 + }, + { + "epoch": 1.002557400097771, + "grad_norm": 200.78855895996094, + "learning_rate": 1.6679922425638651e-09, + "loss": 22.5343, + "step": 496300 + }, + { + "epoch": 1.002577600730455, + "grad_norm": 190.37265014648438, + "learning_rate": 1.6589887502493907e-09, + "loss": 15.7389, + "step": 496310 + }, + { + "epoch": 1.0025978013631387, + "grad_norm": 185.1874237060547, + "learning_rate": 1.650009619242887e-09, + "loss": 12.1452, + "step": 496320 + }, + { + "epoch": 1.0026180019958224, + "grad_norm": 102.01954650878906, + "learning_rate": 1.6410548495876533e-09, + "loss": 11.1675, + "step": 496330 + }, + { + "epoch": 1.0026382026285063, + "grad_norm": 93.6607437133789, + "learning_rate": 1.632124441328098e-09, + "loss": 16.2916, + "step": 496340 + }, + { + "epoch": 1.00265840326119, + "grad_norm": 471.4112854003906, + "learning_rate": 1.6232183945075197e-09, + "loss": 19.1414, + "step": 496350 + }, + { + "epoch": 1.002678603893874, + "grad_norm": 276.6101379394531, + "learning_rate": 1.6143367091686624e-09, + "loss": 16.2045, + "step": 496360 + }, + { + "epoch": 1.0026988045265577, + "grad_norm": 258.1728820800781, + "learning_rate": 1.6054793853553797e-09, + "loss": 11.0517, + "step": 496370 + }, + { + "epoch": 1.0027190051592416, + "grad_norm": 362.923583984375, + "learning_rate": 1.59664642311097e-09, + "loss": 20.8093, + "step": 496380 + }, + { + "epoch": 1.0027392057919253, + "grad_norm": 445.3919372558594, + "learning_rate": 1.5878378224781777e-09, + "loss": 10.7264, + "step": 496390 + }, + { + "epoch": 1.0027594064246093, + "grad_norm": 209.9241180419922, + "learning_rate": 1.5790535835003006e-09, + "loss": 11.5396, + "step": 496400 + }, + { + "epoch": 1.002779607057293, + "grad_norm": 188.69015502929688, + "learning_rate": 1.570293706219528e-09, + "loss": 6.4494, + "step": 496410 + }, + { + "epoch": 1.002799807689977, + "grad_norm": 149.41795349121094, + "learning_rate": 1.5615581906791576e-09, + "loss": 6.0343, + "step": 496420 + }, + { + "epoch": 1.0028200083226606, + "grad_norm": 175.53099060058594, + "learning_rate": 1.5528470369208238e-09, + "loss": 21.2724, + "step": 496430 + }, + { + "epoch": 1.0028402089553445, + "grad_norm": 663.169921875, + "learning_rate": 1.5441602449883797e-09, + "loss": 15.0054, + "step": 496440 + }, + { + "epoch": 1.0028604095880282, + "grad_norm": 263.2573547363281, + "learning_rate": 1.535497814923459e-09, + "loss": 9.5173, + "step": 496450 + }, + { + "epoch": 1.0028806102207122, + "grad_norm": 7.298611640930176, + "learning_rate": 1.52685974676825e-09, + "loss": 19.2869, + "step": 496460 + }, + { + "epoch": 1.0029008108533959, + "grad_norm": 239.89503479003906, + "learning_rate": 1.518246040564386e-09, + "loss": 11.5415, + "step": 496470 + }, + { + "epoch": 1.0029210114860798, + "grad_norm": 1895.709716796875, + "learning_rate": 1.509656696354611e-09, + "loss": 30.2155, + "step": 496480 + }, + { + "epoch": 1.0029412121187635, + "grad_norm": 379.2161865234375, + "learning_rate": 1.5010917141811132e-09, + "loss": 23.8914, + "step": 496490 + }, + { + "epoch": 1.0029614127514475, + "grad_norm": 211.70846557617188, + "learning_rate": 1.4925510940844157e-09, + "loss": 11.5429, + "step": 496500 + }, + { + "epoch": 1.0029816133841312, + "grad_norm": 459.5061950683594, + "learning_rate": 1.4840348361067069e-09, + "loss": 30.6356, + "step": 496510 + }, + { + "epoch": 1.003001814016815, + "grad_norm": 224.3471221923828, + "learning_rate": 1.4755429402901755e-09, + "loss": 9.8845, + "step": 496520 + }, + { + "epoch": 1.0030220146494988, + "grad_norm": 383.6967468261719, + "learning_rate": 1.4670754066747895e-09, + "loss": 11.0713, + "step": 496530 + }, + { + "epoch": 1.0030422152821827, + "grad_norm": 557.1777954101562, + "learning_rate": 1.4586322353032923e-09, + "loss": 14.309, + "step": 496540 + }, + { + "epoch": 1.0030624159148664, + "grad_norm": 202.27076721191406, + "learning_rate": 1.4502134262156519e-09, + "loss": 12.6591, + "step": 496550 + }, + { + "epoch": 1.0030826165475504, + "grad_norm": 17.173858642578125, + "learning_rate": 1.4418189794540572e-09, + "loss": 10.8518, + "step": 496560 + }, + { + "epoch": 1.003102817180234, + "grad_norm": 126.16844177246094, + "learning_rate": 1.4334488950579206e-09, + "loss": 14.1143, + "step": 496570 + }, + { + "epoch": 1.0031230178129178, + "grad_norm": 8.855607032775879, + "learning_rate": 1.425103173069986e-09, + "loss": 12.0355, + "step": 496580 + }, + { + "epoch": 1.0031432184456017, + "grad_norm": 359.2356262207031, + "learning_rate": 1.4167818135291112e-09, + "loss": 13.1176, + "step": 496590 + }, + { + "epoch": 1.0031634190782854, + "grad_norm": 67.09163665771484, + "learning_rate": 1.4084848164763742e-09, + "loss": 11.2893, + "step": 496600 + }, + { + "epoch": 1.0031836197109694, + "grad_norm": 5.554289817810059, + "learning_rate": 1.4002121819528535e-09, + "loss": 17.339, + "step": 496610 + }, + { + "epoch": 1.003203820343653, + "grad_norm": 315.95751953125, + "learning_rate": 1.3919639099985171e-09, + "loss": 20.8446, + "step": 496620 + }, + { + "epoch": 1.003224020976337, + "grad_norm": 75.73570251464844, + "learning_rate": 1.3837400006533331e-09, + "loss": 18.169, + "step": 496630 + }, + { + "epoch": 1.0032442216090207, + "grad_norm": 276.59979248046875, + "learning_rate": 1.3755404539572692e-09, + "loss": 14.5566, + "step": 496640 + }, + { + "epoch": 1.0032644222417046, + "grad_norm": 233.65902709960938, + "learning_rate": 1.3673652699508487e-09, + "loss": 11.2905, + "step": 496650 + }, + { + "epoch": 1.0032846228743884, + "grad_norm": 184.38006591796875, + "learning_rate": 1.3592144486740399e-09, + "loss": 19.635, + "step": 496660 + }, + { + "epoch": 1.0033048235070723, + "grad_norm": 60.485477447509766, + "learning_rate": 1.3510879901657003e-09, + "loss": 7.4499, + "step": 496670 + }, + { + "epoch": 1.003325024139756, + "grad_norm": 217.9574737548828, + "learning_rate": 1.342985894465798e-09, + "loss": 13.5795, + "step": 496680 + }, + { + "epoch": 1.00334522477244, + "grad_norm": 119.2909927368164, + "learning_rate": 1.3349081616143012e-09, + "loss": 21.8638, + "step": 496690 + }, + { + "epoch": 1.0033654254051236, + "grad_norm": 251.32972717285156, + "learning_rate": 1.3268547916495124e-09, + "loss": 10.1939, + "step": 496700 + }, + { + "epoch": 1.0033856260378076, + "grad_norm": 41.26898956298828, + "learning_rate": 1.3188257846119545e-09, + "loss": 11.8392, + "step": 496710 + }, + { + "epoch": 1.0034058266704913, + "grad_norm": 19.208417892456055, + "learning_rate": 1.3108211405399307e-09, + "loss": 9.9375, + "step": 496720 + }, + { + "epoch": 1.0034260273031752, + "grad_norm": 585.7509155273438, + "learning_rate": 1.3028408594728536e-09, + "loss": 15.5444, + "step": 496730 + }, + { + "epoch": 1.003446227935859, + "grad_norm": 211.11203002929688, + "learning_rate": 1.2948849414495811e-09, + "loss": 14.4546, + "step": 496740 + }, + { + "epoch": 1.0034664285685428, + "grad_norm": 253.2190399169922, + "learning_rate": 1.286953386508416e-09, + "loss": 16.9565, + "step": 496750 + }, + { + "epoch": 1.0034866292012266, + "grad_norm": 209.68011474609375, + "learning_rate": 1.2790461946887712e-09, + "loss": 22.8853, + "step": 496760 + }, + { + "epoch": 1.0035068298339105, + "grad_norm": 124.85618591308594, + "learning_rate": 1.271163366028394e-09, + "loss": 22.4875, + "step": 496770 + }, + { + "epoch": 1.0035270304665942, + "grad_norm": 257.1024169921875, + "learning_rate": 1.2633049005661423e-09, + "loss": 5.9409, + "step": 496780 + }, + { + "epoch": 1.0035472310992781, + "grad_norm": 262.7595520019531, + "learning_rate": 1.2554707983403192e-09, + "loss": 17.5791, + "step": 496790 + }, + { + "epoch": 1.0035674317319618, + "grad_norm": 230.92210388183594, + "learning_rate": 1.247661059389227e-09, + "loss": 17.6548, + "step": 496800 + }, + { + "epoch": 1.0035876323646458, + "grad_norm": 123.04821014404297, + "learning_rate": 1.2398756837506131e-09, + "loss": 15.8132, + "step": 496810 + }, + { + "epoch": 1.0036078329973295, + "grad_norm": 176.6087188720703, + "learning_rate": 1.2321146714627807e-09, + "loss": 8.2122, + "step": 496820 + }, + { + "epoch": 1.0036280336300132, + "grad_norm": 171.78781127929688, + "learning_rate": 1.224378022562922e-09, + "loss": 16.1176, + "step": 496830 + }, + { + "epoch": 1.0036482342626971, + "grad_norm": 156.961181640625, + "learning_rate": 1.2166657370898948e-09, + "loss": 14.4038, + "step": 496840 + }, + { + "epoch": 1.0036684348953808, + "grad_norm": 333.74078369140625, + "learning_rate": 1.2089778150797816e-09, + "loss": 19.8862, + "step": 496850 + }, + { + "epoch": 1.0036886355280648, + "grad_norm": 48.801414489746094, + "learning_rate": 1.2013142565708845e-09, + "loss": 15.524, + "step": 496860 + }, + { + "epoch": 1.0037088361607485, + "grad_norm": 199.96466064453125, + "learning_rate": 1.193675061600952e-09, + "loss": 13.8182, + "step": 496870 + }, + { + "epoch": 1.0037290367934324, + "grad_norm": 336.8670959472656, + "learning_rate": 1.1860602302066203e-09, + "loss": 24.7726, + "step": 496880 + }, + { + "epoch": 1.003749237426116, + "grad_norm": 1754.968017578125, + "learning_rate": 1.178469762425083e-09, + "loss": 11.5773, + "step": 496890 + }, + { + "epoch": 1.0037694380588, + "grad_norm": 344.7020263671875, + "learning_rate": 1.170903658293532e-09, + "loss": 10.1763, + "step": 496900 + }, + { + "epoch": 1.0037896386914837, + "grad_norm": 243.2534942626953, + "learning_rate": 1.1633619178486044e-09, + "loss": 12.3744, + "step": 496910 + }, + { + "epoch": 1.0038098393241677, + "grad_norm": 331.5781555175781, + "learning_rate": 1.155844541126938e-09, + "loss": 21.797, + "step": 496920 + }, + { + "epoch": 1.0038300399568514, + "grad_norm": 168.9166259765625, + "learning_rate": 1.1483515281657254e-09, + "loss": 14.3653, + "step": 496930 + }, + { + "epoch": 1.0038502405895353, + "grad_norm": 86.74561309814453, + "learning_rate": 1.1408828790010484e-09, + "loss": 9.9988, + "step": 496940 + }, + { + "epoch": 1.003870441222219, + "grad_norm": 130.0760955810547, + "learning_rate": 1.1334385936695447e-09, + "loss": 20.6595, + "step": 496950 + }, + { + "epoch": 1.003890641854903, + "grad_norm": 148.57781982421875, + "learning_rate": 1.1260186722067411e-09, + "loss": 19.4977, + "step": 496960 + }, + { + "epoch": 1.0039108424875867, + "grad_norm": 214.80091857910156, + "learning_rate": 1.1186231146503856e-09, + "loss": 16.563, + "step": 496970 + }, + { + "epoch": 1.0039310431202706, + "grad_norm": 397.2010803222656, + "learning_rate": 1.111251921034895e-09, + "loss": 26.6753, + "step": 496980 + }, + { + "epoch": 1.0039512437529543, + "grad_norm": 215.48861694335938, + "learning_rate": 1.1039050913969062e-09, + "loss": 11.6482, + "step": 496990 + }, + { + "epoch": 1.0039714443856382, + "grad_norm": 599.6796264648438, + "learning_rate": 1.096582625772502e-09, + "loss": 13.3068, + "step": 497000 + }, + { + "epoch": 1.003991645018322, + "grad_norm": 263.6522216796875, + "learning_rate": 1.0892845241972094e-09, + "loss": 13.7127, + "step": 497010 + }, + { + "epoch": 1.0040118456510059, + "grad_norm": 265.34515380859375, + "learning_rate": 1.0820107867060004e-09, + "loss": 19.5239, + "step": 497020 + }, + { + "epoch": 1.0040320462836896, + "grad_norm": 227.15771484375, + "learning_rate": 1.074761413334957e-09, + "loss": 30.504, + "step": 497030 + }, + { + "epoch": 1.0040522469163735, + "grad_norm": 423.343994140625, + "learning_rate": 1.0675364041190516e-09, + "loss": 14.5951, + "step": 497040 + }, + { + "epoch": 1.0040724475490572, + "grad_norm": 165.7329864501953, + "learning_rate": 1.0603357590938112e-09, + "loss": 7.8363, + "step": 497050 + }, + { + "epoch": 1.0040926481817412, + "grad_norm": 252.2505340576172, + "learning_rate": 1.0531594782942079e-09, + "loss": 14.6733, + "step": 497060 + }, + { + "epoch": 1.0041128488144249, + "grad_norm": 363.8894348144531, + "learning_rate": 1.0460075617552134e-09, + "loss": 16.4312, + "step": 497070 + }, + { + "epoch": 1.0041330494471086, + "grad_norm": 583.636962890625, + "learning_rate": 1.0388800095118002e-09, + "loss": 16.2948, + "step": 497080 + }, + { + "epoch": 1.0041532500797925, + "grad_norm": 332.56158447265625, + "learning_rate": 1.0317768215983847e-09, + "loss": 12.3562, + "step": 497090 + }, + { + "epoch": 1.0041734507124762, + "grad_norm": 340.92022705078125, + "learning_rate": 1.0246979980499395e-09, + "loss": 17.9949, + "step": 497100 + }, + { + "epoch": 1.0041936513451601, + "grad_norm": 62.27390670776367, + "learning_rate": 1.017643538900881e-09, + "loss": 8.2491, + "step": 497110 + }, + { + "epoch": 1.0042138519778439, + "grad_norm": 283.4289245605469, + "learning_rate": 1.0106134441850712e-09, + "loss": 9.8237, + "step": 497120 + }, + { + "epoch": 1.0042340526105278, + "grad_norm": 268.71484375, + "learning_rate": 1.0036077139380373e-09, + "loss": 10.6235, + "step": 497130 + }, + { + "epoch": 1.0042542532432115, + "grad_norm": 240.248046875, + "learning_rate": 9.96626348192531e-10, + "loss": 44.8136, + "step": 497140 + }, + { + "epoch": 1.0042744538758954, + "grad_norm": 108.62383270263672, + "learning_rate": 9.896693469829689e-10, + "loss": 28.2563, + "step": 497150 + }, + { + "epoch": 1.0042946545085791, + "grad_norm": 190.87635803222656, + "learning_rate": 9.827367103437679e-10, + "loss": 14.5446, + "step": 497160 + }, + { + "epoch": 1.004314855141263, + "grad_norm": 208.57196044921875, + "learning_rate": 9.758284383082351e-10, + "loss": 16.7593, + "step": 497170 + }, + { + "epoch": 1.0043350557739468, + "grad_norm": 215.20289611816406, + "learning_rate": 9.68944530910787e-10, + "loss": 23.861, + "step": 497180 + }, + { + "epoch": 1.0043552564066307, + "grad_norm": 538.6763916015625, + "learning_rate": 9.620849881836203e-10, + "loss": 12.2761, + "step": 497190 + }, + { + "epoch": 1.0043754570393144, + "grad_norm": 230.8127899169922, + "learning_rate": 9.55249810161152e-10, + "loss": 15.7254, + "step": 497200 + }, + { + "epoch": 1.0043956576719983, + "grad_norm": 64.42849731445312, + "learning_rate": 9.484389968766882e-10, + "loss": 15.4476, + "step": 497210 + }, + { + "epoch": 1.004415858304682, + "grad_norm": 25.883228302001953, + "learning_rate": 9.416525483635364e-10, + "loss": 12.8251, + "step": 497220 + }, + { + "epoch": 1.004436058937366, + "grad_norm": 366.4950256347656, + "learning_rate": 9.348904646538925e-10, + "loss": 19.1774, + "step": 497230 + }, + { + "epoch": 1.0044562595700497, + "grad_norm": 295.9607849121094, + "learning_rate": 9.281527457816186e-10, + "loss": 15.7636, + "step": 497240 + }, + { + "epoch": 1.0044764602027336, + "grad_norm": 164.70501708984375, + "learning_rate": 9.214393917789111e-10, + "loss": 14.53, + "step": 497250 + }, + { + "epoch": 1.0044966608354173, + "grad_norm": 110.36573791503906, + "learning_rate": 9.147504026790766e-10, + "loss": 6.9312, + "step": 497260 + }, + { + "epoch": 1.0045168614681013, + "grad_norm": 19.185224533081055, + "learning_rate": 9.080857785137564e-10, + "loss": 16.6655, + "step": 497270 + }, + { + "epoch": 1.004537062100785, + "grad_norm": 380.02532958984375, + "learning_rate": 9.014455193168125e-10, + "loss": 8.3419, + "step": 497280 + }, + { + "epoch": 1.004557262733469, + "grad_norm": 543.848876953125, + "learning_rate": 8.948296251198863e-10, + "loss": 20.6537, + "step": 497290 + }, + { + "epoch": 1.0045774633661526, + "grad_norm": 241.8990020751953, + "learning_rate": 8.88238095955174e-10, + "loss": 16.4705, + "step": 497300 + }, + { + "epoch": 1.0045976639988365, + "grad_norm": 774.7713623046875, + "learning_rate": 8.816709318543171e-10, + "loss": 19.6519, + "step": 497310 + }, + { + "epoch": 1.0046178646315203, + "grad_norm": 437.86175537109375, + "learning_rate": 8.751281328506223e-10, + "loss": 19.5219, + "step": 497320 + }, + { + "epoch": 1.0046380652642042, + "grad_norm": 314.4528503417969, + "learning_rate": 8.686096989751758e-10, + "loss": 18.3735, + "step": 497330 + }, + { + "epoch": 1.004658265896888, + "grad_norm": 183.13369750976562, + "learning_rate": 8.621156302590639e-10, + "loss": 9.6208, + "step": 497340 + }, + { + "epoch": 1.0046784665295716, + "grad_norm": 179.684814453125, + "learning_rate": 8.556459267355932e-10, + "loss": 9.7718, + "step": 497350 + }, + { + "epoch": 1.0046986671622555, + "grad_norm": 372.1221923828125, + "learning_rate": 8.492005884347398e-10, + "loss": 27.6225, + "step": 497360 + }, + { + "epoch": 1.0047188677949392, + "grad_norm": 255.3513946533203, + "learning_rate": 8.427796153887002e-10, + "loss": 18.5596, + "step": 497370 + }, + { + "epoch": 1.0047390684276232, + "grad_norm": 291.7013854980469, + "learning_rate": 8.363830076285606e-10, + "loss": 7.915, + "step": 497380 + }, + { + "epoch": 1.0047592690603069, + "grad_norm": 209.06103515625, + "learning_rate": 8.300107651859623e-10, + "loss": 20.9561, + "step": 497390 + }, + { + "epoch": 1.0047794696929908, + "grad_norm": 189.4288330078125, + "learning_rate": 8.236628880914365e-10, + "loss": 16.9448, + "step": 497400 + }, + { + "epoch": 1.0047996703256745, + "grad_norm": 358.1460266113281, + "learning_rate": 8.173393763760695e-10, + "loss": 22.2148, + "step": 497410 + }, + { + "epoch": 1.0048198709583585, + "grad_norm": 403.0713806152344, + "learning_rate": 8.110402300703924e-10, + "loss": 22.7103, + "step": 497420 + }, + { + "epoch": 1.0048400715910422, + "grad_norm": 200.86256408691406, + "learning_rate": 8.047654492054913e-10, + "loss": 7.5697, + "step": 497430 + }, + { + "epoch": 1.004860272223726, + "grad_norm": 297.1061706542969, + "learning_rate": 7.985150338118974e-10, + "loss": 17.3693, + "step": 497440 + }, + { + "epoch": 1.0048804728564098, + "grad_norm": 83.62934112548828, + "learning_rate": 7.92288983920142e-10, + "loss": 20.8319, + "step": 497450 + }, + { + "epoch": 1.0049006734890937, + "grad_norm": 307.7446594238281, + "learning_rate": 7.860872995602009e-10, + "loss": 11.7905, + "step": 497460 + }, + { + "epoch": 1.0049208741217774, + "grad_norm": 317.6285705566406, + "learning_rate": 7.799099807626054e-10, + "loss": 12.442, + "step": 497470 + }, + { + "epoch": 1.0049410747544614, + "grad_norm": 835.113037109375, + "learning_rate": 7.737570275573314e-10, + "loss": 18.0063, + "step": 497480 + }, + { + "epoch": 1.004961275387145, + "grad_norm": 467.5967712402344, + "learning_rate": 7.67628439974355e-10, + "loss": 12.2073, + "step": 497490 + }, + { + "epoch": 1.004981476019829, + "grad_norm": 191.9807586669922, + "learning_rate": 7.615242180436521e-10, + "loss": 9.385, + "step": 497500 + }, + { + "epoch": 1.0050016766525127, + "grad_norm": 224.09347534179688, + "learning_rate": 7.55444361795199e-10, + "loss": 11.5828, + "step": 497510 + }, + { + "epoch": 1.0050218772851967, + "grad_norm": 316.2989196777344, + "learning_rate": 7.493888712584163e-10, + "loss": 10.3182, + "step": 497520 + }, + { + "epoch": 1.0050420779178804, + "grad_norm": 323.5889892578125, + "learning_rate": 7.433577464621699e-10, + "loss": 17.9219, + "step": 497530 + }, + { + "epoch": 1.0050622785505643, + "grad_norm": 109.38935089111328, + "learning_rate": 7.373509874369911e-10, + "loss": 16.7291, + "step": 497540 + }, + { + "epoch": 1.005082479183248, + "grad_norm": 526.5947265625, + "learning_rate": 7.313685942117454e-10, + "loss": 17.5336, + "step": 497550 + }, + { + "epoch": 1.005102679815932, + "grad_norm": 398.5237121582031, + "learning_rate": 7.254105668152988e-10, + "loss": 24.8671, + "step": 497560 + }, + { + "epoch": 1.0051228804486156, + "grad_norm": 200.6566162109375, + "learning_rate": 7.194769052765171e-10, + "loss": 11.9736, + "step": 497570 + }, + { + "epoch": 1.0051430810812996, + "grad_norm": 374.6748352050781, + "learning_rate": 7.135676096253763e-10, + "loss": 21.5813, + "step": 497580 + }, + { + "epoch": 1.0051632817139833, + "grad_norm": 112.6958999633789, + "learning_rate": 7.076826798890768e-10, + "loss": 13.3266, + "step": 497590 + }, + { + "epoch": 1.005183482346667, + "grad_norm": 113.08387756347656, + "learning_rate": 7.018221160981498e-10, + "loss": 8.6187, + "step": 497600 + }, + { + "epoch": 1.005203682979351, + "grad_norm": 237.25608825683594, + "learning_rate": 6.959859182792406e-10, + "loss": 11.2016, + "step": 497610 + }, + { + "epoch": 1.0052238836120346, + "grad_norm": 0.0, + "learning_rate": 6.901740864623252e-10, + "loss": 5.223, + "step": 497620 + }, + { + "epoch": 1.0052440842447186, + "grad_norm": 220.59524536132812, + "learning_rate": 6.843866206751593e-10, + "loss": 5.644, + "step": 497630 + }, + { + "epoch": 1.0052642848774023, + "grad_norm": 290.9652404785156, + "learning_rate": 6.786235209460534e-10, + "loss": 18.3629, + "step": 497640 + }, + { + "epoch": 1.0052844855100862, + "grad_norm": 368.1429748535156, + "learning_rate": 6.728847873027633e-10, + "loss": 18.1752, + "step": 497650 + }, + { + "epoch": 1.00530468614277, + "grad_norm": 496.81610107421875, + "learning_rate": 6.671704197735995e-10, + "loss": 16.5869, + "step": 497660 + }, + { + "epoch": 1.0053248867754538, + "grad_norm": 226.71218872070312, + "learning_rate": 6.614804183857626e-10, + "loss": 19.0954, + "step": 497670 + }, + { + "epoch": 1.0053450874081375, + "grad_norm": 172.08290100097656, + "learning_rate": 6.558147831681183e-10, + "loss": 11.4151, + "step": 497680 + }, + { + "epoch": 1.0053652880408215, + "grad_norm": 290.1051025390625, + "learning_rate": 6.501735141478672e-10, + "loss": 17.5828, + "step": 497690 + }, + { + "epoch": 1.0053854886735052, + "grad_norm": 131.67520141601562, + "learning_rate": 6.445566113516544e-10, + "loss": 15.6308, + "step": 497700 + }, + { + "epoch": 1.0054056893061891, + "grad_norm": 505.48431396484375, + "learning_rate": 6.389640748077907e-10, + "loss": 16.8782, + "step": 497710 + }, + { + "epoch": 1.0054258899388728, + "grad_norm": 107.32551574707031, + "learning_rate": 6.333959045434768e-10, + "loss": 8.1946, + "step": 497720 + }, + { + "epoch": 1.0054460905715568, + "grad_norm": 281.859375, + "learning_rate": 6.278521005853578e-10, + "loss": 11.7121, + "step": 497730 + }, + { + "epoch": 1.0054662912042405, + "grad_norm": 103.37158203125, + "learning_rate": 6.223326629611893e-10, + "loss": 13.5691, + "step": 497740 + }, + { + "epoch": 1.0054864918369244, + "grad_norm": 203.9019775390625, + "learning_rate": 6.168375916970615e-10, + "loss": 11.0136, + "step": 497750 + }, + { + "epoch": 1.005506692469608, + "grad_norm": 7.61803674697876, + "learning_rate": 6.11366886820175e-10, + "loss": 17.7279, + "step": 497760 + }, + { + "epoch": 1.005526893102292, + "grad_norm": 543.367919921875, + "learning_rate": 6.05920548357175e-10, + "loss": 20.9936, + "step": 497770 + }, + { + "epoch": 1.0055470937349757, + "grad_norm": 286.6015319824219, + "learning_rate": 6.00498576334152e-10, + "loss": 22.6335, + "step": 497780 + }, + { + "epoch": 1.0055672943676597, + "grad_norm": 273.9652099609375, + "learning_rate": 5.951009707783062e-10, + "loss": 13.7487, + "step": 497790 + }, + { + "epoch": 1.0055874950003434, + "grad_norm": 204.1086883544922, + "learning_rate": 5.897277317157279e-10, + "loss": 20.8657, + "step": 497800 + }, + { + "epoch": 1.0056076956330273, + "grad_norm": 352.064453125, + "learning_rate": 5.843788591725074e-10, + "loss": 14.8754, + "step": 497810 + }, + { + "epoch": 1.005627896265711, + "grad_norm": 289.3622131347656, + "learning_rate": 5.790543531741799e-10, + "loss": 16.5565, + "step": 497820 + }, + { + "epoch": 1.005648096898395, + "grad_norm": 148.64691162109375, + "learning_rate": 5.737542137479457e-10, + "loss": 29.357, + "step": 497830 + }, + { + "epoch": 1.0056682975310787, + "grad_norm": 122.69599151611328, + "learning_rate": 5.684784409182298e-10, + "loss": 11.7857, + "step": 497840 + }, + { + "epoch": 1.0056884981637624, + "grad_norm": 108.96839141845703, + "learning_rate": 5.632270347116775e-10, + "loss": 10.1209, + "step": 497850 + }, + { + "epoch": 1.0057086987964463, + "grad_norm": 142.5657958984375, + "learning_rate": 5.579999951532688e-10, + "loss": 20.193, + "step": 497860 + }, + { + "epoch": 1.00572889942913, + "grad_norm": 378.4535217285156, + "learning_rate": 5.527973222690941e-10, + "loss": 23.4196, + "step": 497870 + }, + { + "epoch": 1.005749100061814, + "grad_norm": 341.4775695800781, + "learning_rate": 5.476190160841333e-10, + "loss": 17.7467, + "step": 497880 + }, + { + "epoch": 1.0057693006944977, + "grad_norm": 183.7340087890625, + "learning_rate": 5.424650766239215e-10, + "loss": 22.6553, + "step": 497890 + }, + { + "epoch": 1.0057895013271816, + "grad_norm": 810.3782958984375, + "learning_rate": 5.373355039128836e-10, + "loss": 17.3577, + "step": 497900 + }, + { + "epoch": 1.0058097019598653, + "grad_norm": 119.6341323852539, + "learning_rate": 5.322302979771099e-10, + "loss": 14.5229, + "step": 497910 + }, + { + "epoch": 1.0058299025925492, + "grad_norm": 310.62347412109375, + "learning_rate": 5.271494588404702e-10, + "loss": 14.242, + "step": 497920 + }, + { + "epoch": 1.005850103225233, + "grad_norm": 119.24578094482422, + "learning_rate": 5.220929865284996e-10, + "loss": 12.5786, + "step": 497930 + }, + { + "epoch": 1.0058703038579169, + "grad_norm": 376.8143005371094, + "learning_rate": 5.170608810650679e-10, + "loss": 23.0576, + "step": 497940 + }, + { + "epoch": 1.0058905044906006, + "grad_norm": 646.37646484375, + "learning_rate": 5.120531424751551e-10, + "loss": 14.2424, + "step": 497950 + }, + { + "epoch": 1.0059107051232845, + "grad_norm": 164.1946258544922, + "learning_rate": 5.070697707837413e-10, + "loss": 35.6545, + "step": 497960 + }, + { + "epoch": 1.0059309057559682, + "grad_norm": 310.8088073730469, + "learning_rate": 5.02110766013586e-10, + "loss": 13.8999, + "step": 497970 + }, + { + "epoch": 1.0059511063886521, + "grad_norm": 249.61207580566406, + "learning_rate": 4.971761281907795e-10, + "loss": 14.672, + "step": 497980 + }, + { + "epoch": 1.0059713070213359, + "grad_norm": 5.761580467224121, + "learning_rate": 4.922658573375261e-10, + "loss": 15.3031, + "step": 497990 + }, + { + "epoch": 1.0059915076540198, + "grad_norm": 52.5190544128418, + "learning_rate": 4.87379953478806e-10, + "loss": 15.158, + "step": 498000 + }, + { + "epoch": 1.0060117082867035, + "grad_norm": 221.9866485595703, + "learning_rate": 4.825184166384888e-10, + "loss": 25.3647, + "step": 498010 + }, + { + "epoch": 1.0060319089193874, + "grad_norm": 407.00274658203125, + "learning_rate": 4.776812468398895e-10, + "loss": 10.4687, + "step": 498020 + }, + { + "epoch": 1.0060521095520711, + "grad_norm": 305.9030456542969, + "learning_rate": 4.728684441068776e-10, + "loss": 17.8631, + "step": 498030 + }, + { + "epoch": 1.006072310184755, + "grad_norm": 103.88262939453125, + "learning_rate": 4.680800084622128e-10, + "loss": 15.7122, + "step": 498040 + }, + { + "epoch": 1.0060925108174388, + "grad_norm": 70.55221557617188, + "learning_rate": 4.6331593993032e-10, + "loss": 11.8809, + "step": 498050 + }, + { + "epoch": 1.0061127114501227, + "grad_norm": 44.675846099853516, + "learning_rate": 4.585762385334036e-10, + "loss": 4.9157, + "step": 498060 + }, + { + "epoch": 1.0061329120828064, + "grad_norm": 270.4665832519531, + "learning_rate": 4.538609042953335e-10, + "loss": 13.6184, + "step": 498070 + }, + { + "epoch": 1.0061531127154903, + "grad_norm": 359.03851318359375, + "learning_rate": 4.49169937238314e-10, + "loss": 9.6673, + "step": 498080 + }, + { + "epoch": 1.006173313348174, + "grad_norm": 158.07606506347656, + "learning_rate": 4.445033373862151e-10, + "loss": 12.3881, + "step": 498090 + }, + { + "epoch": 1.006193513980858, + "grad_norm": 410.2479553222656, + "learning_rate": 4.398611047612411e-10, + "loss": 15.5781, + "step": 498100 + }, + { + "epoch": 1.0062137146135417, + "grad_norm": 400.76806640625, + "learning_rate": 4.3524323938559655e-10, + "loss": 15.3732, + "step": 498110 + }, + { + "epoch": 1.0062339152462254, + "grad_norm": 102.79998016357422, + "learning_rate": 4.3064974128259605e-10, + "loss": 10.8671, + "step": 498120 + }, + { + "epoch": 1.0062541158789093, + "grad_norm": 183.90966796875, + "learning_rate": 4.2608061047388905e-10, + "loss": 16.1218, + "step": 498130 + }, + { + "epoch": 1.006274316511593, + "grad_norm": 91.87322998046875, + "learning_rate": 4.21535846982235e-10, + "loss": 11.3491, + "step": 498140 + }, + { + "epoch": 1.006294517144277, + "grad_norm": 249.82664489746094, + "learning_rate": 4.1701545082928343e-10, + "loss": 26.3386, + "step": 498150 + }, + { + "epoch": 1.0063147177769607, + "grad_norm": 193.24899291992188, + "learning_rate": 4.125194220377937e-10, + "loss": 15.6192, + "step": 498160 + }, + { + "epoch": 1.0063349184096446, + "grad_norm": 149.47335815429688, + "learning_rate": 4.0804776062941533e-10, + "loss": 6.2518, + "step": 498170 + }, + { + "epoch": 1.0063551190423283, + "grad_norm": 324.95867919921875, + "learning_rate": 4.0360046662579753e-10, + "loss": 12.2561, + "step": 498180 + }, + { + "epoch": 1.0063753196750123, + "grad_norm": 96.63861083984375, + "learning_rate": 3.991775400485898e-10, + "loss": 15.6981, + "step": 498190 + }, + { + "epoch": 1.006395520307696, + "grad_norm": 335.8397216796875, + "learning_rate": 3.9477898091944135e-10, + "loss": 25.09, + "step": 498200 + }, + { + "epoch": 1.00641572094038, + "grad_norm": 449.616943359375, + "learning_rate": 3.9040478925944645e-10, + "loss": 31.7562, + "step": 498210 + }, + { + "epoch": 1.0064359215730636, + "grad_norm": 92.94026184082031, + "learning_rate": 3.8605496509080966e-10, + "loss": 11.5487, + "step": 498220 + }, + { + "epoch": 1.0064561222057475, + "grad_norm": 378.7860107421875, + "learning_rate": 3.8172950843351485e-10, + "loss": 25.2461, + "step": 498230 + }, + { + "epoch": 1.0064763228384312, + "grad_norm": 139.0156707763672, + "learning_rate": 3.774284193097666e-10, + "loss": 8.0539, + "step": 498240 + }, + { + "epoch": 1.0064965234711152, + "grad_norm": 239.59571838378906, + "learning_rate": 3.7315169774010397e-10, + "loss": 9.1578, + "step": 498250 + }, + { + "epoch": 1.0065167241037989, + "grad_norm": 215.89488220214844, + "learning_rate": 3.6889934374506606e-10, + "loss": 25.5079, + "step": 498260 + }, + { + "epoch": 1.0065369247364828, + "grad_norm": 179.49853515625, + "learning_rate": 3.646713573457472e-10, + "loss": 17.75, + "step": 498270 + }, + { + "epoch": 1.0065571253691665, + "grad_norm": 264.7878723144531, + "learning_rate": 3.604677385626865e-10, + "loss": 13.9832, + "step": 498280 + }, + { + "epoch": 1.0065773260018505, + "grad_norm": 97.20784759521484, + "learning_rate": 3.562884874158679e-10, + "loss": 14.5283, + "step": 498290 + }, + { + "epoch": 1.0065975266345342, + "grad_norm": 69.8996810913086, + "learning_rate": 3.521336039263856e-10, + "loss": 5.11, + "step": 498300 + }, + { + "epoch": 1.006617727267218, + "grad_norm": 188.6199951171875, + "learning_rate": 3.480030881147789e-10, + "loss": 10.2871, + "step": 498310 + }, + { + "epoch": 1.0066379278999018, + "grad_norm": 345.9925231933594, + "learning_rate": 3.4389693999992146e-10, + "loss": 17.5328, + "step": 498320 + }, + { + "epoch": 1.0066581285325857, + "grad_norm": 154.6565399169922, + "learning_rate": 3.3981515960290757e-10, + "loss": 13.0229, + "step": 498330 + }, + { + "epoch": 1.0066783291652694, + "grad_norm": 198.02406311035156, + "learning_rate": 3.357577469431661e-10, + "loss": 17.9335, + "step": 498340 + }, + { + "epoch": 1.0066985297979534, + "grad_norm": 87.4178237915039, + "learning_rate": 3.3172470204012597e-10, + "loss": 14.529, + "step": 498350 + }, + { + "epoch": 1.006718730430637, + "grad_norm": 730.363037109375, + "learning_rate": 3.277160249143263e-10, + "loss": 27.7188, + "step": 498360 + }, + { + "epoch": 1.0067389310633208, + "grad_norm": 123.78430938720703, + "learning_rate": 3.237317155846409e-10, + "loss": 12.3887, + "step": 498370 + }, + { + "epoch": 1.0067591316960047, + "grad_norm": 555.4959716796875, + "learning_rate": 3.1977177407105376e-10, + "loss": 13.2058, + "step": 498380 + }, + { + "epoch": 1.0067793323286884, + "grad_norm": 362.38067626953125, + "learning_rate": 3.158362003918836e-10, + "loss": 10.9317, + "step": 498390 + }, + { + "epoch": 1.0067995329613724, + "grad_norm": 158.46035766601562, + "learning_rate": 3.1192499456766947e-10, + "loss": 16.7167, + "step": 498400 + }, + { + "epoch": 1.006819733594056, + "grad_norm": 55.679298400878906, + "learning_rate": 3.0803815661617495e-10, + "loss": 11.2871, + "step": 498410 + }, + { + "epoch": 1.00683993422674, + "grad_norm": 1470.99365234375, + "learning_rate": 3.0417568655738416e-10, + "loss": 12.9113, + "step": 498420 + }, + { + "epoch": 1.0068601348594237, + "grad_norm": 348.9452819824219, + "learning_rate": 3.003375844090606e-10, + "loss": 21.9742, + "step": 498430 + }, + { + "epoch": 1.0068803354921076, + "grad_norm": 300.7489318847656, + "learning_rate": 2.9652385019118823e-10, + "loss": 17.7186, + "step": 498440 + }, + { + "epoch": 1.0069005361247914, + "grad_norm": 15.27856159210205, + "learning_rate": 2.9273448392097557e-10, + "loss": 16.9864, + "step": 498450 + }, + { + "epoch": 1.0069207367574753, + "grad_norm": 293.1330871582031, + "learning_rate": 2.8896948561785156e-10, + "loss": 20.0468, + "step": 498460 + }, + { + "epoch": 1.006940937390159, + "grad_norm": 152.7106170654297, + "learning_rate": 2.8522885530013475e-10, + "loss": 14.7963, + "step": 498470 + }, + { + "epoch": 1.006961138022843, + "grad_norm": 85.65802764892578, + "learning_rate": 2.8151259298558884e-10, + "loss": 12.1627, + "step": 498480 + }, + { + "epoch": 1.0069813386555266, + "grad_norm": 335.9873046875, + "learning_rate": 2.7782069869253247e-10, + "loss": 10.7897, + "step": 498490 + }, + { + "epoch": 1.0070015392882106, + "grad_norm": 232.81271362304688, + "learning_rate": 2.741531724392843e-10, + "loss": 11.3882, + "step": 498500 + }, + { + "epoch": 1.0070217399208943, + "grad_norm": 203.44442749023438, + "learning_rate": 2.705100142430528e-10, + "loss": 19.1838, + "step": 498510 + }, + { + "epoch": 1.0070419405535782, + "grad_norm": 141.52403259277344, + "learning_rate": 2.668912241221566e-10, + "loss": 12.449, + "step": 498520 + }, + { + "epoch": 1.007062141186262, + "grad_norm": 194.8858642578125, + "learning_rate": 2.6329680209435935e-10, + "loss": 18.4131, + "step": 498530 + }, + { + "epoch": 1.0070823418189458, + "grad_norm": 119.25762176513672, + "learning_rate": 2.597267481763144e-10, + "loss": 15.6827, + "step": 498540 + }, + { + "epoch": 1.0071025424516296, + "grad_norm": 156.83926391601562, + "learning_rate": 2.5618106238634033e-10, + "loss": 11.1116, + "step": 498550 + }, + { + "epoch": 1.0071227430843135, + "grad_norm": 163.69297790527344, + "learning_rate": 2.5265974474109054e-10, + "loss": 10.6595, + "step": 498560 + }, + { + "epoch": 1.0071429437169972, + "grad_norm": 313.9014587402344, + "learning_rate": 2.4916279525777356e-10, + "loss": 9.8448, + "step": 498570 + }, + { + "epoch": 1.0071631443496811, + "grad_norm": 152.2070770263672, + "learning_rate": 2.4569021395415283e-10, + "loss": 17.6692, + "step": 498580 + }, + { + "epoch": 1.0071833449823648, + "grad_norm": 63.56298828125, + "learning_rate": 2.4224200084632664e-10, + "loss": 6.8223, + "step": 498590 + }, + { + "epoch": 1.0072035456150488, + "grad_norm": 274.88763427734375, + "learning_rate": 2.388181559515035e-10, + "loss": 10.3704, + "step": 498600 + }, + { + "epoch": 1.0072237462477325, + "grad_norm": 278.88787841796875, + "learning_rate": 2.3541867928633665e-10, + "loss": 25.6605, + "step": 498610 + }, + { + "epoch": 1.0072439468804162, + "grad_norm": 307.20465087890625, + "learning_rate": 2.3204357086747952e-10, + "loss": 12.244, + "step": 498620 + }, + { + "epoch": 1.0072641475131001, + "grad_norm": 353.385498046875, + "learning_rate": 2.2869283071103032e-10, + "loss": 14.1536, + "step": 498630 + }, + { + "epoch": 1.0072843481457838, + "grad_norm": 387.729736328125, + "learning_rate": 2.2536645883308728e-10, + "loss": 15.4943, + "step": 498640 + }, + { + "epoch": 1.0073045487784678, + "grad_norm": 110.98235321044922, + "learning_rate": 2.2206445525085886e-10, + "loss": 7.5882, + "step": 498650 + }, + { + "epoch": 1.0073247494111515, + "grad_norm": 396.0930480957031, + "learning_rate": 2.1878681997988816e-10, + "loss": 17.9858, + "step": 498660 + }, + { + "epoch": 1.0073449500438354, + "grad_norm": 247.63925170898438, + "learning_rate": 2.1553355303627343e-10, + "loss": 10.9372, + "step": 498670 + }, + { + "epoch": 1.007365150676519, + "grad_norm": 178.1328125, + "learning_rate": 2.123046544355578e-10, + "loss": 24.8558, + "step": 498680 + }, + { + "epoch": 1.007385351309203, + "grad_norm": 297.8135070800781, + "learning_rate": 2.091001241932844e-10, + "loss": 13.2768, + "step": 498690 + }, + { + "epoch": 1.0074055519418867, + "grad_norm": 39.48568344116211, + "learning_rate": 2.0591996232610656e-10, + "loss": 18.014, + "step": 498700 + }, + { + "epoch": 1.0074257525745707, + "grad_norm": 301.92315673828125, + "learning_rate": 2.0276416884845718e-10, + "loss": 12.5918, + "step": 498710 + }, + { + "epoch": 1.0074459532072544, + "grad_norm": 447.9718017578125, + "learning_rate": 1.9963274377643448e-10, + "loss": 14.6538, + "step": 498720 + }, + { + "epoch": 1.0074661538399383, + "grad_norm": 312.9599304199219, + "learning_rate": 1.965256871244714e-10, + "loss": 19.0012, + "step": 498730 + }, + { + "epoch": 1.007486354472622, + "grad_norm": 353.3367004394531, + "learning_rate": 1.9344299890866614e-10, + "loss": 20.735, + "step": 498740 + }, + { + "epoch": 1.007506555105306, + "grad_norm": 319.9928894042969, + "learning_rate": 1.903846791434516e-10, + "loss": 15.8078, + "step": 498750 + }, + { + "epoch": 1.0075267557379897, + "grad_norm": 9.733345985412598, + "learning_rate": 1.873507278438158e-10, + "loss": 12.6649, + "step": 498760 + }, + { + "epoch": 1.0075469563706736, + "grad_norm": 333.54791259765625, + "learning_rate": 1.8434114502530187e-10, + "loss": 16.1153, + "step": 498770 + }, + { + "epoch": 1.0075671570033573, + "grad_norm": 376.7235107421875, + "learning_rate": 1.8135593070123246e-10, + "loss": 27.7726, + "step": 498780 + }, + { + "epoch": 1.0075873576360412, + "grad_norm": 367.3818359375, + "learning_rate": 1.7839508488715075e-10, + "loss": 14.0848, + "step": 498790 + }, + { + "epoch": 1.007607558268725, + "grad_norm": 215.5403594970703, + "learning_rate": 1.7545860759693446e-10, + "loss": 8.5867, + "step": 498800 + }, + { + "epoch": 1.0076277589014089, + "grad_norm": 269.4993591308594, + "learning_rate": 1.725464988450165e-10, + "loss": 8.9464, + "step": 498810 + }, + { + "epoch": 1.0076479595340926, + "grad_norm": 226.98304748535156, + "learning_rate": 1.6965875864582983e-10, + "loss": 14.4526, + "step": 498820 + }, + { + "epoch": 1.0076681601667765, + "grad_norm": 252.58116149902344, + "learning_rate": 1.6679538701325215e-10, + "loss": 21.8598, + "step": 498830 + }, + { + "epoch": 1.0076883607994602, + "grad_norm": 263.5116271972656, + "learning_rate": 1.6395638396171643e-10, + "loss": 12.1145, + "step": 498840 + }, + { + "epoch": 1.0077085614321442, + "grad_norm": 147.0958251953125, + "learning_rate": 1.611417495045453e-10, + "loss": 19.2575, + "step": 498850 + }, + { + "epoch": 1.0077287620648279, + "grad_norm": 621.6185302734375, + "learning_rate": 1.5835148365506148e-10, + "loss": 25.4507, + "step": 498860 + }, + { + "epoch": 1.0077489626975118, + "grad_norm": 176.71005249023438, + "learning_rate": 1.5558558642769782e-10, + "loss": 14.3358, + "step": 498870 + }, + { + "epoch": 1.0077691633301955, + "grad_norm": 188.56503295898438, + "learning_rate": 1.5284405783577706e-10, + "loss": 15.4363, + "step": 498880 + }, + { + "epoch": 1.0077893639628792, + "grad_norm": 273.8232421875, + "learning_rate": 1.501268978920667e-10, + "loss": 14.8906, + "step": 498890 + }, + { + "epoch": 1.0078095645955631, + "grad_norm": 307.99542236328125, + "learning_rate": 1.4743410661044454e-10, + "loss": 15.3241, + "step": 498900 + }, + { + "epoch": 1.0078297652282469, + "grad_norm": 114.76146697998047, + "learning_rate": 1.4476568400367819e-10, + "loss": 6.4186, + "step": 498910 + }, + { + "epoch": 1.0078499658609308, + "grad_norm": 582.0213012695312, + "learning_rate": 1.4212163008509028e-10, + "loss": 19.5914, + "step": 498920 + }, + { + "epoch": 1.0078701664936145, + "grad_norm": 226.5014190673828, + "learning_rate": 1.3950194486744838e-10, + "loss": 16.7806, + "step": 498930 + }, + { + "epoch": 1.0078903671262984, + "grad_norm": 446.14508056640625, + "learning_rate": 1.369066283635201e-10, + "loss": 16.9134, + "step": 498940 + }, + { + "epoch": 1.0079105677589821, + "grad_norm": 1160.8868408203125, + "learning_rate": 1.3433568058607293e-10, + "loss": 21.4087, + "step": 498950 + }, + { + "epoch": 1.007930768391666, + "grad_norm": 241.24908447265625, + "learning_rate": 1.3178910154676427e-10, + "loss": 16.0294, + "step": 498960 + }, + { + "epoch": 1.0079509690243498, + "grad_norm": 472.5611877441406, + "learning_rate": 1.292668912594719e-10, + "loss": 15.3888, + "step": 498970 + }, + { + "epoch": 1.0079711696570337, + "grad_norm": 43.85075378417969, + "learning_rate": 1.2676904973529802e-10, + "loss": 33.6512, + "step": 498980 + }, + { + "epoch": 1.0079913702897174, + "grad_norm": 124.53466033935547, + "learning_rate": 1.2429557698645512e-10, + "loss": 15.233, + "step": 498990 + }, + { + "epoch": 1.0080115709224013, + "grad_norm": 281.6215515136719, + "learning_rate": 1.2184647302626585e-10, + "loss": 12.2033, + "step": 499000 + }, + { + "epoch": 1.008031771555085, + "grad_norm": 43.871124267578125, + "learning_rate": 1.1942173786527732e-10, + "loss": 18.2486, + "step": 499010 + }, + { + "epoch": 1.008051972187769, + "grad_norm": 137.5821075439453, + "learning_rate": 1.1702137151570203e-10, + "loss": 14.4781, + "step": 499020 + }, + { + "epoch": 1.0080721728204527, + "grad_norm": 551.835693359375, + "learning_rate": 1.146453739897524e-10, + "loss": 14.7082, + "step": 499030 + }, + { + "epoch": 1.0080923734531366, + "grad_norm": 975.0075073242188, + "learning_rate": 1.1229374529797555e-10, + "loss": 19.17, + "step": 499040 + }, + { + "epoch": 1.0081125740858203, + "grad_norm": 103.84477996826172, + "learning_rate": 1.0996648545313904e-10, + "loss": 8.8648, + "step": 499050 + }, + { + "epoch": 1.0081327747185043, + "grad_norm": 181.8811492919922, + "learning_rate": 1.0766359446579e-10, + "loss": 9.433, + "step": 499060 + }, + { + "epoch": 1.008152975351188, + "grad_norm": 80.49881744384766, + "learning_rate": 1.0538507234703066e-10, + "loss": 13.4556, + "step": 499070 + }, + { + "epoch": 1.008173175983872, + "grad_norm": 9.578737258911133, + "learning_rate": 1.0313091910796324e-10, + "loss": 11.9596, + "step": 499080 + }, + { + "epoch": 1.0081933766165556, + "grad_norm": 1.9659548997879028, + "learning_rate": 1.009011347602451e-10, + "loss": 11.4909, + "step": 499090 + }, + { + "epoch": 1.0082135772492395, + "grad_norm": 67.1401138305664, + "learning_rate": 9.869571931442334e-11, + "loss": 15.3432, + "step": 499100 + }, + { + "epoch": 1.0082337778819233, + "grad_norm": 281.44976806640625, + "learning_rate": 9.65146727810451e-11, + "loss": 15.6687, + "step": 499110 + }, + { + "epoch": 1.0082539785146072, + "grad_norm": 80.7608871459961, + "learning_rate": 9.435799517065746e-11, + "loss": 11.8936, + "step": 499120 + }, + { + "epoch": 1.008274179147291, + "grad_norm": 383.69366455078125, + "learning_rate": 9.222568649380759e-11, + "loss": 29.9398, + "step": 499130 + }, + { + "epoch": 1.0082943797799746, + "grad_norm": 378.7021484375, + "learning_rate": 9.011774676159767e-11, + "loss": 10.354, + "step": 499140 + }, + { + "epoch": 1.0083145804126585, + "grad_norm": 442.16265869140625, + "learning_rate": 8.803417598346465e-11, + "loss": 24.7036, + "step": 499150 + }, + { + "epoch": 1.0083347810453422, + "grad_norm": 180.1383819580078, + "learning_rate": 8.597497416940048e-11, + "loss": 20.6631, + "step": 499160 + }, + { + "epoch": 1.0083549816780262, + "grad_norm": 709.421875, + "learning_rate": 8.394014133050743e-11, + "loss": 19.2424, + "step": 499170 + }, + { + "epoch": 1.0083751823107099, + "grad_norm": 0.0, + "learning_rate": 8.192967747566727e-11, + "loss": 14.6445, + "step": 499180 + }, + { + "epoch": 1.0083953829433938, + "grad_norm": 232.08509826660156, + "learning_rate": 7.994358261542712e-11, + "loss": 30.6636, + "step": 499190 + }, + { + "epoch": 1.0084155835760775, + "grad_norm": 118.77250671386719, + "learning_rate": 7.798185675866876e-11, + "loss": 10.9463, + "step": 499200 + }, + { + "epoch": 1.0084357842087615, + "grad_norm": 441.1081237792969, + "learning_rate": 7.604449991593932e-11, + "loss": 18.0995, + "step": 499210 + }, + { + "epoch": 1.0084559848414452, + "grad_norm": 219.89122009277344, + "learning_rate": 7.413151209612057e-11, + "loss": 16.1622, + "step": 499220 + }, + { + "epoch": 1.008476185474129, + "grad_norm": 275.44464111328125, + "learning_rate": 7.224289330809431e-11, + "loss": 14.1064, + "step": 499230 + }, + { + "epoch": 1.0084963861068128, + "grad_norm": 375.02862548828125, + "learning_rate": 7.037864356185254e-11, + "loss": 10.0396, + "step": 499240 + }, + { + "epoch": 1.0085165867394967, + "grad_norm": 171.23094177246094, + "learning_rate": 6.853876286627703e-11, + "loss": 11.9643, + "step": 499250 + }, + { + "epoch": 1.0085367873721804, + "grad_norm": 296.3878173828125, + "learning_rate": 6.672325122969447e-11, + "loss": 18.2356, + "step": 499260 + }, + { + "epoch": 1.0085569880048644, + "grad_norm": 173.04734802246094, + "learning_rate": 6.493210866209687e-11, + "loss": 9.8609, + "step": 499270 + }, + { + "epoch": 1.008577188637548, + "grad_norm": 181.06671142578125, + "learning_rate": 6.316533517125578e-11, + "loss": 13.4467, + "step": 499280 + }, + { + "epoch": 1.008597389270232, + "grad_norm": 131.52392578125, + "learning_rate": 6.142293076605299e-11, + "loss": 13.3902, + "step": 499290 + }, + { + "epoch": 1.0086175899029157, + "grad_norm": 188.04368591308594, + "learning_rate": 5.970489545537028e-11, + "loss": 14.8991, + "step": 499300 + }, + { + "epoch": 1.0086377905355997, + "grad_norm": 270.3106689453125, + "learning_rate": 5.801122924697922e-11, + "loss": 10.326, + "step": 499310 + }, + { + "epoch": 1.0086579911682834, + "grad_norm": 221.03317260742188, + "learning_rate": 5.634193214976158e-11, + "loss": 22.3886, + "step": 499320 + }, + { + "epoch": 1.0086781918009673, + "grad_norm": 572.0333862304688, + "learning_rate": 5.469700417093382e-11, + "loss": 26.1335, + "step": 499330 + }, + { + "epoch": 1.008698392433651, + "grad_norm": 15.824953079223633, + "learning_rate": 5.3076445319932835e-11, + "loss": 11.863, + "step": 499340 + }, + { + "epoch": 1.008718593066335, + "grad_norm": 136.35269165039062, + "learning_rate": 5.148025560341996e-11, + "loss": 9.789, + "step": 499350 + }, + { + "epoch": 1.0087387936990186, + "grad_norm": 508.6393127441406, + "learning_rate": 4.990843502916676e-11, + "loss": 24.4717, + "step": 499360 + }, + { + "epoch": 1.0087589943317026, + "grad_norm": 341.3861389160156, + "learning_rate": 4.83609836054999e-11, + "loss": 17.634, + "step": 499370 + }, + { + "epoch": 1.0087791949643863, + "grad_norm": 250.8182373046875, + "learning_rate": 4.683790134019095e-11, + "loss": 15.8949, + "step": 499380 + }, + { + "epoch": 1.00879939559707, + "grad_norm": 104.42710876464844, + "learning_rate": 4.533918823934613e-11, + "loss": 15.2551, + "step": 499390 + }, + { + "epoch": 1.008819596229754, + "grad_norm": 228.41319274902344, + "learning_rate": 4.3864844311847235e-11, + "loss": 14.0003, + "step": 499400 + }, + { + "epoch": 1.0088397968624376, + "grad_norm": 86.82367706298828, + "learning_rate": 4.2414869563800475e-11, + "loss": 13.2424, + "step": 499410 + }, + { + "epoch": 1.0088599974951216, + "grad_norm": 206.96160888671875, + "learning_rate": 4.0989264002422315e-11, + "loss": 19.9374, + "step": 499420 + }, + { + "epoch": 1.0088801981278053, + "grad_norm": 1443.3238525390625, + "learning_rate": 3.9588027634929195e-11, + "loss": 27.769, + "step": 499430 + }, + { + "epoch": 1.0089003987604892, + "grad_norm": 196.1706085205078, + "learning_rate": 3.8211160467982453e-11, + "loss": 22.0951, + "step": 499440 + }, + { + "epoch": 1.008920599393173, + "grad_norm": 75.63075256347656, + "learning_rate": 3.685866250879855e-11, + "loss": 7.8608, + "step": 499450 + }, + { + "epoch": 1.0089408000258568, + "grad_norm": 316.41925048828125, + "learning_rate": 3.55305337634837e-11, + "loss": 11.1891, + "step": 499460 + }, + { + "epoch": 1.0089610006585406, + "grad_norm": 382.5196228027344, + "learning_rate": 3.4226774238144135e-11, + "loss": 20.3267, + "step": 499470 + }, + { + "epoch": 1.0089812012912245, + "grad_norm": 243.59719848632812, + "learning_rate": 3.29473839399963e-11, + "loss": 9.1568, + "step": 499480 + }, + { + "epoch": 1.0090014019239082, + "grad_norm": 317.107177734375, + "learning_rate": 3.169236287459132e-11, + "loss": 21.2411, + "step": 499490 + }, + { + "epoch": 1.0090216025565921, + "grad_norm": 245.490966796875, + "learning_rate": 3.0461711048035415e-11, + "loss": 13.6802, + "step": 499500 + }, + { + "epoch": 1.0090418031892758, + "grad_norm": 248.85791015625, + "learning_rate": 2.925542846698992e-11, + "loss": 17.4928, + "step": 499510 + }, + { + "epoch": 1.0090620038219598, + "grad_norm": 139.34979248046875, + "learning_rate": 2.8073515137005957e-11, + "loss": 21.458, + "step": 499520 + }, + { + "epoch": 1.0090822044546435, + "grad_norm": 14.044170379638672, + "learning_rate": 2.6915971063079527e-11, + "loss": 10.9046, + "step": 499530 + }, + { + "epoch": 1.0091024050873274, + "grad_norm": 114.22227478027344, + "learning_rate": 2.5782796252427078e-11, + "loss": 21.2074, + "step": 499540 + }, + { + "epoch": 1.0091226057200111, + "grad_norm": 268.5044250488281, + "learning_rate": 2.467399070893439e-11, + "loss": 11.8885, + "step": 499550 + }, + { + "epoch": 1.009142806352695, + "grad_norm": 115.34527587890625, + "learning_rate": 2.3589554439262807e-11, + "loss": 7.9746, + "step": 499560 + }, + { + "epoch": 1.0091630069853788, + "grad_norm": 165.5419158935547, + "learning_rate": 2.252948744840833e-11, + "loss": 16.3721, + "step": 499570 + }, + { + "epoch": 1.0091832076180627, + "grad_norm": 215.672119140625, + "learning_rate": 2.1493789740811843e-11, + "loss": 24.9816, + "step": 499580 + }, + { + "epoch": 1.0092034082507464, + "grad_norm": 349.49420166015625, + "learning_rate": 2.048246132202447e-11, + "loss": 21.2542, + "step": 499590 + }, + { + "epoch": 1.0092236088834303, + "grad_norm": 399.3600158691406, + "learning_rate": 1.9495502197042214e-11, + "loss": 20.4745, + "step": 499600 + }, + { + "epoch": 1.009243809516114, + "grad_norm": 381.07659912109375, + "learning_rate": 1.8532912370861077e-11, + "loss": 16.8256, + "step": 499610 + }, + { + "epoch": 1.009264010148798, + "grad_norm": 175.41429138183594, + "learning_rate": 1.759469184792195e-11, + "loss": 14.3605, + "step": 499620 + }, + { + "epoch": 1.0092842107814817, + "grad_norm": 112.5757064819336, + "learning_rate": 1.668084063266573e-11, + "loss": 15.3384, + "step": 499630 + }, + { + "epoch": 1.0093044114141656, + "grad_norm": 144.8412628173828, + "learning_rate": 1.57913587295333e-11, + "loss": 13.9982, + "step": 499640 + }, + { + "epoch": 1.0093246120468493, + "grad_norm": 304.6864929199219, + "learning_rate": 1.4926246142965562e-11, + "loss": 19.6713, + "step": 499650 + }, + { + "epoch": 1.009344812679533, + "grad_norm": 294.0633850097656, + "learning_rate": 1.40855028774034e-11, + "loss": 13.0582, + "step": 499660 + }, + { + "epoch": 1.009365013312217, + "grad_norm": 334.11712646484375, + "learning_rate": 1.32691289367326e-11, + "loss": 18.502, + "step": 499670 + }, + { + "epoch": 1.0093852139449007, + "grad_norm": 282.59930419921875, + "learning_rate": 1.2477124325394052e-11, + "loss": 12.432, + "step": 499680 + }, + { + "epoch": 1.0094054145775846, + "grad_norm": 356.05767822265625, + "learning_rate": 1.1709489046163313e-11, + "loss": 10.9972, + "step": 499690 + }, + { + "epoch": 1.0094256152102683, + "grad_norm": 369.1632995605469, + "learning_rate": 1.0966223103481278e-11, + "loss": 20.4087, + "step": 499700 + }, + { + "epoch": 1.0094458158429522, + "grad_norm": 247.69676208496094, + "learning_rate": 1.0247326501233723e-11, + "loss": 17.2, + "step": 499710 + }, + { + "epoch": 1.009466016475636, + "grad_norm": 99.99536895751953, + "learning_rate": 9.55279924275132e-12, + "loss": 19.528, + "step": 499720 + }, + { + "epoch": 1.0094862171083199, + "grad_norm": 48.60514450073242, + "learning_rate": 8.882641330809627e-12, + "loss": 18.8098, + "step": 499730 + }, + { + "epoch": 1.0095064177410036, + "grad_norm": 170.80792236328125, + "learning_rate": 8.236852769294424e-12, + "loss": 7.6632, + "step": 499740 + }, + { + "epoch": 1.0095266183736875, + "grad_norm": 262.50811767578125, + "learning_rate": 7.615433561536379e-12, + "loss": 23.2691, + "step": 499750 + }, + { + "epoch": 1.0095468190063712, + "grad_norm": 280.09454345703125, + "learning_rate": 7.018383709755938e-12, + "loss": 16.5512, + "step": 499760 + }, + { + "epoch": 1.0095670196390552, + "grad_norm": 148.12118530273438, + "learning_rate": 6.445703217838883e-12, + "loss": 19.9449, + "step": 499770 + }, + { + "epoch": 1.0095872202717389, + "grad_norm": 165.4951171875, + "learning_rate": 5.89739208800566e-12, + "loss": 9.0539, + "step": 499780 + }, + { + "epoch": 1.0096074209044228, + "grad_norm": 110.9627456665039, + "learning_rate": 5.373450322476714e-12, + "loss": 10.8923, + "step": 499790 + }, + { + "epoch": 1.0096276215371065, + "grad_norm": 315.8805847167969, + "learning_rate": 4.873877924582715e-12, + "loss": 15.6297, + "step": 499800 + }, + { + "epoch": 1.0096478221697904, + "grad_norm": 87.63687133789062, + "learning_rate": 4.398674896544109e-12, + "loss": 10.1378, + "step": 499810 + }, + { + "epoch": 1.0096680228024741, + "grad_norm": 235.2276611328125, + "learning_rate": 3.947841241136452e-12, + "loss": 11.2597, + "step": 499820 + }, + { + "epoch": 1.009688223435158, + "grad_norm": 351.1982116699219, + "learning_rate": 3.5213769594699687e-12, + "loss": 12.2747, + "step": 499830 + }, + { + "epoch": 1.0097084240678418, + "grad_norm": 21.972841262817383, + "learning_rate": 3.119282054320216e-12, + "loss": 22.4986, + "step": 499840 + }, + { + "epoch": 1.0097286247005257, + "grad_norm": 273.17852783203125, + "learning_rate": 2.741556527352529e-12, + "loss": 17.6205, + "step": 499850 + }, + { + "epoch": 1.0097488253332094, + "grad_norm": 580.908447265625, + "learning_rate": 2.388200380787353e-12, + "loss": 14.6109, + "step": 499860 + }, + { + "epoch": 1.0097690259658934, + "grad_norm": 360.0311584472656, + "learning_rate": 2.0592136162900234e-12, + "loss": 25.966, + "step": 499870 + }, + { + "epoch": 1.009789226598577, + "grad_norm": 468.5033874511719, + "learning_rate": 1.754596235525874e-12, + "loss": 17.6713, + "step": 499880 + }, + { + "epoch": 1.009809427231261, + "grad_norm": 415.4053649902344, + "learning_rate": 1.4743482390500164e-12, + "loss": 9.554, + "step": 499890 + }, + { + "epoch": 1.0098296278639447, + "grad_norm": 89.76799774169922, + "learning_rate": 1.2184696296380083e-12, + "loss": 22.074, + "step": 499900 + }, + { + "epoch": 1.0098498284966284, + "grad_norm": 345.9091796875, + "learning_rate": 9.869604078449612e-13, + "loss": 18.3392, + "step": 499910 + }, + { + "epoch": 1.0098700291293123, + "grad_norm": 52.05888748168945, + "learning_rate": 7.798205742259868e-13, + "loss": 22.3531, + "step": 499920 + }, + { + "epoch": 1.009890229761996, + "grad_norm": 8.85113525390625, + "learning_rate": 5.970501310015308e-13, + "loss": 15.7406, + "step": 499930 + }, + { + "epoch": 1.00991043039468, + "grad_norm": 112.59426879882812, + "learning_rate": 4.386490781715935e-13, + "loss": 25.1836, + "step": 499940 + }, + { + "epoch": 1.0099306310273637, + "grad_norm": 357.5207824707031, + "learning_rate": 3.046174168463978e-13, + "loss": 15.5821, + "step": 499950 + }, + { + "epoch": 1.0099508316600476, + "grad_norm": 58.12090301513672, + "learning_rate": 1.9495514758105516e-13, + "loss": 24.2742, + "step": 499960 + }, + { + "epoch": 1.0099710322927313, + "grad_norm": 277.7957458496094, + "learning_rate": 1.0966227093067716e-13, + "loss": 18.9998, + "step": 499970 + }, + { + "epoch": 1.0099912329254153, + "grad_norm": 55.04225158691406, + "learning_rate": 4.873878689526379e-14, + "loss": 14.4218, + "step": 499980 + }, + { + "epoch": 1.010011433558099, + "grad_norm": 32.48318862915039, + "learning_rate": 1.218469658503807e-14, + "loss": 12.5496, + "step": 499990 + }, + { + "epoch": 1.010031634190783, + "grad_norm": 244.59695434570312, + "learning_rate": 0.0, + "loss": 9.7267, + "step": 500000 } ], "logging_steps": 10, @@ -302421,7 +350029,7 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": false + "should_training_stop": true }, "attributes": {} }